当前位置:网站首页>Analysis of tcpdump packet capturing kernel code

Analysis of tcpdump packet capturing kernel code

2022-06-24 21:24:00 already_ skb


Registering the PF_PACKET protocol family

   The .create function is called when a PF_PACKET socket is created, and the packet-capture hook function is registered during that call. See the implementation of packet_create below.

static const struct net_proto_familypacket_family_ops = {

         .family=   PF_PACKET,

         .create=  packet_create,

         .owner     =       THIS_MODULE,

};

 

/*
 * Module initialization (excerpt): registers packet_family_ops with the
 * socket layer. Everything else in the function is elided by the article.
 */
static int __init packet_init(void)
{
	/* ... earlier initialization elided in this excerpt ... */

	sock_register(&packet_family_ops);

	/* ... remaining initialization and return value elided in this excerpt ... */
}

Registering the callback function when a SOCK_PACKET socket is created

/*

 *     Create a packet of type SOCK_PACKET.

 */

static int packet_create(struct net *net,struct socket *sock, int protocol,

                             int kern)

{

         structsock *sk;

         structpacket_sock *po;

         __be16proto = (__force __be16)protocol; /* weird, but documented */

         interr;

 

         if(!ns_capable(net->user_ns, CAP_NET_RAW))

                   return-EPERM;

         if(sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&

             sock->type != SOCK_PACKET)

                   return-ESOCKTNOSUPPORT;

 

         sock->state= SS_UNCONNECTED;

 

         err= -ENOBUFS;

         sk= sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);

         if(sk == NULL)

                   gotoout;

 

         sock->ops= &packet_ops;

         if(sock->type == SOCK_PACKET)

                   sock->ops= &packet_ops_spkt;

 

         sock_init_data(sock,sk);

 

         po= pkt_sk(sk);

         sk->sk_family= PF_PACKET;

         po->num= proto;

 

         err= packet_alloc_pending(po);

         if(err)

                   gotoout2;

 

         packet_cached_dev_reset(po);

 

         sk->sk_destruct= packet_sock_destruct;

         sk_refcnt_debug_inc(sk);

 

         /*

          *     Attacha protocol block

          */

 

         spin_lock_init(&po->bind_lock);

         mutex_init(&po->pg_vec_lock);

         po->prot_hook.func= packet_rcv;

 

         // Register the handler

         if (sock->type == SOCK_PACKET)

                   po->prot_hook.func =packet_rcv_spkt;

 

         po->prot_hook.af_packet_priv= sk;

 

         if (proto) {

                   po->prot_hook.type =proto;

                   Put this socket Mount to ptype_all On the list

                   register_prot_hook(sk);

         }

 

         mutex_lock(&net->packet.sklist_lock);

         sk_add_node_rcu(sk,&net->packet.sklist);

         mutex_unlock(&net->packet.sklist_lock);

 

         preempt_disable();

         sock_prot_inuse_add(net,&packet_proto, 1);

         preempt_enable();

 

         return0;

out2:

         sk_free(sk);

out:

         returnerr;

}

 

Kernel packet-capture function in the receive direction

     There are two invocation scenarios. In the first, the network card has NAPI enabled and process_backlog is called during polling; in the second, the non-NAPI case, netif_receive_skb is called directly to receive the packet and hand it to the network layer.

/*
 * Core receive path for one skb.
 *
 * Walks the ptype_all taps (skipped when pfmemalloc is set), then the
 * optional ingress classifier, VLAN handling and the device rx_handler,
 * and finally the protocol handlers hashed in ptype_base. Each matching
 * handler except the last is invoked through deliver_skb(); the last one
 * (pt_prev) is called directly with the skb.
 *
 * Returns the value produced by the last handler invoked, NET_RX_SUCCESS
 * when the rx_handler consumed the skb, or NET_RX_DROP when the skb is
 * dropped.
 */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	trace_netif_receive_skb(skb);

	orig_dev = skb->dev;

	skb_reset_network_header(skb);
	if (!skb_transport_header_was_set(skb))
		skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	if (pfmemalloc)
		goto skip_taps;

	/*
	 * Walk the ptype_all hooks; per the article, these are the hooks
	 * attached when the tcpdump socket was created.
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			/*
			 * Hand the skb to the previously matched tap (the
			 * article describes this step as copying the packet).
			 */
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
		goto drop;

	if (skb_vlan_tag_present(skb)) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb))
			goto another_round;
		else if (unlikely(!skb))
			goto out;
	}

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (unlikely(skb_vlan_tag_present(skb))) {
		if (skb_vlan_tag_get_id(skb))
			skb->pkt_type = PACKET_OTHERHOST;
		/* Note: we might in the future use prio bits
		 * and set skb->priority like in vlan_do_receive()
		 * For the time being, just ignore Priority Code Point
		 */
		skb->vlan_tci = 0;
	}

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;
	/*
	 * Regular protocol delivery; per the article, an IP packet ends up
	 * in ip_rcv() from here.
	 */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	return ret;
}

Kernel packet-capture function in the transmit direction

         There are also two branches for data transmission. In the first, no qdisc is configured and dev_queue_xmit submits the data directly to the network card. In the second, a qdisc is configured: dev_queue_xmit checks whether a queue is configured and, if so, calls __dev_xmit_skb to put the data into the qdisc queue, where it waits to be polled by the transmit softirq handler net_tx_action. This is what triggers the packet-copy call path.

/*

 *     Support routine. Sends outgoing frames toany network

 *     taps currently in use.

 */

 

static void dev_queue_xmit_nit(structsk_buff *skb, struct net_device *dev)

{

         structpacket_type *ptype;

         structsk_buff *skb2 = NULL;

         structpacket_type *pt_prev = NULL;

 

         rcu_read_lock();

        // Traverse tcpdumpsocket Hook attached when creating

         list_for_each_entry_rcu(ptype,&ptype_all, list) {

                   /* Never send packets back tothe socket

                    * they originated from - MvS([email protected])

                    */

                   if ((ptype->dev == dev ||!ptype->dev) &&

                       (!skb_loop_sk(ptype, skb))) {

                            if (pt_prev) {

                                     // Copy data message

                                     deliver_skb(skb2,pt_prev, skb->dev);

                                     pt_prev =ptype;

                                     continue;

                            }

 

                            skb2 =skb_clone(skb, GFP_ATOMIC);

                            if (!skb2)

                                     break;

 

                            net_timestamp_set(skb2);

 

                            /* skb->nh shouldbe correctly

                               set by sender, so that the second statementis

                               just protection against buggy protocols.

                             */

                            skb_reset_mac_header(skb2);

 

                            if(skb_network_header(skb2) < skb2->data ||

                                skb_network_header(skb2) >skb_tail_pointer(skb2)) {

                                     net_crit_ratelimited("protocol%04x is buggy, dev %s\n",

                                                             ntohs(skb2->protocol),

                                                             dev->name);

                                     skb_reset_network_header(skb2);

                            }

 

                            skb2->transport_header= skb2->network_header;

                            skb2->pkt_type =PACKET_OUTGOING;

                            pt_prev = ptype;

                   }

         }

         if(pt_prev)

                   pt_prev->func(skb2,skb->dev, pt_prev, skb->dev);

         rcu_read_unlock();

}

 

 

 

Unregistering the callback when a SOCK_PACKET socket is destroyed

When a SOCK_PACKET socket is closed, its release function is called, and the previously registered hook function is removed at that point.

static int packet_release(struct socket*sock)

{

         structsock *sk = sock->sk;

         structpacket_sock *po;

         structnet *net;

         uniontpacket_req_u req_u;

 

         if(!sk)

                   return0;

 

         net= sock_net(sk);

         po= pkt_sk(sk);

 

         mutex_lock(&net->packet.sklist_lock);

         sk_del_node_init_rcu(sk);

         mutex_unlock(&net->packet.sklist_lock);

 

         preempt_disable();

         sock_prot_inuse_add(net,sk->sk_prot, -1);

         preempt_enable();

         spin_lock(&po->bind_lock);

         // from ptype_all Remove the registered hook function from the function

         unregister_prot_hook(sk, false);

         packet_cached_dev_reset(po);

 

         if(po->prot_hook.dev) {

                   dev_put(po->prot_hook.dev);

                   po->prot_hook.dev= NULL;

         }

         spin_unlock(&po->bind_lock);

 

         packet_flush_mclist(sk);

 

         if(po->rx_ring.pg_vec) {

                   memset(&req_u,0, sizeof(req_u));

                   packet_set_ring(sk,&req_u, 1, 0);

         }

 

         if(po->tx_ring.pg_vec) {

                   memset(&req_u,0, sizeof(req_u));

                   packet_set_ring(sk,&req_u, 1, 1);

         }

 

         fanout_release(sk);

 

         synchronize_net();

         /*

          *     Nowthe socket is dead. No more input will appear.

          */

         sock_orphan(sk);

         sock->sk= NULL;

 

         /*Purge queues */

 

         skb_queue_purge(&sk->sk_receive_queue);

         packet_free_pending(po);

         sk_refcnt_debug_release(sk);

 

         sock_put(sk);

         return0;

}

Summary

        When capturing packets, tcpdump creates a SOCK_PACKET-type socket. Creating the socket invokes the .create function of packet_family_ops (packet_create), which registers the packet-capture hook function on the ptype_all linked list. In the receive direction, __netif_receive_skb_core calls the registered hook function to copy the packet to the specific processing functions in af_packet.c. Likewise, the transmit function dev_queue_xmit_nit calls the hook function to copy the packet.


Jensonqiu[email protected] 2018/05/08


原网站

版权声明
本文为[already_ skb]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/02/202202211315156309.html