原文地址 blog.csdn.net @hhhhhyyyyy8 @4.15.1
前言 linux 内核源代码变动怎么这么大,handle_bridge 函数居然没有了,本来接着准备以 3.9.1 分析的,但发现和后面的又变了,还是以 4.15.1 现在电脑上用的版本分析吧。
linux kernel:4.15.1
best of best [link ](https://upload.wikimedia.org/wikipedia/commons/3/37/Netfilter-packet-flow.svg )
先看三张图片
IMG skb桥转发蓝图
IMG linux TCP/IP L2层数据包接收流程
IMG 浅析ebtables的概念和一些基本应用
tips: linux 内核版本不一样,流程函数会发生细微改变。
1. br_handle_frame() 作用:
对于需要转发的报文,调用 NF_BR_PRE_ROUTING
处钩子函数,结束后,进入 br_handle_frame_finish()
函数;
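对于目的 MAC 为链路本地保留地址(01-80-C2-00-00-0X)且不需要转发的报文(4.15.1 中 STP/LLDP 报文在 __br_handle_local_finish() 之后直接返回 RX_HANDLER_PASS,其余保留地址走这里),调用 NF_BR_LOCAL_IN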
处钩子函数,结束后,进入 br_handle_local_finish()
函数,在 br_handle_local_finish()
函数中会调用 br_pass_frame_up()
函数。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 rx_handler_result_t br_handle_frame (struct sk_buff **pskb) { struct net_bridge_port *p ; struct sk_buff *skb = *pskb ; const unsigned char *dest = eth_hdr(skb)->h_dest; br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); if (p->flags & BR_VLAN_TUNNEL) { if (br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p))) goto drop; } if (unlikely(is_link_local_ether_addr(dest))) { u16 fwd_mask = p->br->group_fwd_mask_required; fwd_mask |= p->group_fwd_mask; switch (dest[5 ]) { case 0x00 : if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5 ])) goto forward; *pskb = skb; __br_handle_local_finish(skb); return RX_HANDLER_PASS; case 0x01 : goto drop; case 0x0E : fwd_mask |= p->br->group_fwd_mask; if (fwd_mask & (1u << dest[5 ])) goto forward; *pskb = skb; __br_handle_local_finish(skb); return RX_HANDLER_PASS; default : fwd_mask |= p->br->group_fwd_mask; if (fwd_mask & (1u << dest[5 ])) goto forward; } NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), NULL , skb, skb->dev, NULL , br_handle_local_finish); return RX_HANDLER_CONSUMED; } forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); if (rhook) { if ((*rhook)(skb)) { *pskb = skb; return RX_HANDLER_PASS; } dest = eth_hdr(skb)->h_dest; } case BR_STATE_LEARNING: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, 
dev_net(skb->dev), NULL , skb, skb->dev, NULL , br_handle_frame_finish); break ; default : drop: kfree_skb(skb); } return RX_HANDLER_CONSUMED; }
相关函数
rx_handler_result_t
/*
 * rx_handler_result_t enumeration type: the verdict a receive handler such as
 * br_handle_frame() hands back to its caller.
 *
 * RX_HANDLER_CONSUMED - the handler took ownership of the skb (freed it or
 *                       passed it on); the caller must not touch it again.
 * RX_HANDLER_ANOTHER  - NOTE(review): presumably "run the receive path again";
 *                       not used in the code shown here - confirm.
 * RX_HANDLER_EXACT    - NOTE(review): presumably "exact-match delivery only";
 *                       not used in the code shown here - confirm.
 * RX_HANDLER_PASS     - the handler did not consume the skb; the caller
 *                       continues processing it.
 */
enum rx_handler_result {
	RX_HANDLER_CONSUMED,
	RX_HANDLER_ANOTHER,
	RX_HANDLER_EXACT,
	RX_HANDLER_PASS,
};
typedef enum rx_handler_result rx_handler_result_t;
is_valid_ether_addr()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 static inline bool is_valid_ether_addr (const u8 *addr) { return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); }
br_handle_local_finish()
br_handle_local_finish()
函数中调用 br_pass_frame_up()
函数。
1 2 3 4 5 6 7 8 9 10 11 12 static int br_handle_local_finish (struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu (skb ->dev ); __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); return 0 ; }
2. br_handle_frame_finish() 作用:
网桥设备是否处于混杂模式,如果是,则会发一份到本地进行处理
如果是广播包,则会进行广播洪泛,并会发一份到本地处理
如果是组播包,则根据组播表进行组播转发,并发一份数据包到本地处理
如果是单播包,发往本地的单播包则送到本地处理,在 fdb 表中可以找到转发表项的单播包则进行转发,未知单播包在广播域内进行洪泛
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 int br_handle_frame_finish (struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu (skb ->dev ); enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL ; struct net_bridge_mdb_entry *mdst ; bool local_rcv, mcast_hit = false ; const unsigned char *dest; struct net_bridge *br ; u16 vid = 0 ; if (!p || p->state == BR_STATE_DISABLED) goto drop; if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; nbp_switchdev_frame_mark(p, skb); br = p->br; if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false ); local_rcv = !!(br->dev->flags & IFF_PROMISC); dest = eth_hdr(skb)->h_dest; if (is_multicast_ether_addr(dest)) { if (is_broadcast_ether_addr(dest)) { pkt_type = BR_PKT_BROADCAST; local_rcv = true ; } else { pkt_type = BR_PKT_MULTICAST; if (br_multicast_rcv(br, p, skb, vid)) goto drop; } } if (p->state == BR_STATE_LEARNING) goto drop; BR_INPUT_SKB_CB(skb)->brdev = br->dev; if (IS_ENABLED(CONFIG_INET) && (skb->protocol == htons(ETH_P_ARP) || skb->protocol == htons(ETH_P_RARP))) { br_do_proxy_suppress_arp(skb, br, vid, p); } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6) && br->neigh_suppress_enabled && pskb_may_pull(skb, sizeof (struct ipv6hdr) + sizeof (struct nd_msg)) && ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { struct nd_msg *msg, _msg; msg = br_is_nd_neigh_msg(skb, &_msg); if (msg) br_do_suppress_nd(skb, br, vid, p, msg); } switch (pkt_type) { case BR_PKT_MULTICAST: mdst = br_mdb_get(br, skb, vid); if ((mdst || 
BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true ; br->dev->stats.multicast++; } mcast_hit = true ; } else { local_rcv = true ; br->dev->stats.multicast++; } break ; case BR_PKT_UNICAST: dst = br_fdb_find_rcu(br, dest, vid); default : break ; } if (dst) { unsigned long now = jiffies; if (dst->is_local) return br_pass_frame_up(skb); if (now != dst->used) dst->used = now; br_forward(dst->dst, skb, local_rcv, false ); } else { if (!mcast_hit) br_flood(br, skb, pkt_type, local_rcv, false ); else br_multicast_flood(mdst, skb, local_rcv, false ); } if (local_rcv) return br_pass_frame_up(skb); out: return 0 ; drop: kfree_skb(skb); goto out; }
3.br_pass_frame_up 数据包的目的 MAC 是本地的单播报文,广播,组播和网桥处于混杂模式时,报文都会通过 br_pass_frame_up 函数交由上层处理。
作用:
调用 NF_BR_LOCAL_IN 处钩子函数,最后调用 br_netif_receive_skb 函数,绕一圈后,交由上层处理。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 static int br_pass_frame_up (struct sk_buff *skb) { struct net_device *indev , *brdev = BR_INPUT_SKB_CB (skb )->brdev ; struct net_bridge *br = netdev_priv (brdev ); struct net_bridge_vlan_group *vg ; struct pcpu_sw_netstats *brstats = this_cpu_ptr (br ->stats ); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); vg = br_vlan_group_rcu(br); if (!(brdev->flags & IFF_PROMISC) && !br_allowed_egress(vg, skb)) { kfree_skb(skb); return NET_RX_DROP; } indev = skb->dev; skb->dev = brdev; skb = br_handle_vlan(br, NULL , vg, skb); if (!skb) return NET_RX_DROP; br_multicast_count(br, NULL , skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL , skb, indev, NULL , br_netif_receive_skb); }
再次进入 netif_receive_skb,由于 skb->dev 被设置成了 bridge,而 bridge 设备的 rx_handler 函数是没有被设置的,所以就不会再次进入 bridge 逻辑,而直接进入了主机上层协议栈。
相关函数
br_netif_receive_skb()
可以看到在 br_netif_receive_skb() 函数中调用了 netif_receive_skb() 函数。
/*
 * br_netif_receive_skb - okfn of the NF_BR_LOCAL_IN hook used by
 * br_pass_frame_up(): re-inject the frame into the generic receive path.
 * @net: network namespace (unused here).
 * @sk:  socket (unused here).
 * @skb: the frame, with skb->dev already set to the bridge device.
 *
 * Calls br_drop_fake_rtable() on the skb first, then hands it to
 * netif_receive_skb().
 *
 * Return: the result of netif_receive_skb().
 */
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	br_drop_fake_rtable(skb);
	return netif_receive_skb(skb);
}
4. br_forward() 不是发往本地的数据包,但在 fdb 表中能找到对应的表项,则进行转发 br_forward(),若在 fdb 表中找不到对应表项就进行洪泛 br_flood().
作用:
主要是调用__br_forward() 转发报文
/**
 * br_forward - forward a packet to a specific port
 * @to: destination port; may be NULL, in which case nothing is sent
 * @skb: packet being forwarded
 * @local_rcv: true when the caller will still use @skb afterwards (it also
 *             delivers the packet locally), so @skb must not be consumed here
 * @local_orig: packet is locally originated
 *
 * When @local_rcv is set the packet goes out through deliver_clone() and the
 * original is left untouched for the caller. Otherwise the original is handed
 * to __br_forward(), or freed if it cannot be delivered to @to.
 */
void br_forward(const struct net_bridge_port *to,
		struct sk_buff *skb, bool local_rcv, bool local_orig)
{
	if (to && should_deliver(to, skb)) {
		if (local_rcv)
			deliver_clone(to, skb, local_orig);
		else
			__br_forward(to, skb, local_orig);
		return;
	}

	/* Not deliverable: free the skb unless the caller still needs it. */
	if (!local_rcv)
		kfree_skb(skb);
}
5. __br_forward() 作用:
__br_forward() 函数根据数据包的来源(local_orig)分别进入不同的钩子点,如果数据包是从本地发出的,则进入 NF_BR_LOCAL_OUT,如果不是本地发出的,则进入 NF_BR_FORWARD 钩子,最后都进入 br_forward_finish() 函数。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_orig) { struct net_bridge_vlan_group *vg ; struct net_device *indev ; struct net *net ; int br_hook; vg = nbp_vlan_group_rcu(to); skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) return ; indev = skb->dev; skb->dev = to->dev; if (!local_orig) { if (skb_warn_if_lro(skb)) { kfree_skb(skb); return ; } br_hook = NF_BR_FORWARD; skb_forward_csum(skb); net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); br_netpoll_send_skb(to, skb); } return ; } br_hook = NF_BR_LOCAL_OUT; net = dev_net(skb->dev); indev = NULL ; } NF_HOOK(NFPROTO_BRIDGE, br_hook, net, NULL , skb, indev, skb->dev, br_forward_finish); }
6. br_forward_finish() br_forward_finish()
函数比较简单,调用 NF_BR_POST_ROUTING
处的钩子函数,最后进入 br_dev_queue_push_xmit
函数。
1 2 3 4 5 6 7 8 int br_forward_finish (struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL , skb->dev, br_dev_queue_push_xmit); }
7. br_dev_queue_push_xmit 在 br_dev_queue_push_xmit()
中,会先 skb_push(skb, ETH_HLEN);
将 data 指向二层头部,然后调用 dev_queue_xmit()
发送报文。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 int br_dev_queue_push_xmit (struct net *net, struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) goto drop; skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && (skb->protocol == htons(ETH_P_8021Q) || skb->protocol == htons(ETH_P_8021AD))) { int depth; if (!__vlan_get_protocol(skb, skb->protocol, &depth)) goto drop; skb_set_network_header(skb, depth); } dev_queue_xmit(skb); return 0 ; drop: kfree_skb(skb); return 0 ; }
8. br_flood br_flood()
也是调用 __br_forward()
函数转发报文。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 void br_flood (struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) { u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge_port *prev = NULL ; struct net_bridge_port *p ; list_for_each_entry_rcu(p, &br->port_list, list ) { switch (pkt_type) { case BR_PKT_UNICAST: if (!(p->flags & BR_FLOOD)) continue ; break ; case BR_PKT_MULTICAST: if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue ; break ; case BR_PKT_BROADCAST: if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) continue ; break ; } if (p->flags & BR_PROXYARP) continue ; if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) && BR_INPUT_SKB_CB(skb)->proxyarp_replied) continue ; prev = maybe_deliver(prev, p, skb, local_orig); if (IS_ERR(prev)) goto out; if (prev == p) br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX); } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return ; out: if (!local_rcv) kfree_skb(skb); }
9. br_multicast_flood() 以后再分析,肚子饿了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 void br_multicast_flood (struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, bool local_rcv, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB (skb )->brdev ; u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge *br = netdev_priv (dev ); struct net_bridge_port *prev = NULL ; struct net_bridge_port_group *p ; struct hlist_node *rp ; rp = rcu_dereference(hlist_first_rcu(&br->router_list)); p = mdst ? rcu_dereference(mdst->ports) : NULL ; while (p || rp) { struct net_bridge_port *port , *lport , *rport ; lport = p ? p->port : NULL ; rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : NULL ; if ((unsigned long )lport > (unsigned long )rport) { port = lport; if (port->flags & BR_MULTICAST_TO_UNICAST) { maybe_deliver_addr(lport, skb, p->eth_addr, local_orig); goto delivered; } } else { port = rport; } prev = maybe_deliver(prev, port, skb, local_orig); delivered: if (IS_ERR(prev)) goto out; if (prev == port) br_multicast_count(port->br, port, skb, igmp_type, BR_MCAST_DIR_TX); if ((unsigned long )lport >= (unsigned long )port) p = rcu_dereference(p->next); if ((unsigned long )rport >= (unsigned long )port) rp = rcu_dereference(hlist_next_rcu(rp)); } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return ; out: if (!local_rcv) kfree_skb(skb); }
参考 ebtables/iptables interaction on a Linux-based bridge @ebtables @2.4.x kernel
Iptables DNAT实现broadcast与unicast之间相互映射 @sxd2001 分析ebtables 的redirect 和 iptables 的redirect
Linux-4.20.8内核桥收包源码解析(七)--本地(br_pass_frame_up)or 转发(br_forward) @lw_yang @Linux-4.20.8
Linux 3.10 kernel bridge转发逻辑 @lvyilong316 @Linux 3.10 kernel
kernel 网桥代码分析 @海枫 @ 2.6.24.4
桥数据包处理函数——br_handle_frame_finish(七) @不留你的名字
Bridge实现 @SuperKing @linux 2.6.18
LInux下桥接模式详解三 @jack.chen @linux 3.10.1
协议栈报文接收之netif_receive_skb函数分析 @one_clouder @Linux4.1.12