Diffstat (limited to 'kernel/drivers/net/ipvlan')
-rw-r--r-- | kernel/drivers/net/ipvlan/ipvlan.h      |  14 |
-rw-r--r-- | kernel/drivers/net/ipvlan/ipvlan_core.c | 172 |
-rw-r--r-- | kernel/drivers/net/ipvlan/ipvlan_main.c |  70 |
3 files changed, 146 insertions, 110 deletions
diff --git a/kernel/drivers/net/ipvlan/ipvlan.h b/kernel/drivers/net/ipvlan/ipvlan.h
index 54549a622..9542b7bac 100644
--- a/kernel/drivers/net/ipvlan/ipvlan.h
+++ b/kernel/drivers/net/ipvlan/ipvlan.h
@@ -39,6 +39,8 @@
 #define IPVLAN_MAC_FILTER_SIZE	(1 << IPVLAN_MAC_FILTER_BITS)
 #define IPVLAN_MAC_FILTER_MASK	(IPVLAN_MAC_FILTER_SIZE - 1)
 
+#define IPVLAN_QBACKLOG_LIMIT	1000
+
 typedef enum {
 	IPVL_IPV6 = 0,
 	IPVL_ICMPV6,
@@ -65,8 +67,6 @@ struct ipvl_dev {
 	struct ipvl_port	*port;
 	struct net_device	*phy_dev;
 	struct list_head	addrs;
-	int			ipv4cnt;
-	int			ipv6cnt;
 	struct ipvl_pcpu_stats	__percpu *pcpu_stats;
 	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
 	netdev_features_t	sfeatures;
@@ -93,6 +93,8 @@ struct ipvl_port {
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
 	struct list_head	ipvlans;
 	struct rcu_head		rcu;
+	struct work_struct	wq;
+	struct sk_buff_head	backlog;
 	int			count;
 	u16			mode;
 };
@@ -102,6 +104,11 @@ static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
 	return rcu_dereference(d->rx_handler_data);
 }
 
+static inline struct ipvl_port *ipvlan_port_get_rcu_bh(const struct net_device *d)
+{
+	return rcu_dereference_bh(d->rx_handler_data);
+}
+
 static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d)
 {
 	return rtnl_dereference(d->rx_handler_data);
@@ -112,6 +119,7 @@ void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval);
 void ipvlan_init_secret(void);
 unsigned int ipvlan_mac_hash(const unsigned char *addr);
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
+void ipvlan_process_multicast(struct work_struct *work);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
@@ -119,5 +127,5 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
 struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
 					const void *iaddr, bool is_v6);
-void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync);
+void ipvlan_ht_addr_del(struct ipvl_addr *addr);
 #endif /* __IPVLAN_H */
diff --git a/kernel/drivers/net/ipvlan/ipvlan_core.c b/kernel/drivers/net/ipvlan/ipvlan_core.c
index c30b5c300..8c48bb2a9 100644
--- a/kernel/drivers/net/ipvlan/ipvlan_core.c
+++ b/kernel/drivers/net/ipvlan/ipvlan_core.c
@@ -85,11 +85,9 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
 	hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
 }
 
-void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync)
+void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 {
 	hlist_del_init_rcu(&addr->hlnode);
-	if (sync)
-		synchronize_rcu();
 }
 
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
@@ -189,67 +187,74 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr)
 	return hash & IPVLAN_MAC_FILTER_MASK;
 }
 
-static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
-				   const struct ipvl_dev *in_dev, bool local)
+void ipvlan_process_multicast(struct work_struct *work)
 {
-	struct ethhdr *eth = eth_hdr(skb);
+	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
+	struct ethhdr *ethh;
 	struct ipvl_dev *ipvlan;
-	struct sk_buff *nskb;
+	struct sk_buff *skb, *nskb;
+	struct sk_buff_head list;
 	unsigned int len;
 	unsigned int mac_hash;
 	int ret;
+	u8 pkt_type;
+	bool hlocal, dlocal;
 
-	if (skb->protocol == htons(ETH_P_PAUSE))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-		if (local && (ipvlan == in_dev))
-			continue;
+	__skb_queue_head_init(&list);
 
-		mac_hash = ipvlan_mac_hash(eth->h_dest);
-		if (!test_bit(mac_hash, ipvlan->mac_filters))
-			continue;
+	spin_lock_bh(&port->backlog.lock);
+	skb_queue_splice_tail_init(&port->backlog, &list);
+	spin_unlock_bh(&port->backlog.lock);
 
-		ret = NET_RX_DROP;
-		len = skb->len + ETH_HLEN;
-		nskb = skb_clone(skb, GFP_ATOMIC);
-		if (!nskb)
-			goto mcast_acct;
+	while ((skb = __skb_dequeue(&list)) != NULL) {
+		ethh = eth_hdr(skb);
+		hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+		mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
-		if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast))
-			nskb->pkt_type = PACKET_BROADCAST;
+		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
+			pkt_type = PACKET_BROADCAST;
 		else
-			nskb->pkt_type = PACKET_MULTICAST;
-
-		nskb->dev = ipvlan->dev;
-		if (local)
-			ret = dev_forward_skb(ipvlan->dev, nskb);
-		else
-			ret = netif_rx(nskb);
-mcast_acct:
-		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
-	}
-	rcu_read_unlock();
-
-	/* Locally generated? ...Forward a copy to the main-device as
-	 * well. On the RX side we'll ignore it (wont give it to any
-	 * of the virtual devices.
-	 */
-	if (local) {
-		nskb = skb_clone(skb, GFP_ATOMIC);
-		if (nskb) {
-			if (ether_addr_equal(eth->h_dest, port->dev->broadcast))
-				nskb->pkt_type = PACKET_BROADCAST;
+			pkt_type = PACKET_MULTICAST;
+
+		dlocal = false;
+		rcu_read_lock();
+		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+			if (hlocal && (ipvlan->dev == skb->dev)) {
+				dlocal = true;
+				continue;
+			}
+			if (!test_bit(mac_hash, ipvlan->mac_filters))
+				continue;
+
+			ret = NET_RX_DROP;
+			len = skb->len + ETH_HLEN;
+			nskb = skb_clone(skb, GFP_ATOMIC);
+			if (!nskb)
+				goto acct;
+
+			nskb->pkt_type = pkt_type;
+			nskb->dev = ipvlan->dev;
+			if (hlocal)
+				ret = dev_forward_skb(ipvlan->dev, nskb);
 			else
-				nskb->pkt_type = PACKET_MULTICAST;
-
-			dev_forward_skb(port->dev, nskb);
+				ret = netif_rx(nskb);
+acct:
+			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+		}
+		rcu_read_unlock();
+
+		if (dlocal) {
+			/* If the packet originated here, send it out. */
+			skb->dev = port->dev;
+			skb->pkt_type = pkt_type;
+			dev_queue_xmit(skb);
+		} else {
+			kfree_skb(skb);
 		}
 	}
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
 			    bool local)
 {
 	struct ipvl_dev *ipvlan = addr->master;
@@ -257,6 +262,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
 	unsigned int len;
 	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
 	bool success = false;
+	struct sk_buff *skb = *pskb;
 
 	len = skb->len + ETH_HLEN;
 	if (unlikely(!(dev->flags & IFF_UP))) {
@@ -268,6 +274,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
 	if (!skb)
 		goto out;
 
+	*pskb = skb;
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 
@@ -339,17 +346,18 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
 	const struct iphdr *ip4h = ip_hdr(skb);
 	struct net_device *dev = skb->dev;
+	struct net *net = dev_net(dev);
 	struct rtable *rt;
 	int err, ret = NET_XMIT_DROP;
 	struct flowi4 fl4 = {
-		.flowi4_oif = dev_get_iflink(dev),
+		.flowi4_oif = dev->ifindex,
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC,
 		.daddr = ip4h->daddr,
 		.saddr = ip4h->saddr,
 	};
 
-	rt = ip_route_output_flow(dev_net(dev), &fl4, NULL);
+	rt = ip_route_output_flow(net, &fl4, NULL);
 	if (IS_ERR(rt))
 		goto err;
 
@@ -359,7 +367,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 	}
 	skb_dst_drop(skb);
 	skb_dst_set(skb, &rt->dst);
-	err = ip_local_out(skb);
+	err = ip_local_out(net, skb->sk, skb);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -376,10 +384,11 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct net_device *dev = skb->dev;
+	struct net *net = dev_net(dev);
 	struct dst_entry *dst;
 	int err, ret = NET_XMIT_DROP;
 	struct flowi6 fl6 = {
-		.flowi6_iif = skb->dev->ifindex,
+		.flowi6_iif = dev->ifindex,
 		.daddr = ip6h->daddr,
 		.saddr = ip6h->saddr,
 		.flowi6_flags = FLOWI_FLAG_ANYSRC,
@@ -388,7 +397,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 		.flowi6_proto = ip6h->nexthdr,
 	};
 
-	dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+	dst = ip6_route_output(net, NULL, &fl6);
 	if (dst->error) {
 		ret = dst->error;
 		dst_release(dst);
@@ -396,7 +405,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 	}
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
-	err = ip6_local_out(skb);
+	err = ip6_local_out(net, skb->sk, skb);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -446,6 +455,26 @@ out:
 	return ret;
 }
 
+static void ipvlan_multicast_enqueue(struct ipvl_port *port,
+				     struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_PAUSE)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	spin_lock(&port->backlog.lock);
+	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+		__skb_queue_tail(&port->backlog, skb);
+		spin_unlock(&port->backlog.lock);
+		schedule_work(&port->wq);
+	} else {
+		spin_unlock(&port->backlog.lock);
+		atomic_long_inc(&skb->dev->rx_dropped);
+		kfree_skb(skb);
+	}
+}
+
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
@@ -459,7 +488,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 
 	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 	if (addr)
-		return ipvlan_rcv_frame(addr, skb, true);
+		return ipvlan_rcv_frame(addr, &skb, true);
 
 out:
 	skb->dev = ipvlan->phy_dev;
@@ -479,7 +508,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		if (lyr3h) {
 			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 			if (addr)
-				return ipvlan_rcv_frame(addr, skb, true);
+				return ipvlan_rcv_frame(addr, &skb, true);
 		}
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
@@ -493,11 +522,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		return dev_forward_skb(ipvlan->phy_dev, skb);
 
 	} else if (is_multicast_ether_addr(eth->h_dest)) {
-		u8 ip_summed = skb->ip_summed;
-
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true);
-		skb->ip_summed = ip_summed;
+		ipvlan_multicast_enqueue(ipvlan->port, skb);
+		return NET_XMIT_SUCCESS;
 	}
 
 	skb->dev = ipvlan->phy_dev;
@@ -507,7 +533,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
-	struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev);
+	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);
 
 	if (!port)
 		goto out;
 
@@ -565,7 +591,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 	if (addr)
-		ret = ipvlan_rcv_frame(addr, skb, false);
+		ret = ipvlan_rcv_frame(addr, pskb, false);
 
 out:
 	return ret;
@@ -581,8 +607,18 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	int addr_type;
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
-		if (ipvlan_external_frame(skb, port))
-			ipvlan_multicast_frame(port, skb, NULL, false);
+		if (ipvlan_external_frame(skb, port)) {
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+			/* External frames are queued for device local
+			 * distribution, but a copy is given to master
+			 * straight away to avoid sending duplicates later
+			 * when work-queue processes this frame. This is
+			 * achieved by returning RX_HANDLER_PASS.
+			 */
+			if (nskb)
+				ipvlan_multicast_enqueue(port, nskb);
+		}
 	} else {
 		struct ipvl_addr *addr;
 
@@ -592,7 +628,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 
 		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 		if (addr)
-			ret = ipvlan_rcv_frame(addr, skb, false);
+			ret = ipvlan_rcv_frame(addr, pskb, false);
 	}
 
 	return ret;
@@ -617,5 +653,5 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
 		  port->mode);
 	kfree_skb(skb);
-	return NET_RX_DROP;
+	return RX_HANDLER_CONSUMED;
 }
diff --git a/kernel/drivers/net/ipvlan/ipvlan_main.c b/kernel/drivers/net/ipvlan/ipvlan_main.c
index 77b92a0fe..a9268db4e 100644
--- a/kernel/drivers/net/ipvlan/ipvlan_main.c
+++ b/kernel/drivers/net/ipvlan/ipvlan_main.c
@@ -54,6 +54,9 @@ static int ipvlan_port_create(struct net_device *dev)
 	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 		INIT_HLIST_HEAD(&port->hlhead[idx]);
 
+	skb_queue_head_init(&port->backlog);
+	INIT_WORK(&port->wq, ipvlan_process_multicast);
+
 	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 	if (err)
 		goto err;
@@ -72,6 +75,8 @@ static void ipvlan_port_destroy(struct net_device *dev)
 
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 	netdev_rx_handler_unregister(dev);
+	cancel_work_sync(&port->wq);
+	__skb_queue_purge(&port->backlog);
 	kfree_rcu(port, rcu);
 }
 
@@ -148,10 +153,9 @@ static int ipvlan_open(struct net_device *dev)
 	else
 		dev->flags &= ~IFF_NOARP;
 
-	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
-		list_for_each_entry(addr, &ipvlan->addrs, anode)
-			ipvlan_ht_addr_add(ipvlan, addr);
-	}
+	list_for_each_entry(addr, &ipvlan->addrs, anode)
+		ipvlan_ht_addr_add(ipvlan, addr);
+
 	return dev_uc_add(phy_dev, phy_dev->dev_addr);
 }
 
@@ -166,10 +170,9 @@ static int ipvlan_stop(struct net_device *dev)
 
 	dev_uc_del(phy_dev, phy_dev->dev_addr);
 
-	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
-		list_for_each_entry(addr, &ipvlan->addrs, anode)
-			ipvlan_ht_addr_del(addr, !dev->dismantle);
-	}
+	list_for_each_entry(addr, &ipvlan->addrs, anode)
+		ipvlan_ht_addr_del(addr);
+
 	return 0;
 }
 
@@ -213,17 +216,6 @@ static void ipvlan_change_rx_flags(struct net_device *dev, int change)
 		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
 }
 
-static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set)
-{
-	struct net_device *dev = ipvlan->dev;
-	unsigned int hashbit = ipvlan_mac_hash(dev->broadcast);
-
-	if (set && !test_bit(hashbit, ipvlan->mac_filters))
-		__set_bit(hashbit, ipvlan->mac_filters);
-	else if (!set && test_bit(hashbit, ipvlan->mac_filters))
-		__clear_bit(hashbit, ipvlan->mac_filters);
-}
-
 static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
@@ -238,6 +230,12 @@ static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 	netdev_for_each_mc_addr(ha, dev)
 		__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
 
+	/* Turn-on broadcast bit irrespective of address family,
+	 * since broadcast is deferred to a work-queue, hence no
+	 * impact on fast-path processing.
+	 */
+	__set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
+
 	bitmap_copy(ipvlan->mac_filters, mc_filters, IPVLAN_MAC_FILTER_SIZE);
 }
 
@@ -471,8 +469,6 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	ipvlan->port = port;
 	ipvlan->sfeatures = IPVLAN_FEATURES;
 	INIT_LIST_HEAD(&ipvlan->addrs);
-	ipvlan->ipv4cnt = 0;
-	ipvlan->ipv6cnt = 0;
 
 	/* TODO Probably put random address here to be presented to the
 	 * world but keep using the physical-dev address for the outgoing
@@ -508,12 +504,12 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_addr *addr, *next;
 
-	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
-		list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
-			ipvlan_ht_addr_del(addr, !dev->dismantle);
-			list_del(&addr->anode);
-		}
+	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
+		ipvlan_ht_addr_del(addr);
+		list_del(&addr->anode);
+		kfree_rcu(addr, rcu);
 	}
+
 	list_del_rcu(&ipvlan->pnode);
 	unregister_netdevice_queue(dev, head);
 	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
@@ -524,12 +520,11 @@ static void ipvlan_link_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
-	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 	dev->netdev_ops = &ipvlan_netdev_ops;
 	dev->destructor = free_netdev;
 	dev->header_ops = &ipvlan_header_ops;
 	dev->ethtool_ops = &ipvlan_ethtool_ops;
-	dev->tx_queue_len = 0;
 }
 
 static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
@@ -627,7 +622,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 	memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr));
 	addr->atype = IPVL_IPV6;
 	list_add_tail(&addr->anode, &ipvlan->addrs);
-	ipvlan->ipv6cnt++;
+
 	/* If the interface is not up, the address will be added to the hash
 	 * list by ipvlan_open.
 	 */
@@ -645,10 +640,8 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 	if (!addr)
 		return;
 
-	ipvlan_ht_addr_del(addr, true);
+	ipvlan_ht_addr_del(addr);
 	list_del(&addr->anode);
-	ipvlan->ipv6cnt--;
-	WARN_ON(ipvlan->ipv6cnt < 0);
 	kfree_rcu(addr, rcu);
 
 	return;
@@ -661,6 +654,10 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
 	struct net_device *dev = (struct net_device *)if6->idev->dev;
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 
+	/* FIXME IPv6 autoconf calls us from bh without RTNL */
+	if (in_softirq())
+		return NOTIFY_DONE;
+
 	if (!netif_is_ipvlan(dev))
 		return NOTIFY_DONE;
 
@@ -699,13 +696,12 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 	memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr));
 	addr->atype = IPVL_IPV4;
 	list_add_tail(&addr->anode, &ipvlan->addrs);
-	ipvlan->ipv4cnt++;
+
 	/* If the interface is not up, the address will be added to the hash
 	 * list by ipvlan_open.
 	 */
 	if (netif_running(ipvlan->dev))
 		ipvlan_ht_addr_add(ipvlan, addr);
-	ipvlan_set_broadcast_mac_filter(ipvlan, true);
 
 	return 0;
 }
@@ -718,12 +714,8 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 	if (!addr)
 		return;
 
-	ipvlan_ht_addr_del(addr, true);
+	ipvlan_ht_addr_del(addr);
 	list_del(&addr->anode);
-	ipvlan->ipv4cnt--;
-	WARN_ON(ipvlan->ipv4cnt < 0);
-	if (!ipvlan->ipv4cnt)
-		ipvlan_set_broadcast_mac_filter(ipvlan, false);
 	kfree_rcu(addr, rcu);
 
 	return;
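
The core of the change is the bounded multicast backlog: ipvlan_multicast_enqueue() drops PAUSE frames, refuses new packets once IPVLAN_QBACKLOG_LIMIT (1000) entries are queued, and otherwise appends the skb and schedules port->wq; ipvlan_process_multicast() later splices the whole backlog out under backlog.lock and distributes the packets with the lock released. Below is a minimal userspace C analogue of that producer/worker pattern, not kernel code; the names (struct pkt, enqueue, worker, QBACKLOG_LIMIT) are invented for this sketch and only the bounded-queue / splice-then-process structure mirrors the patch.

/* Userspace sketch of the bounded backlog + deferred worker pattern. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define QBACKLOG_LIMIT 1000            /* mirrors IPVLAN_QBACKLOG_LIMIT */

struct pkt {
	int id;
	struct pkt *next;
};

static struct pkt *backlog_head, *backlog_tail;
static int backlog_len;
static long dropped;
static pthread_mutex_t backlog_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backlog_wake = PTHREAD_COND_INITIALIZER;
static int done;

/* Producer side: cheap work under the lock, drop when the backlog is full. */
static void enqueue(struct pkt *p)
{
	pthread_mutex_lock(&backlog_lock);
	if (backlog_len < QBACKLOG_LIMIT) {
		p->next = NULL;
		if (backlog_tail)
			backlog_tail->next = p;
		else
			backlog_head = p;
		backlog_tail = p;
		backlog_len++;
		pthread_cond_signal(&backlog_wake);    /* ~schedule_work() */
		pthread_mutex_unlock(&backlog_lock);
	} else {
		pthread_mutex_unlock(&backlog_lock);
		dropped++;                             /* ~rx_dropped accounting */
		free(p);
	}
}

/* Worker side: splice the whole list out, then process with the lock dropped. */
static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		struct pkt *list;

		pthread_mutex_lock(&backlog_lock);
		while (!backlog_head && !done)
			pthread_cond_wait(&backlog_wake, &backlog_lock);
		list = backlog_head;                   /* ~skb_queue_splice_tail_init() */
		backlog_head = backlog_tail = NULL;
		backlog_len = 0;
		if (!list && done) {
			pthread_mutex_unlock(&backlog_lock);
			return NULL;
		}
		pthread_mutex_unlock(&backlog_lock);

		while (list) {                         /* process outside the lock */
			struct pkt *p = list;

			list = list->next;
			printf("processed pkt %d\n", p->id);
			free(p);
		}
	}
}

int main(void)
{
	pthread_t tid;
	int i;

	pthread_create(&tid, NULL, worker, NULL);
	for (i = 0; i < 10; i++) {
		struct pkt *p = malloc(sizeof(*p));

		p->id = i;
		enqueue(p);
	}
	pthread_mutex_lock(&backlog_lock);
	done = 1;
	pthread_cond_signal(&backlog_wake);
	pthread_mutex_unlock(&backlog_lock);
	pthread_join(tid, NULL);
	printf("dropped %ld\n", dropped);
	return 0;
}

Splicing the whole list in one step keeps the producer's critical section short, which is the same reason the patch uses skb_queue_splice_tail_init() rather than dequeuing one skb at a time under backlog.lock.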
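A second, smaller theme is the ipvlan_rcv_frame() signature change from struct sk_buff *skb to struct sk_buff **pskb: skb_share_check() may hand back a different skb, and the new "*pskb = skb;" assignment propagates that replacement to the caller so it does not keep using a stale pointer. The following is a hedged plain-C sketch of the same double-pointer idiom with invented names (struct buf, maybe_replace, consume); it illustrates the idiom only, not the kernel API.

/* Sketch: a callee that may replace a buffer must update the caller's pointer. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	size_t len;
	char data[64];
};

/* May return a replacement buffer and free the original, like skb_share_check(). */
static struct buf *maybe_replace(struct buf *b)
{
	struct buf *copy = malloc(sizeof(*copy));

	if (!copy)
		return NULL;            /* on failure, the original stays valid */
	memcpy(copy, b, sizeof(*copy));
	free(b);                        /* old buffer is gone */
	return copy;
}

/* Callee writes the new pointer back through *pb, mirroring "*pskb = skb;". */
static int consume(struct buf **pb)
{
	struct buf *b = maybe_replace(*pb);

	if (!b)
		return -1;
	*pb = b;
	b->len = strlen(b->data);
	return 0;
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	snprintf(b->data, sizeof(b->data), "hello");
	if (consume(&b) == 0)           /* b may now point at the replacement */
		printf("len=%zu data=%s\n", b->len, b->data);
	free(b);
	return 0;
}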