diff options
Diffstat (limited to 'kernel/drivers/net/team')
-rw-r--r-- | kernel/drivers/net/team/Kconfig | 76 | ||||
-rw-r--r-- | kernel/drivers/net/team/Makefile | 10 | ||||
-rw-r--r-- | kernel/drivers/net/team/team.c | 2943 | ||||
-rw-r--r-- | kernel/drivers/net/team/team_mode_activebackup.c | 148 | ||||
-rw-r--r-- | kernel/drivers/net/team/team_mode_broadcast.c | 77 | ||||
-rw-r--r-- | kernel/drivers/net/team/team_mode_loadbalance.c | 682 | ||||
-rw-r--r-- | kernel/drivers/net/team/team_mode_random.c | 67 | ||||
-rw-r--r-- | kernel/drivers/net/team/team_mode_roundrobin.c | 79 |
8 files changed, 4082 insertions, 0 deletions
diff --git a/kernel/drivers/net/team/Kconfig b/kernel/drivers/net/team/Kconfig new file mode 100644 index 000000000..c853d84fd --- /dev/null +++ b/kernel/drivers/net/team/Kconfig @@ -0,0 +1,76 @@ +menuconfig NET_TEAM + tristate "Ethernet team driver support" + ---help--- + This allows one to create virtual interfaces that teams together + multiple ethernet devices. + + Team devices can be added using the "ip" command from the + iproute2 package: + + "ip link add link [ address MAC ] [ NAME ] type team" + + To compile this driver as a module, choose M here: the module + will be called team. + +if NET_TEAM + +config NET_TEAM_MODE_BROADCAST + tristate "Broadcast mode support" + depends on NET_TEAM + ---help--- + Basic mode where packets are transmitted always by all suitable ports. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_broadcast. + +config NET_TEAM_MODE_ROUNDROBIN + tristate "Round-robin mode support" + depends on NET_TEAM + ---help--- + Basic mode where port used for transmitting packets is selected in + round-robin fashion using packet counter. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_roundrobin. + +config NET_TEAM_MODE_RANDOM + tristate "Random mode support" + depends on NET_TEAM + ---help--- + Basic mode where port used for transmitting packets is selected + randomly. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_random. + +config NET_TEAM_MODE_ACTIVEBACKUP + tristate "Active-backup mode support" + depends on NET_TEAM + ---help--- + Only one port is active at a time and the rest of ports are used + for backup. + + Mac addresses of ports are not modified. Userspace is responsible + to do so. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_activebackup. + +config NET_TEAM_MODE_LOADBALANCE + tristate "Load-balance mode support" + depends on NET_TEAM + ---help--- + This mode provides load balancing functionality. Tx port selection + is done using BPF function set up from userspace (bpf_hash_func + option) + + To compile this team mode as a module, choose M here: the module + will be called team_mode_loadbalance. + +endif # NET_TEAM diff --git a/kernel/drivers/net/team/Makefile b/kernel/drivers/net/team/Makefile new file mode 100644 index 000000000..c57e85889 --- /dev/null +++ b/kernel/drivers/net/team/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for the network team driver +# + +obj-$(CONFIG_NET_TEAM) += team.o +obj-$(CONFIG_NET_TEAM_MODE_BROADCAST) += team_mode_broadcast.o +obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o +obj-$(CONFIG_NET_TEAM_MODE_RANDOM) += team_mode_random.o +obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o +obj-$(CONFIG_NET_TEAM_MODE_LOADBALANCE) += team_mode_loadbalance.o diff --git a/kernel/drivers/net/team/team.c b/kernel/drivers/net/team/team.c new file mode 100644 index 000000000..6928448f6 --- /dev/null +++ b/kernel/drivers/net/team/team.c @@ -0,0 +1,2943 @@ +/* + * drivers/net/team/team.c - Network team device driver + * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/rcupdate.h> +#include <linux/errno.h> +#include <linux/ctype.h> +#include <linux/notifier.h> +#include <linux/netdevice.h> +#include <linux/netpoll.h> +#include <linux/if_vlan.h> +#include <linux/if_arp.h> +#include <linux/socket.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <net/rtnetlink.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sch_generic.h> +#include <net/switchdev.h> +#include <generated/utsrelease.h> +#include <linux/if_team.h> + +#define DRV_NAME "team" + + +/********** + * Helpers + **********/ + +#define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT) + +static struct team_port *team_port_get_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + +static struct team_port *team_port_get_rtnl(const struct net_device *dev) +{ + struct team_port *port = rtnl_dereference(dev->rx_handler_data); + + return team_port_exists(dev) ? port : NULL; +} + +/* + * Since the ability to change device address for open port device is tested in + * team_port_add, this function can be called without control of return value + */ +static int __set_port_dev_addr(struct net_device *port_dev, + const unsigned char *dev_addr) +{ + struct sockaddr addr; + + memcpy(addr.sa_data, dev_addr, port_dev->addr_len); + addr.sa_family = port_dev->type; + return dev_set_mac_address(port_dev, &addr); +} + +static int team_port_set_orig_dev_addr(struct team_port *port) +{ + return __set_port_dev_addr(port->dev, port->orig.dev_addr); +} + +static int team_port_set_team_dev_addr(struct team *team, + struct team_port *port) +{ + return __set_port_dev_addr(port->dev, team->dev->dev_addr); +} + +int team_modeop_port_enter(struct team *team, struct team_port *port) +{ + return team_port_set_team_dev_addr(team, port); +} +EXPORT_SYMBOL(team_modeop_port_enter); + +void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port) +{ + team_port_set_team_dev_addr(team, port); +} +EXPORT_SYMBOL(team_modeop_port_change_dev_addr); + +static void team_refresh_port_linkup(struct team_port *port) +{ + port->linkup = port->user.linkup_enabled ? port->user.linkup : + port->state.linkup; +} + + +/******************* + * Options handling + *******************/ + +struct team_option_inst { /* One for each option instance */ + struct list_head list; + struct list_head tmp_list; + struct team_option *option; + struct team_option_inst_info info; + bool changed; + bool removed; +}; + +static struct team_option *__team_find_option(struct team *team, + const char *opt_name) +{ + struct team_option *option; + + list_for_each_entry(option, &team->option_list, list) { + if (strcmp(option->name, opt_name) == 0) + return option; + } + return NULL; +} + +static void __team_option_inst_del(struct team_option_inst *opt_inst) +{ + list_del(&opt_inst->list); + kfree(opt_inst); +} + +static void __team_option_inst_del_option(struct team *team, + struct team_option *option) +{ + struct team_option_inst *opt_inst, *tmp; + + list_for_each_entry_safe(opt_inst, tmp, &team->option_inst_list, list) { + if (opt_inst->option == option) + __team_option_inst_del(opt_inst); + } +} + +static int __team_option_inst_add(struct team *team, struct team_option *option, + struct team_port *port) +{ + struct team_option_inst *opt_inst; + unsigned int array_size; + unsigned int i; + int err; + + array_size = option->array_size; + if (!array_size) + array_size = 1; /* No array but still need one instance */ + + for (i = 0; i < array_size; i++) { + opt_inst = kmalloc(sizeof(*opt_inst), GFP_KERNEL); + if (!opt_inst) + return -ENOMEM; + opt_inst->option = option; + opt_inst->info.port = port; + opt_inst->info.array_index = i; + opt_inst->changed = true; + opt_inst->removed = false; + list_add_tail(&opt_inst->list, &team->option_inst_list); + if (option->init) { + err = option->init(team, &opt_inst->info); + if (err) + return err; + } + + } + return 0; +} + +static int __team_option_inst_add_option(struct team *team, + struct team_option *option) +{ + int err; + + if (!option->per_port) { + err = __team_option_inst_add(team, option, NULL); + if (err) + goto inst_del_option; + } + return 0; + +inst_del_option: + __team_option_inst_del_option(team, option); + return err; +} + +static void __team_option_inst_mark_removed_option(struct team *team, + struct team_option *option) +{ + struct team_option_inst *opt_inst; + + list_for_each_entry(opt_inst, &team->option_inst_list, list) { + if (opt_inst->option == option) { + opt_inst->changed = true; + opt_inst->removed = true; + } + } +} + +static void __team_option_inst_del_port(struct team *team, + struct team_port *port) +{ + struct team_option_inst *opt_inst, *tmp; + + list_for_each_entry_safe(opt_inst, tmp, &team->option_inst_list, list) { + if (opt_inst->option->per_port && + opt_inst->info.port == port) + __team_option_inst_del(opt_inst); + } +} + +static int __team_option_inst_add_port(struct team *team, + struct team_port *port) +{ + struct team_option *option; + int err; + + list_for_each_entry(option, &team->option_list, list) { + if (!option->per_port) + continue; + err = __team_option_inst_add(team, option, port); + if (err) + goto inst_del_port; + } + return 0; + +inst_del_port: + __team_option_inst_del_port(team, port); + return err; +} + +static void __team_option_inst_mark_removed_port(struct team *team, + struct team_port *port) +{ + struct team_option_inst *opt_inst; + + list_for_each_entry(opt_inst, &team->option_inst_list, list) { + if (opt_inst->info.port == port) { + opt_inst->changed = true; + opt_inst->removed = true; + } + } +} + +static int __team_options_register(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int i; + struct team_option **dst_opts; + int err; + + dst_opts = kzalloc(sizeof(struct team_option *) * option_count, + GFP_KERNEL); + if (!dst_opts) + return -ENOMEM; + for (i = 0; i < option_count; i++, option++) { + if (__team_find_option(team, option->name)) { + err = -EEXIST; + goto alloc_rollback; + } + dst_opts[i] = kmemdup(option, sizeof(*option), GFP_KERNEL); + if (!dst_opts[i]) { + err = -ENOMEM; + goto alloc_rollback; + } + } + + for (i = 0; i < option_count; i++) { + err = __team_option_inst_add_option(team, dst_opts[i]); + if (err) + goto inst_rollback; + list_add_tail(&dst_opts[i]->list, &team->option_list); + } + + kfree(dst_opts); + return 0; + +inst_rollback: + for (i--; i >= 0; i--) + __team_option_inst_del_option(team, dst_opts[i]); + + i = option_count - 1; +alloc_rollback: + for (i--; i >= 0; i--) + kfree(dst_opts[i]); + + kfree(dst_opts); + return err; +} + +static void __team_options_mark_removed(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int i; + + for (i = 0; i < option_count; i++, option++) { + struct team_option *del_opt; + + del_opt = __team_find_option(team, option->name); + if (del_opt) + __team_option_inst_mark_removed_option(team, del_opt); + } +} + +static void __team_options_unregister(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int i; + + for (i = 0; i < option_count; i++, option++) { + struct team_option *del_opt; + + del_opt = __team_find_option(team, option->name); + if (del_opt) { + __team_option_inst_del_option(team, del_opt); + list_del(&del_opt->list); + kfree(del_opt); + } + } +} + +static void __team_options_change_check(struct team *team); + +int team_options_register(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int err; + + err = __team_options_register(team, option, option_count); + if (err) + return err; + __team_options_change_check(team); + return 0; +} +EXPORT_SYMBOL(team_options_register); + +void team_options_unregister(struct team *team, + const struct team_option *option, + size_t option_count) +{ + __team_options_mark_removed(team, option, option_count); + __team_options_change_check(team); + __team_options_unregister(team, option, option_count); +} +EXPORT_SYMBOL(team_options_unregister); + +static int team_option_get(struct team *team, + struct team_option_inst *opt_inst, + struct team_gsetter_ctx *ctx) +{ + if (!opt_inst->option->getter) + return -EOPNOTSUPP; + return opt_inst->option->getter(team, ctx); +} + +static int team_option_set(struct team *team, + struct team_option_inst *opt_inst, + struct team_gsetter_ctx *ctx) +{ + if (!opt_inst->option->setter) + return -EOPNOTSUPP; + return opt_inst->option->setter(team, ctx); +} + +void team_option_inst_set_change(struct team_option_inst_info *opt_inst_info) +{ + struct team_option_inst *opt_inst; + + opt_inst = container_of(opt_inst_info, struct team_option_inst, info); + opt_inst->changed = true; +} +EXPORT_SYMBOL(team_option_inst_set_change); + +void team_options_change_check(struct team *team) +{ + __team_options_change_check(team); +} +EXPORT_SYMBOL(team_options_change_check); + + +/**************** + * Mode handling + ****************/ + +static LIST_HEAD(mode_list); +static DEFINE_SPINLOCK(mode_list_lock); + +struct team_mode_item { + struct list_head list; + const struct team_mode *mode; +}; + +static struct team_mode_item *__find_mode(const char *kind) +{ + struct team_mode_item *mitem; + + list_for_each_entry(mitem, &mode_list, list) { + if (strcmp(mitem->mode->kind, kind) == 0) + return mitem; + } + return NULL; +} + +static bool is_good_mode_name(const char *name) +{ + while (*name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return false; + name++; + } + return true; +} + +int team_mode_register(const struct team_mode *mode) +{ + int err = 0; + struct team_mode_item *mitem; + + if (!is_good_mode_name(mode->kind) || + mode->priv_size > TEAM_MODE_PRIV_SIZE) + return -EINVAL; + + mitem = kmalloc(sizeof(*mitem), GFP_KERNEL); + if (!mitem) + return -ENOMEM; + + spin_lock(&mode_list_lock); + if (__find_mode(mode->kind)) { + err = -EEXIST; + kfree(mitem); + goto unlock; + } + mitem->mode = mode; + list_add_tail(&mitem->list, &mode_list); +unlock: + spin_unlock(&mode_list_lock); + return err; +} +EXPORT_SYMBOL(team_mode_register); + +void team_mode_unregister(const struct team_mode *mode) +{ + struct team_mode_item *mitem; + + spin_lock(&mode_list_lock); + mitem = __find_mode(mode->kind); + if (mitem) { + list_del_init(&mitem->list); + kfree(mitem); + } + spin_unlock(&mode_list_lock); +} +EXPORT_SYMBOL(team_mode_unregister); + +static const struct team_mode *team_mode_get(const char *kind) +{ + struct team_mode_item *mitem; + const struct team_mode *mode = NULL; + + spin_lock(&mode_list_lock); + mitem = __find_mode(kind); + if (!mitem) { + spin_unlock(&mode_list_lock); + request_module("team-mode-%s", kind); + spin_lock(&mode_list_lock); + mitem = __find_mode(kind); + } + if (mitem) { + mode = mitem->mode; + if (!try_module_get(mode->owner)) + mode = NULL; + } + + spin_unlock(&mode_list_lock); + return mode; +} + +static void team_mode_put(const struct team_mode *mode) +{ + module_put(mode->owner); +} + +static bool team_dummy_transmit(struct team *team, struct sk_buff *skb) +{ + dev_kfree_skb_any(skb); + return false; +} + +static rx_handler_result_t team_dummy_receive(struct team *team, + struct team_port *port, + struct sk_buff *skb) +{ + return RX_HANDLER_ANOTHER; +} + +static const struct team_mode __team_no_mode = { + .kind = "*NOMODE*", +}; + +static bool team_is_mode_set(struct team *team) +{ + return team->mode != &__team_no_mode; +} + +static void team_set_no_mode(struct team *team) +{ + team->user_carrier_enabled = false; + team->mode = &__team_no_mode; +} + +static void team_adjust_ops(struct team *team) +{ + /* + * To avoid checks in rx/tx skb paths, ensure here that non-null and + * correct ops are always set. + */ + + if (!team->en_port_count || !team_is_mode_set(team) || + !team->mode->ops->transmit) + team->ops.transmit = team_dummy_transmit; + else + team->ops.transmit = team->mode->ops->transmit; + + if (!team->en_port_count || !team_is_mode_set(team) || + !team->mode->ops->receive) + team->ops.receive = team_dummy_receive; + else + team->ops.receive = team->mode->ops->receive; +} + +/* + * We can benefit from the fact that it's ensured no port is present + * at the time of mode change. Therefore no packets are in fly so there's no + * need to set mode operations in any special way. + */ +static int __team_change_mode(struct team *team, + const struct team_mode *new_mode) +{ + /* Check if mode was previously set and do cleanup if so */ + if (team_is_mode_set(team)) { + void (*exit_op)(struct team *team) = team->ops.exit; + + /* Clear ops area so no callback is called any longer */ + memset(&team->ops, 0, sizeof(struct team_mode_ops)); + team_adjust_ops(team); + + if (exit_op) + exit_op(team); + team_mode_put(team->mode); + team_set_no_mode(team); + /* zero private data area */ + memset(&team->mode_priv, 0, + sizeof(struct team) - offsetof(struct team, mode_priv)); + } + + if (!new_mode) + return 0; + + if (new_mode->ops->init) { + int err; + + err = new_mode->ops->init(team); + if (err) + return err; + } + + team->mode = new_mode; + memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops)); + team_adjust_ops(team); + + return 0; +} + +static int team_change_mode(struct team *team, const char *kind) +{ + const struct team_mode *new_mode; + struct net_device *dev = team->dev; + int err; + + if (!list_empty(&team->port_list)) { + netdev_err(dev, "No ports can be present during mode change\n"); + return -EBUSY; + } + + if (team_is_mode_set(team) && strcmp(team->mode->kind, kind) == 0) { + netdev_err(dev, "Unable to change to the same mode the team is in\n"); + return -EINVAL; + } + + new_mode = team_mode_get(kind); + if (!new_mode) { + netdev_err(dev, "Mode \"%s\" not found\n", kind); + return -EINVAL; + } + + err = __team_change_mode(team, new_mode); + if (err) { + netdev_err(dev, "Failed to change to mode \"%s\"\n", kind); + team_mode_put(new_mode); + return err; + } + + netdev_info(dev, "Mode changed to \"%s\"\n", kind); + return 0; +} + + +/********************* + * Peers notification + *********************/ + +static void team_notify_peers_work(struct work_struct *work) +{ + struct team *team; + int val; + + team = container_of(work, struct team, notify_peers.dw.work); + + if (!rtnl_trylock()) { + schedule_delayed_work(&team->notify_peers.dw, 0); + return; + } + val = atomic_dec_if_positive(&team->notify_peers.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev); + rtnl_unlock(); + if (val) + schedule_delayed_work(&team->notify_peers.dw, + msecs_to_jiffies(team->notify_peers.interval)); +} + +static void team_notify_peers(struct team *team) +{ + if (!team->notify_peers.count || !netif_running(team->dev)) + return; + atomic_add(team->notify_peers.count, &team->notify_peers.count_pending); + schedule_delayed_work(&team->notify_peers.dw, 0); +} + +static void team_notify_peers_init(struct team *team) +{ + INIT_DELAYED_WORK(&team->notify_peers.dw, team_notify_peers_work); +} + +static void team_notify_peers_fini(struct team *team) +{ + cancel_delayed_work_sync(&team->notify_peers.dw); +} + + +/******************************* + * Send multicast group rejoins + *******************************/ + +static void team_mcast_rejoin_work(struct work_struct *work) +{ + struct team *team; + int val; + + team = container_of(work, struct team, mcast_rejoin.dw.work); + + if (!rtnl_trylock()) { + schedule_delayed_work(&team->mcast_rejoin.dw, 0); + return; + } + val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } + call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev); + rtnl_unlock(); + if (val) + schedule_delayed_work(&team->mcast_rejoin.dw, + msecs_to_jiffies(team->mcast_rejoin.interval)); +} + +static void team_mcast_rejoin(struct team *team) +{ + if (!team->mcast_rejoin.count || !netif_running(team->dev)) + return; + atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending); + schedule_delayed_work(&team->mcast_rejoin.dw, 0); +} + +static void team_mcast_rejoin_init(struct team *team) +{ + INIT_DELAYED_WORK(&team->mcast_rejoin.dw, team_mcast_rejoin_work); +} + +static void team_mcast_rejoin_fini(struct team *team) +{ + cancel_delayed_work_sync(&team->mcast_rejoin.dw); +} + + +/************************ + * Rx path frame handler + ************************/ + +/* note: already called with rcu_read_lock */ +static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct team_port *port; + struct team *team; + rx_handler_result_t res; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return RX_HANDLER_CONSUMED; + + *pskb = skb; + + port = team_port_get_rcu(skb->dev); + team = port->team; + if (!team_port_enabled(port)) { + /* allow exact match delivery for disabled ports */ + res = RX_HANDLER_EXACT; + } else { + res = team->ops.receive(team, port, skb); + } + if (res == RX_HANDLER_ANOTHER) { + struct team_pcpu_stats *pcpu_stats; + + pcpu_stats = this_cpu_ptr(team->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + if (skb->pkt_type == PACKET_MULTICAST) + pcpu_stats->rx_multicast++; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->dev = team->dev; + } else { + this_cpu_inc(team->pcpu_stats->rx_dropped); + } + + return res; +} + + +/************************************* + * Multiqueue Tx port select override + *************************************/ + +static int team_queue_override_init(struct team *team) +{ + struct list_head *listarr; + unsigned int queue_cnt = team->dev->num_tx_queues - 1; + unsigned int i; + + if (!queue_cnt) + return 0; + listarr = kmalloc(sizeof(struct list_head) * queue_cnt, GFP_KERNEL); + if (!listarr) + return -ENOMEM; + team->qom_lists = listarr; + for (i = 0; i < queue_cnt; i++) + INIT_LIST_HEAD(listarr++); + return 0; +} + +static void team_queue_override_fini(struct team *team) +{ + kfree(team->qom_lists); +} + +static struct list_head *__team_get_qom_list(struct team *team, u16 queue_id) +{ + return &team->qom_lists[queue_id - 1]; +} + +/* + * note: already called with rcu_read_lock + */ +static bool team_queue_override_transmit(struct team *team, struct sk_buff *skb) +{ + struct list_head *qom_list; + struct team_port *port; + + if (!team->queue_override_enabled || !skb->queue_mapping) + return false; + qom_list = __team_get_qom_list(team, skb->queue_mapping); + list_for_each_entry_rcu(port, qom_list, qom_list) { + if (!team_dev_queue_xmit(team, port, skb)) + return true; + } + return false; +} + +static void __team_queue_override_port_del(struct team *team, + struct team_port *port) +{ + if (!port->queue_id) + return; + list_del_rcu(&port->qom_list); +} + +static bool team_queue_override_port_has_gt_prio_than(struct team_port *port, + struct team_port *cur) +{ + if (port->priority < cur->priority) + return true; + if (port->priority > cur->priority) + return false; + if (port->index < cur->index) + return true; + return false; +} + +static void __team_queue_override_port_add(struct team *team, + struct team_port *port) +{ + struct team_port *cur; + struct list_head *qom_list; + struct list_head *node; + + if (!port->queue_id) + return; + qom_list = __team_get_qom_list(team, port->queue_id); + node = qom_list; + list_for_each_entry(cur, qom_list, qom_list) { + if (team_queue_override_port_has_gt_prio_than(port, cur)) + break; + node = &cur->qom_list; + } + list_add_tail_rcu(&port->qom_list, node); +} + +static void __team_queue_override_enabled_check(struct team *team) +{ + struct team_port *port; + bool enabled = false; + + list_for_each_entry(port, &team->port_list, list) { + if (port->queue_id) { + enabled = true; + break; + } + } + if (enabled == team->queue_override_enabled) + return; + netdev_dbg(team->dev, "%s queue override\n", + enabled ? "Enabling" : "Disabling"); + team->queue_override_enabled = enabled; +} + +static void team_queue_override_port_prio_changed(struct team *team, + struct team_port *port) +{ + if (!port->queue_id || team_port_enabled(port)) + return; + __team_queue_override_port_del(team, port); + __team_queue_override_port_add(team, port); + __team_queue_override_enabled_check(team); +} + +static void team_queue_override_port_change_queue_id(struct team *team, + struct team_port *port, + u16 new_queue_id) +{ + if (team_port_enabled(port)) { + __team_queue_override_port_del(team, port); + port->queue_id = new_queue_id; + __team_queue_override_port_add(team, port); + __team_queue_override_enabled_check(team); + } else { + port->queue_id = new_queue_id; + } +} + +static void team_queue_override_port_add(struct team *team, + struct team_port *port) +{ + __team_queue_override_port_add(team, port); + __team_queue_override_enabled_check(team); +} + +static void team_queue_override_port_del(struct team *team, + struct team_port *port) +{ + __team_queue_override_port_del(team, port); + __team_queue_override_enabled_check(team); +} + + +/**************** + * Port handling + ****************/ + +static bool team_port_find(const struct team *team, + const struct team_port *port) +{ + struct team_port *cur; + + list_for_each_entry(cur, &team->port_list, list) + if (cur == port) + return true; + return false; +} + +/* + * Enable/disable port by adding to enabled port hashlist and setting + * port->index (Might be racy so reader could see incorrect ifindex when + * processing a flying packet, but that is not a problem). Write guarded + * by team->lock. + */ +static void team_port_enable(struct team *team, + struct team_port *port) +{ + if (team_port_enabled(port)) + return; + port->index = team->en_port_count++; + hlist_add_head_rcu(&port->hlist, + team_port_index_hash(team, port->index)); + team_adjust_ops(team); + team_queue_override_port_add(team, port); + if (team->ops.port_enabled) + team->ops.port_enabled(team, port); + team_notify_peers(team); + team_mcast_rejoin(team); +} + +static void __reconstruct_port_hlist(struct team *team, int rm_index) +{ + int i; + struct team_port *port; + + for (i = rm_index + 1; i < team->en_port_count; i++) { + port = team_get_port_by_index(team, i); + hlist_del_rcu(&port->hlist); + port->index--; + hlist_add_head_rcu(&port->hlist, + team_port_index_hash(team, port->index)); + } +} + +static void team_port_disable(struct team *team, + struct team_port *port) +{ + if (!team_port_enabled(port)) + return; + if (team->ops.port_disabled) + team->ops.port_disabled(team, port); + hlist_del_rcu(&port->hlist); + __reconstruct_port_hlist(team, port->index); + port->index = -1; + team->en_port_count--; + team_queue_override_port_del(team, port); + team_adjust_ops(team); + team_notify_peers(team); + team_mcast_rejoin(team); +} + +#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + +static void __team_compute_features(struct team *team) +{ + struct team_port *port; + u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL; + unsigned short max_hard_header_len = ETH_HLEN; + unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | + IFF_XMIT_DST_RELEASE_PERM; + + list_for_each_entry(port, &team->port_list, list) { + vlan_features = netdev_increment_features(vlan_features, + port->dev->vlan_features, + TEAM_VLAN_FEATURES); + + dst_release_flag &= port->dev->priv_flags; + if (port->dev->hard_header_len > max_hard_header_len) + max_hard_header_len = port->dev->hard_header_len; + } + + team->dev->vlan_features = vlan_features; + team->dev->hard_header_len = max_hard_header_len; + + team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) + team->dev->priv_flags |= IFF_XMIT_DST_RELEASE; + + netdev_change_features(team->dev); +} + +static void team_compute_features(struct team *team) +{ + mutex_lock(&team->lock); + __team_compute_features(team); + mutex_unlock(&team->lock); +} + +static int team_port_enter(struct team *team, struct team_port *port) +{ + int err = 0; + + dev_hold(team->dev); + if (team->ops.port_enter) { + err = team->ops.port_enter(team, port); + if (err) { + netdev_err(team->dev, "Device %s failed to enter team mode\n", + port->dev->name); + goto err_port_enter; + } + } + + return 0; + +err_port_enter: + dev_put(team->dev); + + return err; +} + +static void team_port_leave(struct team *team, struct team_port *port) +{ + if (team->ops.port_leave) + team->ops.port_leave(team, port); + dev_put(team->dev); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static int team_port_enable_netpoll(struct team *team, struct team_port *port) +{ + struct netpoll *np; + int err; + + if (!team->dev->npinfo) + return 0; + + np = kzalloc(sizeof(*np), GFP_KERNEL); + if (!np) + return -ENOMEM; + + err = __netpoll_setup(np, port->dev); + if (err) { + kfree(np); + return err; + } + port->np = np; + return err; +} + +static void team_port_disable_netpoll(struct team_port *port) +{ + struct netpoll *np = port->np; + + if (!np) + return; + port->np = NULL; + + /* Wait for transmitting packets to finish before freeing. */ + synchronize_rcu_bh(); + __netpoll_cleanup(np); + kfree(np); +} +#else +static int team_port_enable_netpoll(struct team *team, struct team_port *port) +{ + return 0; +} +static void team_port_disable_netpoll(struct team_port *port) +{ +} +#endif + +static int team_upper_dev_link(struct net_device *dev, + struct net_device *port_dev) +{ + int err; + + err = netdev_master_upper_dev_link(port_dev, dev); + if (err) + return err; + port_dev->priv_flags |= IFF_TEAM_PORT; + return 0; +} + +static void team_upper_dev_unlink(struct net_device *dev, + struct net_device *port_dev) +{ + netdev_upper_dev_unlink(port_dev, dev); + port_dev->priv_flags &= ~IFF_TEAM_PORT; +} + +static void __team_port_change_port_added(struct team_port *port, bool linkup); +static int team_dev_type_check_change(struct net_device *dev, + struct net_device *port_dev); + +static int team_port_add(struct team *team, struct net_device *port_dev) +{ + struct net_device *dev = team->dev; + struct team_port *port; + char *portname = port_dev->name; + int err; + + if (port_dev->flags & IFF_LOOPBACK) { + netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n", + portname); + return -EINVAL; + } + + if (team_port_exists(port_dev)) { + netdev_err(dev, "Device %s is already a port " + "of a team device\n", portname); + return -EBUSY; + } + + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && + vlan_uses_dev(dev)) { + netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", + portname); + return -EPERM; + } + + err = team_dev_type_check_change(dev, port_dev); + if (err) + return err; + + if (port_dev->flags & IFF_UP) { + netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n", + portname); + return -EBUSY; + } + + port = kzalloc(sizeof(struct team_port) + team->mode->port_priv_size, + GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->dev = port_dev; + port->team = team; + INIT_LIST_HEAD(&port->qom_list); + + port->orig.mtu = port_dev->mtu; + err = dev_set_mtu(port_dev, dev->mtu); + if (err) { + netdev_dbg(dev, "Error %d calling dev_set_mtu\n", err); + goto err_set_mtu; + } + + memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len); + + err = team_port_enter(team, port); + if (err) { + netdev_err(dev, "Device %s failed to enter team mode\n", + portname); + goto err_port_enter; + } + + err = dev_open(port_dev); + if (err) { + netdev_dbg(dev, "Device %s opening failed\n", + portname); + goto err_dev_open; + } + + err = vlan_vids_add_by_dev(port_dev, dev); + if (err) { + netdev_err(dev, "Failed to add vlan ids to device %s\n", + portname); + goto err_vids_add; + } + + err = team_port_enable_netpoll(team, port); + if (err) { + netdev_err(dev, "Failed to enable netpoll on device %s\n", + portname); + goto err_enable_netpoll; + } + + if (!(dev->features & NETIF_F_LRO)) + dev_disable_lro(port_dev); + + err = netdev_rx_handler_register(port_dev, team_handle_frame, + port); + if (err) { + netdev_err(dev, "Device %s failed to register rx_handler\n", + portname); + goto err_handler_register; + } + + err = team_upper_dev_link(dev, port_dev); + if (err) { + netdev_err(dev, "Device %s failed to set upper link\n", + portname); + goto err_set_upper_link; + } + + err = __team_option_inst_add_port(team, port); + if (err) { + netdev_err(dev, "Device %s failed to add per-port options\n", + portname); + goto err_option_port_add; + } + + port->index = -1; + list_add_tail_rcu(&port->list, &team->port_list); + team_port_enable(team, port); + __team_compute_features(team); + __team_port_change_port_added(port, !!netif_carrier_ok(port_dev)); + __team_options_change_check(team); + + netdev_info(dev, "Port device %s added\n", portname); + + return 0; + +err_option_port_add: + team_upper_dev_unlink(dev, port_dev); + +err_set_upper_link: + netdev_rx_handler_unregister(port_dev); + +err_handler_register: + team_port_disable_netpoll(port); + +err_enable_netpoll: + vlan_vids_del_by_dev(port_dev, dev); + +err_vids_add: + dev_close(port_dev); + +err_dev_open: + team_port_leave(team, port); + team_port_set_orig_dev_addr(port); + +err_port_enter: + dev_set_mtu(port_dev, port->orig.mtu); + +err_set_mtu: + kfree(port); + + return err; +} + +static void __team_port_change_port_removed(struct team_port *port); + +static int team_port_del(struct team *team, struct net_device *port_dev) +{ + struct net_device *dev = team->dev; + struct team_port *port; + char *portname = port_dev->name; + + port = team_port_get_rtnl(port_dev); + if (!port || !team_port_find(team, port)) { + netdev_err(dev, "Device %s does not act as a port of this team\n", + portname); + return -ENOENT; + } + + team_port_disable(team, port); + list_del_rcu(&port->list); + team_upper_dev_unlink(dev, port_dev); + netdev_rx_handler_unregister(port_dev); + team_port_disable_netpoll(port); + vlan_vids_del_by_dev(port_dev, dev); + dev_uc_unsync(port_dev, dev); + dev_mc_unsync(port_dev, dev); + dev_close(port_dev); + team_port_leave(team, port); + + __team_option_inst_mark_removed_port(team, port); + __team_options_change_check(team); + __team_option_inst_del_port(team, port); + __team_port_change_port_removed(port); + + team_port_set_orig_dev_addr(port); + dev_set_mtu(port_dev, port->orig.mtu); + kfree_rcu(port, rcu); + netdev_info(dev, "Port device %s removed\n", portname); + __team_compute_features(team); + + return 0; +} + + +/***************** + * Net device ops + *****************/ + +static int team_mode_option_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + ctx->data.str_val = team->mode->kind; + return 0; +} + +static int team_mode_option_set(struct team *team, struct team_gsetter_ctx *ctx) +{ + return team_change_mode(team, ctx->data.str_val); +} + +static int team_notify_peers_count_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + ctx->data.u32_val = team->notify_peers.count; + return 0; +} + +static int team_notify_peers_count_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + team->notify_peers.count = ctx->data.u32_val; + return 0; +} + +static int team_notify_peers_interval_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + ctx->data.u32_val = team->notify_peers.interval; + return 0; +} + +static int team_notify_peers_interval_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + team->notify_peers.interval = ctx->data.u32_val; + return 0; +} + +static int team_mcast_rejoin_count_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + ctx->data.u32_val = team->mcast_rejoin.count; + return 0; +} + +static int team_mcast_rejoin_count_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + team->mcast_rejoin.count = ctx->data.u32_val; + return 0; +} + +static int team_mcast_rejoin_interval_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + ctx->data.u32_val = team->mcast_rejoin.interval; + return 0; +} + +static int team_mcast_rejoin_interval_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + team->mcast_rejoin.interval = ctx->data.u32_val; + return 0; +} + +static int team_port_en_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.bool_val = team_port_enabled(port); + return 0; +} + +static int team_port_en_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + if (ctx->data.bool_val) + team_port_enable(team, port); + else + team_port_disable(team, port); + return 0; +} + +static int team_user_linkup_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.bool_val = port->user.linkup; + return 0; +} + +static void __team_carrier_check(struct team *team); + +static int team_user_linkup_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + port->user.linkup = ctx->data.bool_val; + team_refresh_port_linkup(port); + __team_carrier_check(port->team); + return 0; +} + +static int team_user_linkup_en_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.bool_val = port->user.linkup_enabled; + return 0; +} + +static int team_user_linkup_en_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + port->user.linkup_enabled = ctx->data.bool_val; + team_refresh_port_linkup(port); + __team_carrier_check(port->team); + return 0; +} + +static int team_priority_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.s32_val = port->priority; + return 0; +} + +static int team_priority_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + s32 priority = ctx->data.s32_val; + + if (port->priority == priority) + return 0; + port->priority = priority; + team_queue_override_port_prio_changed(team, port); + return 0; +} + +static int team_queue_id_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.u32_val = port->queue_id; + return 0; +} + +static int team_queue_id_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + u16 new_queue_id = ctx->data.u32_val; + + if (port->queue_id == new_queue_id) + return 0; + if (new_queue_id >= team->dev->real_num_tx_queues) + return -EINVAL; + team_queue_override_port_change_queue_id(team, port, new_queue_id); + return 0; +} + +static const struct team_option team_options[] = { + { + .name = "mode", + .type = TEAM_OPTION_TYPE_STRING, + .getter = team_mode_option_get, + .setter = team_mode_option_set, + }, + { + .name = "notify_peers_count", + .type = TEAM_OPTION_TYPE_U32, + .getter = team_notify_peers_count_get, + .setter = team_notify_peers_count_set, + }, + { + .name = "notify_peers_interval", + .type = TEAM_OPTION_TYPE_U32, + .getter = team_notify_peers_interval_get, + .setter = team_notify_peers_interval_set, + }, + { + .name = "mcast_rejoin_count", + .type = TEAM_OPTION_TYPE_U32, + .getter = team_mcast_rejoin_count_get, + .setter = team_mcast_rejoin_count_set, + }, + { + .name = "mcast_rejoin_interval", + .type = TEAM_OPTION_TYPE_U32, + .getter = team_mcast_rejoin_interval_get, + .setter = team_mcast_rejoin_interval_set, + }, + { + .name = "enabled", + .type = TEAM_OPTION_TYPE_BOOL, + .per_port = true, + .getter = team_port_en_option_get, + .setter = team_port_en_option_set, + }, + { + .name = "user_linkup", + .type = TEAM_OPTION_TYPE_BOOL, + .per_port = true, + .getter = team_user_linkup_option_get, + .setter = team_user_linkup_option_set, + }, + { + .name = "user_linkup_enabled", + .type = TEAM_OPTION_TYPE_BOOL, + .per_port = true, + .getter = team_user_linkup_en_option_get, + .setter = team_user_linkup_en_option_set, + }, + { + .name = "priority", + .type = TEAM_OPTION_TYPE_S32, + .per_port = true, + .getter = team_priority_option_get, + .setter = team_priority_option_set, + }, + { + .name = "queue_id", + .type = TEAM_OPTION_TYPE_U32, + .per_port = true, + .getter = team_queue_id_option_get, + .setter = team_queue_id_option_set, + }, +}; + +static struct lock_class_key team_netdev_xmit_lock_key; +static struct lock_class_key team_netdev_addr_lock_key; +static struct lock_class_key team_tx_busylock_key; + +static void team_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *unused) +{ + lockdep_set_class(&txq->_xmit_lock, &team_netdev_xmit_lock_key); +} + +static void team_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &team_tx_busylock_key; +} + +static int team_init(struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + int i; + int err; + + team->dev = dev; + mutex_init(&team->lock); + team_set_no_mode(team); + + team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats); + if (!team->pcpu_stats) + return -ENOMEM; + + for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) + INIT_HLIST_HEAD(&team->en_port_hlist[i]); + INIT_LIST_HEAD(&team->port_list); + err = team_queue_override_init(team); + if (err) + goto err_team_queue_override_init; + + team_adjust_ops(team); + + INIT_LIST_HEAD(&team->option_list); + INIT_LIST_HEAD(&team->option_inst_list); + + team_notify_peers_init(team); + team_mcast_rejoin_init(team); + + err = team_options_register(team, team_options, ARRAY_SIZE(team_options)); + if (err) + goto err_options_register; + netif_carrier_off(dev); + + team_set_lockdep_class(dev); + + return 0; + +err_options_register: + team_mcast_rejoin_fini(team); + team_notify_peers_fini(team); + team_queue_override_fini(team); +err_team_queue_override_init: + free_percpu(team->pcpu_stats); + + return err; +} + +static void team_uninit(struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + struct team_port *tmp; + + mutex_lock(&team->lock); + list_for_each_entry_safe(port, tmp, &team->port_list, list) + team_port_del(team, port->dev); + + __team_change_mode(team, NULL); /* cleanup */ + __team_options_unregister(team, team_options, ARRAY_SIZE(team_options)); + team_mcast_rejoin_fini(team); + team_notify_peers_fini(team); + team_queue_override_fini(team); + mutex_unlock(&team->lock); +} + +static void team_destructor(struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + + free_percpu(team->pcpu_stats); + free_netdev(dev); +} + +static int team_open(struct net_device *dev) +{ + return 0; +} + +static int team_close(struct net_device *dev) +{ + return 0; +} + +/* + * note: already called with rcu_read_lock + */ +static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + bool tx_success; + unsigned int len = skb->len; + + tx_success = team_queue_override_transmit(team, skb); + if (!tx_success) + tx_success = team->ops.transmit(team, skb); + if (tx_success) { + struct team_pcpu_stats *pcpu_stats; + + pcpu_stats = this_cpu_ptr(team->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(team->pcpu_stats->tx_dropped); + } + + return NETDEV_TX_OK; +} + +static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + /* + * This helper function exists to help dev_pick_tx get the correct + * destination queue. Using a helper function skips a call to + * skb_tx_hash and will put the skbs in the queue we expect on their + * way down to the team driver. + */ + u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; + + /* + * Save the original txq to restore before passing to the driver + */ + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; + + if (unlikely(txq >= dev->real_num_tx_queues)) { + do { + txq -= dev->real_num_tx_queues; + } while (txq >= dev->real_num_tx_queues); + } + return txq; +} + +static void team_change_rx_flags(struct net_device *dev, int change) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + int inc; + + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { + if (change & IFF_PROMISC) { + inc = dev->flags & IFF_PROMISC ? 1 : -1; + dev_set_promiscuity(port->dev, inc); + } + if (change & IFF_ALLMULTI) { + inc = dev->flags & IFF_ALLMULTI ? 1 : -1; + dev_set_allmulti(port->dev, inc); + } + } + rcu_read_unlock(); +} + +static void team_set_rx_mode(struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { + dev_uc_sync_multiple(port->dev, dev); + dev_mc_sync_multiple(port->dev, dev); + } + rcu_read_unlock(); +} + +static int team_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + struct team *team = netdev_priv(dev); + struct team_port *port; + + if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) + if (team->ops.port_change_dev_addr) + team->ops.port_change_dev_addr(team, port); + mutex_unlock(&team->lock); + return 0; +} + +static int team_change_mtu(struct net_device *dev, int new_mtu) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + int err; + + /* + * Alhough this is reader, it's guarded by team lock. It's not possible + * to traverse list in reverse under rcu_read_lock + */ + mutex_lock(&team->lock); + team->port_mtu_change_allowed = true; + list_for_each_entry(port, &team->port_list, list) { + err = dev_set_mtu(port->dev, new_mtu); + if (err) { + netdev_err(dev, "Device %s failed to change mtu", + port->dev->name); + goto unwind; + } + } + team->port_mtu_change_allowed = false; + mutex_unlock(&team->lock); + + dev->mtu = new_mtu; + + return 0; + +unwind: + list_for_each_entry_continue_reverse(port, &team->port_list, list) + dev_set_mtu(port->dev, dev->mtu); + team->port_mtu_change_allowed = false; + mutex_unlock(&team->lock); + + return err; +} + +static struct rtnl_link_stats64 * +team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct team *team = netdev_priv(dev); + struct team_pcpu_stats *p; + u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; + u32 rx_dropped = 0, tx_dropped = 0; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + p = per_cpu_ptr(team->pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rx_packets = p->rx_packets; + rx_bytes = p->rx_bytes; + rx_multicast = p->rx_multicast; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->multicast += rx_multicast; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* + * rx_dropped & tx_dropped are u32, updated + * without syncp protection. + */ + rx_dropped += p->rx_dropped; + tx_dropped += p->tx_dropped; + } + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; + return stats; +} + +static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + int err; + + /* + * Alhough this is reader, it's guarded by team lock. It's not possible + * to traverse list in reverse under rcu_read_lock + */ + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { + err = vlan_vid_add(port->dev, proto, vid); + if (err) + goto unwind; + } + mutex_unlock(&team->lock); + + return 0; + +unwind: + list_for_each_entry_continue_reverse(port, &team->port_list, list) + vlan_vid_del(port->dev, proto, vid); + mutex_unlock(&team->lock); + + return err; +} + +static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) + vlan_vid_del(port->dev, proto, vid); + rcu_read_unlock(); + + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void team_poll_controller(struct net_device *dev) +{ +} + +static void __team_netpoll_cleanup(struct team *team) +{ + struct team_port *port; + + list_for_each_entry(port, &team->port_list, list) + team_port_disable_netpoll(port); +} + +static void team_netpoll_cleanup(struct net_device *dev) +{ + struct team *team = netdev_priv(dev); + + mutex_lock(&team->lock); + __team_netpoll_cleanup(team); + mutex_unlock(&team->lock); +} + +static int team_netpoll_setup(struct net_device *dev, + struct netpoll_info *npifo) +{ + struct team *team = netdev_priv(dev); + struct team_port *port; + int err = 0; + + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { + err = team_port_enable_netpoll(team, port); + if (err) { + __team_netpoll_cleanup(team); + break; + } + } + mutex_unlock(&team->lock); + return err; +} +#endif + +static int team_add_slave(struct net_device *dev, struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + int err; + + mutex_lock(&team->lock); + err = team_port_add(team, port_dev); + mutex_unlock(&team->lock); + return err; +} + +static int team_del_slave(struct net_device *dev, struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + int err; + + mutex_lock(&team->lock); + err = team_port_del(team, port_dev); + mutex_unlock(&team->lock); + return err; +} + +static netdev_features_t team_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct team_port *port; + struct team *team = netdev_priv(dev); + netdev_features_t mask; + + mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + features &= ~NETIF_F_ONE_FOR_ALL; + features |= NETIF_F_ALL_FOR_ALL; + + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { + features = netdev_increment_features(features, + port->dev->features, + mask); + } + rcu_read_unlock(); + + features = netdev_add_tso_features(features, mask); + + return features; +} + +static int team_change_carrier(struct net_device *dev, bool new_carrier) +{ + struct team *team = netdev_priv(dev); + + team->user_carrier_enabled = true; + + if (new_carrier) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + return 0; +} + +static const struct net_device_ops team_netdev_ops = { + .ndo_init = team_init, + .ndo_uninit = team_uninit, + .ndo_open = team_open, + .ndo_stop = team_close, + .ndo_start_xmit = team_xmit, + .ndo_select_queue = team_select_queue, + .ndo_change_rx_flags = team_change_rx_flags, + .ndo_set_rx_mode = team_set_rx_mode, + .ndo_set_mac_address = team_set_mac_address, + .ndo_change_mtu = team_change_mtu, + .ndo_get_stats64 = team_get_stats64, + .ndo_vlan_rx_add_vid = team_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = team_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = team_poll_controller, + .ndo_netpoll_setup = team_netpoll_setup, + .ndo_netpoll_cleanup = team_netpoll_cleanup, +#endif + .ndo_add_slave = team_add_slave, + .ndo_del_slave = team_del_slave, + .ndo_fix_features = team_fix_features, + .ndo_change_carrier = team_change_carrier, + .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, + .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_features_check = passthru_features_check, +}; + +/*********************** + * ethtool interface + ***********************/ + +static void team_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct ethtool_ops team_ethtool_ops = { + .get_drvinfo = team_ethtool_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +/*********************** + * rt netlink interface + ***********************/ + +static void team_setup_by_port(struct net_device *dev, + struct net_device *port_dev) +{ + dev->header_ops = port_dev->header_ops; + dev->type = port_dev->type; + dev->hard_header_len = port_dev->hard_header_len; + dev->addr_len = port_dev->addr_len; + dev->mtu = port_dev->mtu; + memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len); + eth_hw_addr_inherit(dev, port_dev); +} + +static int team_dev_type_check_change(struct net_device *dev, + struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + char *portname = port_dev->name; + int err; + + if (dev->type == port_dev->type) + return 0; + if (!list_empty(&team->port_list)) { + netdev_err(dev, "Device %s is of different type\n", portname); + return -EBUSY; + } + err = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev); + err = notifier_to_errno(err); + if (err) { + netdev_err(dev, "Refused to change device type\n"); + return err; + } + dev_uc_flush(dev); + dev_mc_flush(dev); + team_setup_by_port(dev, port_dev); + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); + return 0; +} + +static void team_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &team_netdev_ops; + dev->ethtool_ops = &team_ethtool_ops; + dev->destructor = team_destructor; + dev->tx_queue_len = 0; + dev->flags |= IFF_MULTICAST; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); + + /* + * Indicate we support unicast address filtering. That way core won't + * bring us to promisc mode in case a unicast addr is added. + * Let this up to underlay drivers. + */ + dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; + + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_GRO; + + /* Don't allow team devices to change network namespaces. */ + dev->features |= NETIF_F_NETNS_LOCAL; + + dev->hw_features = TEAM_VLAN_FEATURES | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; + + dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); + dev->features |= dev->hw_features; +} + +static int team_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + return register_netdevice(dev); +} + +static int team_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static unsigned int team_get_num_tx_queues(void) +{ + return TEAM_DEFAULT_NUM_TX_QUEUES; +} + +static unsigned int team_get_num_rx_queues(void) +{ + return TEAM_DEFAULT_NUM_RX_QUEUES; +} + +static struct rtnl_link_ops team_link_ops __read_mostly = { + .kind = DRV_NAME, + .priv_size = sizeof(struct team), + .setup = team_setup, + .newlink = team_newlink, + .validate = team_validate, + .get_num_tx_queues = team_get_num_tx_queues, + .get_num_rx_queues = team_get_num_rx_queues, +}; + + +/*********************************** + * Generic netlink custom interface + ***********************************/ + +static struct genl_family team_nl_family = { + .id = GENL_ID_GENERATE, + .name = TEAM_GENL_NAME, + .version = TEAM_GENL_VERSION, + .maxattr = TEAM_ATTR_MAX, + .netnsok = true, +}; + +static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = { + [TEAM_ATTR_UNSPEC] = { .type = NLA_UNSPEC, }, + [TEAM_ATTR_TEAM_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_LIST_OPTION] = { .type = NLA_NESTED }, + [TEAM_ATTR_LIST_PORT] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { + [TEAM_ATTR_OPTION_UNSPEC] = { .type = NLA_UNSPEC, }, + [TEAM_ATTR_OPTION_NAME] = { + .type = NLA_STRING, + .len = TEAM_STRING_MAX_LEN, + }, + [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, + [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, + [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, +}; + +static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *hdr; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &team_nl_family, 0, TEAM_CMD_NOOP); + if (!hdr) { + err = -EMSGSIZE; + goto err_msg_put; + } + + genlmsg_end(msg, hdr); + + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + +err_msg_put: + nlmsg_free(msg); + + return err; +} + +/* + * Netlink cmd functions should be locked by following two functions. + * Since dev gets held here, that ensures dev won't disappear in between. + */ +static struct team *team_nl_team_get(struct genl_info *info) +{ + struct net *net = genl_info_net(info); + int ifindex; + struct net_device *dev; + struct team *team; + + if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX]) + return NULL; + + ifindex = nla_get_u32(info->attrs[TEAM_ATTR_TEAM_IFINDEX]); + dev = dev_get_by_index(net, ifindex); + if (!dev || dev->netdev_ops != &team_netdev_ops) { + if (dev) + dev_put(dev); + return NULL; + } + + team = netdev_priv(dev); + mutex_lock(&team->lock); + return team; +} + +static void team_nl_team_put(struct team *team) +{ + mutex_unlock(&team->lock); + dev_put(team->dev); +} + +typedef int team_nl_send_func_t(struct sk_buff *skb, + struct team *team, u32 portid); + +static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid) +{ + return genlmsg_unicast(dev_net(team->dev), skb, portid); +} + +static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, + struct team_option_inst *opt_inst) +{ + struct nlattr *option_item; + struct team_option *option = opt_inst->option; + struct team_option_inst_info *opt_inst_info = &opt_inst->info; + struct team_gsetter_ctx ctx; + int err; + + ctx.info = opt_inst_info; + err = team_option_get(team, opt_inst, &ctx); + if (err) + return err; + + option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION); + if (!option_item) + return -EMSGSIZE; + + if (nla_put_string(skb, TEAM_ATTR_OPTION_NAME, option->name)) + goto nest_cancel; + if (opt_inst_info->port && + nla_put_u32(skb, TEAM_ATTR_OPTION_PORT_IFINDEX, + opt_inst_info->port->dev->ifindex)) + goto nest_cancel; + if (opt_inst->option->array_size && + nla_put_u32(skb, TEAM_ATTR_OPTION_ARRAY_INDEX, + opt_inst_info->array_index)) + goto nest_cancel; + + switch (option->type) { + case TEAM_OPTION_TYPE_U32: + if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32)) + goto nest_cancel; + if (nla_put_u32(skb, TEAM_ATTR_OPTION_DATA, ctx.data.u32_val)) + goto nest_cancel; + break; + case TEAM_OPTION_TYPE_STRING: + if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_STRING)) + goto nest_cancel; + if (nla_put_string(skb, TEAM_ATTR_OPTION_DATA, + ctx.data.str_val)) + goto nest_cancel; + break; + case TEAM_OPTION_TYPE_BINARY: + if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_BINARY)) + goto nest_cancel; + if (nla_put(skb, TEAM_ATTR_OPTION_DATA, ctx.data.bin_val.len, + ctx.data.bin_val.ptr)) + goto nest_cancel; + break; + case TEAM_OPTION_TYPE_BOOL: + if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_FLAG)) + goto nest_cancel; + if (ctx.data.bool_val && + nla_put_flag(skb, TEAM_ATTR_OPTION_DATA)) + goto nest_cancel; + break; + case TEAM_OPTION_TYPE_S32: + if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_S32)) + goto nest_cancel; + if (nla_put_s32(skb, TEAM_ATTR_OPTION_DATA, ctx.data.s32_val)) + goto nest_cancel; + break; + default: + BUG(); + } + if (opt_inst->removed && nla_put_flag(skb, TEAM_ATTR_OPTION_REMOVED)) + goto nest_cancel; + if (opt_inst->changed) { + if (nla_put_flag(skb, TEAM_ATTR_OPTION_CHANGED)) + goto nest_cancel; + opt_inst->changed = false; + } + nla_nest_end(skb, option_item); + return 0; + +nest_cancel: + nla_nest_cancel(skb, option_item); + return -EMSGSIZE; +} + +static int __send_and_alloc_skb(struct sk_buff **pskb, + struct team *team, u32 portid, + team_nl_send_func_t *send_func) +{ + int err; + + if (*pskb) { + err = send_func(*pskb, team, portid); + if (err) + return err; + } + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!*pskb) + return -ENOMEM; + return 0; +} + +static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq, + int flags, team_nl_send_func_t *send_func, + struct list_head *sel_opt_inst_list) +{ + struct nlattr *option_list; + struct nlmsghdr *nlh; + void *hdr; + struct team_option_inst *opt_inst; + int err; + struct sk_buff *skb = NULL; + bool incomplete; + int i; + + opt_inst = list_first_entry(sel_opt_inst_list, + struct team_option_inst, tmp_list); + +start_again: + err = __send_and_alloc_skb(&skb, team, portid, send_func); + if (err) + return err; + + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, + TEAM_CMD_OPTIONS_GET); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) + goto nla_put_failure; + option_list = nla_nest_start(skb, TEAM_ATTR_LIST_OPTION); + if (!option_list) + goto nla_put_failure; + + i = 0; + incomplete = false; + list_for_each_entry_from(opt_inst, sel_opt_inst_list, tmp_list) { + err = team_nl_fill_one_option_get(skb, team, opt_inst); + if (err) { + if (err == -EMSGSIZE) { + if (!i) + goto errout; + incomplete = true; + break; + } + goto errout; + } + i++; + } + + nla_nest_end(skb, option_list); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = __send_and_alloc_skb(&skb, team, portid, send_func); + if (err) + goto errout; + goto send_done; + } + + return send_func(skb, team, portid); + +nla_put_failure: + err = -EMSGSIZE; +errout: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) +{ + struct team *team; + struct team_option_inst *opt_inst; + int err; + LIST_HEAD(sel_opt_inst_list); + + team = team_nl_team_get(info); + if (!team) + return -EINVAL; + + list_for_each_entry(opt_inst, &team->option_inst_list, list) + list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list); + err = team_nl_send_options_get(team, info->snd_portid, info->snd_seq, + NLM_F_ACK, team_nl_send_unicast, + &sel_opt_inst_list); + + team_nl_team_put(team); + + return err; +} + +static int team_nl_send_event_options_get(struct team *team, + struct list_head *sel_opt_inst_list); + +static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) +{ + struct team *team; + int err = 0; + int i; + struct nlattr *nl_option; + LIST_HEAD(opt_inst_list); + + team = team_nl_team_get(info); + if (!team) + return -EINVAL; + + err = -EINVAL; + if (!info->attrs[TEAM_ATTR_LIST_OPTION]) { + err = -EINVAL; + goto team_put; + } + + nla_for_each_nested(nl_option, info->attrs[TEAM_ATTR_LIST_OPTION], i) { + struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; + struct nlattr *attr; + struct nlattr *attr_data; + enum team_option_type opt_type; + int opt_port_ifindex = 0; /* != 0 for per-port options */ + u32 opt_array_index = 0; + bool opt_is_array = false; + struct team_option_inst *opt_inst; + char *opt_name; + bool opt_found = false; + + if (nla_type(nl_option) != TEAM_ATTR_ITEM_OPTION) { + err = -EINVAL; + goto team_put; + } + err = nla_parse_nested(opt_attrs, TEAM_ATTR_OPTION_MAX, + nl_option, team_nl_option_policy); + if (err) + goto team_put; + if (!opt_attrs[TEAM_ATTR_OPTION_NAME] || + !opt_attrs[TEAM_ATTR_OPTION_TYPE]) { + err = -EINVAL; + goto team_put; + } + switch (nla_get_u8(opt_attrs[TEAM_ATTR_OPTION_TYPE])) { + case NLA_U32: + opt_type = TEAM_OPTION_TYPE_U32; + break; + case NLA_STRING: + opt_type = TEAM_OPTION_TYPE_STRING; + break; + case NLA_BINARY: + opt_type = TEAM_OPTION_TYPE_BINARY; + break; + case NLA_FLAG: + opt_type = TEAM_OPTION_TYPE_BOOL; + break; + case NLA_S32: + opt_type = TEAM_OPTION_TYPE_S32; + break; + default: + goto team_put; + } + + attr_data = opt_attrs[TEAM_ATTR_OPTION_DATA]; + if (opt_type != TEAM_OPTION_TYPE_BOOL && !attr_data) { + err = -EINVAL; + goto team_put; + } + + opt_name = nla_data(opt_attrs[TEAM_ATTR_OPTION_NAME]); + attr = opt_attrs[TEAM_ATTR_OPTION_PORT_IFINDEX]; + if (attr) + opt_port_ifindex = nla_get_u32(attr); + + attr = opt_attrs[TEAM_ATTR_OPTION_ARRAY_INDEX]; + if (attr) { + opt_is_array = true; + opt_array_index = nla_get_u32(attr); + } + + list_for_each_entry(opt_inst, &team->option_inst_list, list) { + struct team_option *option = opt_inst->option; + struct team_gsetter_ctx ctx; + struct team_option_inst_info *opt_inst_info; + int tmp_ifindex; + + opt_inst_info = &opt_inst->info; + tmp_ifindex = opt_inst_info->port ? + opt_inst_info->port->dev->ifindex : 0; + if (option->type != opt_type || + strcmp(option->name, opt_name) || + tmp_ifindex != opt_port_ifindex || + (option->array_size && !opt_is_array) || + opt_inst_info->array_index != opt_array_index) + continue; + opt_found = true; + ctx.info = opt_inst_info; + switch (opt_type) { + case TEAM_OPTION_TYPE_U32: + ctx.data.u32_val = nla_get_u32(attr_data); + break; + case TEAM_OPTION_TYPE_STRING: + if (nla_len(attr_data) > TEAM_STRING_MAX_LEN) { + err = -EINVAL; + goto team_put; + } + ctx.data.str_val = nla_data(attr_data); + break; + case TEAM_OPTION_TYPE_BINARY: + ctx.data.bin_val.len = nla_len(attr_data); + ctx.data.bin_val.ptr = nla_data(attr_data); + break; + case TEAM_OPTION_TYPE_BOOL: + ctx.data.bool_val = attr_data ? true : false; + break; + case TEAM_OPTION_TYPE_S32: + ctx.data.s32_val = nla_get_s32(attr_data); + break; + default: + BUG(); + } + err = team_option_set(team, opt_inst, &ctx); + if (err) + goto team_put; + opt_inst->changed = true; + list_add(&opt_inst->tmp_list, &opt_inst_list); + } + if (!opt_found) { + err = -ENOENT; + goto team_put; + } + } + + err = team_nl_send_event_options_get(team, &opt_inst_list); + +team_put: + team_nl_team_put(team); + + return err; +} + +static int team_nl_fill_one_port_get(struct sk_buff *skb, + struct team_port *port) +{ + struct nlattr *port_item; + + port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT); + if (!port_item) + goto nest_cancel; + if (nla_put_u32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex)) + goto nest_cancel; + if (port->changed) { + if (nla_put_flag(skb, TEAM_ATTR_PORT_CHANGED)) + goto nest_cancel; + port->changed = false; + } + if ((port->removed && + nla_put_flag(skb, TEAM_ATTR_PORT_REMOVED)) || + (port->state.linkup && + nla_put_flag(skb, TEAM_ATTR_PORT_LINKUP)) || + nla_put_u32(skb, TEAM_ATTR_PORT_SPEED, port->state.speed) || + nla_put_u8(skb, TEAM_ATTR_PORT_DUPLEX, port->state.duplex)) + goto nest_cancel; + nla_nest_end(skb, port_item); + return 0; + +nest_cancel: + nla_nest_cancel(skb, port_item); + return -EMSGSIZE; +} + +static int team_nl_send_port_list_get(struct team *team, u32 portid, u32 seq, + int flags, team_nl_send_func_t *send_func, + struct team_port *one_port) +{ + struct nlattr *port_list; + struct nlmsghdr *nlh; + void *hdr; + struct team_port *port; + int err; + struct sk_buff *skb = NULL; + bool incomplete; + int i; + + port = list_first_entry_or_null(&team->port_list, + struct team_port, list); + +start_again: + err = __send_and_alloc_skb(&skb, team, portid, send_func); + if (err) + return err; + + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, + TEAM_CMD_PORT_LIST_GET); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) + goto nla_put_failure; + port_list = nla_nest_start(skb, TEAM_ATTR_LIST_PORT); + if (!port_list) + goto nla_put_failure; + + i = 0; + incomplete = false; + + /* If one port is selected, called wants to send port list containing + * only this port. Otherwise go through all listed ports and send all + */ + if (one_port) { + err = team_nl_fill_one_port_get(skb, one_port); + if (err) + goto errout; + } else if (port) { + list_for_each_entry_from(port, &team->port_list, list) { + err = team_nl_fill_one_port_get(skb, port); + if (err) { + if (err == -EMSGSIZE) { + if (!i) + goto errout; + incomplete = true; + break; + } + goto errout; + } + i++; + } + } + + nla_nest_end(skb, port_list); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = __send_and_alloc_skb(&skb, team, portid, send_func); + if (err) + goto errout; + goto send_done; + } + + return send_func(skb, team, portid); + +nla_put_failure: + err = -EMSGSIZE; +errout: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int team_nl_cmd_port_list_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct team *team; + int err; + + team = team_nl_team_get(info); + if (!team) + return -EINVAL; + + err = team_nl_send_port_list_get(team, info->snd_portid, info->snd_seq, + NLM_F_ACK, team_nl_send_unicast, NULL); + + team_nl_team_put(team); + + return err; +} + +static const struct genl_ops team_nl_ops[] = { + { + .cmd = TEAM_CMD_NOOP, + .doit = team_nl_cmd_noop, + .policy = team_nl_policy, + }, + { + .cmd = TEAM_CMD_OPTIONS_SET, + .doit = team_nl_cmd_options_set, + .policy = team_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TEAM_CMD_OPTIONS_GET, + .doit = team_nl_cmd_options_get, + .policy = team_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TEAM_CMD_PORT_LIST_GET, + .doit = team_nl_cmd_port_list_get, + .policy = team_nl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static const struct genl_multicast_group team_nl_mcgrps[] = { + { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, +}; + +static int team_nl_send_multicast(struct sk_buff *skb, + struct team *team, u32 portid) +{ + return genlmsg_multicast_netns(&team_nl_family, dev_net(team->dev), + skb, 0, 0, GFP_KERNEL); +} + +static int team_nl_send_event_options_get(struct team *team, + struct list_head *sel_opt_inst_list) +{ + return team_nl_send_options_get(team, 0, 0, 0, team_nl_send_multicast, + sel_opt_inst_list); +} + +static int team_nl_send_event_port_get(struct team *team, + struct team_port *port) +{ + return team_nl_send_port_list_get(team, 0, 0, 0, team_nl_send_multicast, + port); +} + +static int team_nl_init(void) +{ + return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops, + team_nl_mcgrps); +} + +static void team_nl_fini(void) +{ + genl_unregister_family(&team_nl_family); +} + + +/****************** + * Change checkers + ******************/ + +static void __team_options_change_check(struct team *team) +{ + int err; + struct team_option_inst *opt_inst; + LIST_HEAD(sel_opt_inst_list); + + list_for_each_entry(opt_inst, &team->option_inst_list, list) { + if (opt_inst->changed) + list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list); + } + err = team_nl_send_event_options_get(team, &sel_opt_inst_list); + if (err && err != -ESRCH) + netdev_warn(team->dev, "Failed to send options change via netlink (err %d)\n", + err); +} + +/* rtnl lock is held */ + +static void __team_port_change_send(struct team_port *port, bool linkup) +{ + int err; + + port->changed = true; + port->state.linkup = linkup; + team_refresh_port_linkup(port); + if (linkup) { + struct ethtool_cmd ecmd; + + err = __ethtool_get_settings(port->dev, &ecmd); + if (!err) { + port->state.speed = ethtool_cmd_speed(&ecmd); + port->state.duplex = ecmd.duplex; + goto send_event; + } + } + port->state.speed = 0; + port->state.duplex = 0; + +send_event: + err = team_nl_send_event_port_get(port->team, port); + if (err && err != -ESRCH) + netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink (err %d)\n", + port->dev->name, err); + +} + +static void __team_carrier_check(struct team *team) +{ + struct team_port *port; + bool team_linkup; + + if (team->user_carrier_enabled) + return; + + team_linkup = false; + list_for_each_entry(port, &team->port_list, list) { + if (port->linkup) { + team_linkup = true; + break; + } + } + + if (team_linkup) + netif_carrier_on(team->dev); + else + netif_carrier_off(team->dev); +} + +static void __team_port_change_check(struct team_port *port, bool linkup) +{ + if (port->state.linkup != linkup) + __team_port_change_send(port, linkup); + __team_carrier_check(port->team); +} + +static void __team_port_change_port_added(struct team_port *port, bool linkup) +{ + __team_port_change_send(port, linkup); + __team_carrier_check(port->team); +} + +static void __team_port_change_port_removed(struct team_port *port) +{ + port->removed = true; + __team_port_change_send(port, false); + __team_carrier_check(port->team); +} + +static void team_port_change_check(struct team_port *port, bool linkup) +{ + struct team *team = port->team; + + mutex_lock(&team->lock); + __team_port_change_check(port, linkup); + mutex_unlock(&team->lock); +} + + +/************************************ + * Net device notifier event handler + ************************************/ + +static int team_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct team_port *port; + + port = team_port_get_rtnl(dev); + if (!port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + if (netif_carrier_ok(dev)) + team_port_change_check(port, true); + break; + case NETDEV_DOWN: + team_port_change_check(port, false); + break; + case NETDEV_CHANGE: + if (netif_running(port->dev)) + team_port_change_check(port, + !!netif_carrier_ok(port->dev)); + break; + case NETDEV_UNREGISTER: + team_del_slave(port->team->dev, dev); + break; + case NETDEV_FEAT_CHANGE: + team_compute_features(port->team); + break; + case NETDEV_PRECHANGEMTU: + /* Forbid to change mtu of underlaying device */ + if (!port->team->port_mtu_change_allowed) + return NOTIFY_BAD; + break; + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid to change type of underlaying device */ + return NOTIFY_BAD; + case NETDEV_RESEND_IGMP: + /* Propagate to master device */ + call_netdevice_notifiers(event, port->team->dev); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block team_notifier_block __read_mostly = { + .notifier_call = team_device_event, +}; + + +/*********************** + * Module init and exit + ***********************/ + +static int __init team_module_init(void) +{ + int err; + + register_netdevice_notifier(&team_notifier_block); + + err = rtnl_link_register(&team_link_ops); + if (err) + goto err_rtnl_reg; + + err = team_nl_init(); + if (err) + goto err_nl_init; + + return 0; + +err_nl_init: + rtnl_link_unregister(&team_link_ops); + +err_rtnl_reg: + unregister_netdevice_notifier(&team_notifier_block); + + return err; +} + +static void __exit team_module_exit(void) +{ + team_nl_fini(); + rtnl_link_unregister(&team_link_ops); + unregister_netdevice_notifier(&team_notifier_block); +} + +module_init(team_module_init); +module_exit(team_module_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); +MODULE_DESCRIPTION("Ethernet team device driver"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git a/kernel/drivers/net/team/team_mode_activebackup.c b/kernel/drivers/net/team/team_mode_activebackup.c new file mode 100644 index 000000000..40fd3381b --- /dev/null +++ b/kernel/drivers/net/team/team_mode_activebackup.c @@ -0,0 +1,148 @@ +/* + * drivers/net/team/team_mode_activebackup.c - Active-backup mode for team + * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/rtnetlink.h> +#include <linux/if_team.h> + +struct ab_priv { + struct team_port __rcu *active_port; + struct team_option_inst_info *ap_opt_inst_info; +}; + +static struct ab_priv *ab_priv(struct team *team) +{ + return (struct ab_priv *) &team->mode_priv; +} + +static rx_handler_result_t ab_receive(struct team *team, struct team_port *port, + struct sk_buff *skb) { + struct team_port *active_port; + + active_port = rcu_dereference(ab_priv(team)->active_port); + if (active_port != port) + return RX_HANDLER_EXACT; + return RX_HANDLER_ANOTHER; +} + +static bool ab_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *active_port; + + active_port = rcu_dereference_bh(ab_priv(team)->active_port); + if (unlikely(!active_port)) + goto drop; + if (team_dev_queue_xmit(team, active_port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static void ab_port_leave(struct team *team, struct team_port *port) +{ + if (ab_priv(team)->active_port == port) { + RCU_INIT_POINTER(ab_priv(team)->active_port, NULL); + team_option_inst_set_change(ab_priv(team)->ap_opt_inst_info); + } +} + +static int ab_active_port_init(struct team *team, + struct team_option_inst_info *info) +{ + ab_priv(team)->ap_opt_inst_info = info; + return 0; +} + +static int ab_active_port_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct team_port *active_port; + + active_port = rcu_dereference_protected(ab_priv(team)->active_port, + lockdep_is_held(&team->lock)); + if (active_port) + ctx->data.u32_val = active_port->dev->ifindex; + else + ctx->data.u32_val = 0; + return 0; +} + +static int ab_active_port_set(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct team_port *port; + + list_for_each_entry(port, &team->port_list, list) { + if (port->dev->ifindex == ctx->data.u32_val) { + rcu_assign_pointer(ab_priv(team)->active_port, port); + return 0; + } + } + return -ENOENT; +} + +static const struct team_option ab_options[] = { + { + .name = "activeport", + .type = TEAM_OPTION_TYPE_U32, + .init = ab_active_port_init, + .getter = ab_active_port_get, + .setter = ab_active_port_set, + }, +}; + +static int ab_init(struct team *team) +{ + return team_options_register(team, ab_options, ARRAY_SIZE(ab_options)); +} + +static void ab_exit(struct team *team) +{ + team_options_unregister(team, ab_options, ARRAY_SIZE(ab_options)); +} + +static const struct team_mode_ops ab_mode_ops = { + .init = ab_init, + .exit = ab_exit, + .receive = ab_receive, + .transmit = ab_transmit, + .port_leave = ab_port_leave, +}; + +static const struct team_mode ab_mode = { + .kind = "activebackup", + .owner = THIS_MODULE, + .priv_size = sizeof(struct ab_priv), + .ops = &ab_mode_ops, +}; + +static int __init ab_init_module(void) +{ + return team_mode_register(&ab_mode); +} + +static void __exit ab_cleanup_module(void) +{ + team_mode_unregister(&ab_mode); +} + +module_init(ab_init_module); +module_exit(ab_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); +MODULE_DESCRIPTION("Active-backup mode for team"); +MODULE_ALIAS("team-mode-activebackup"); diff --git a/kernel/drivers/net/team/team_mode_broadcast.c b/kernel/drivers/net/team/team_mode_broadcast.c new file mode 100644 index 000000000..c366cd299 --- /dev/null +++ b/kernel/drivers/net/team/team_mode_broadcast.c @@ -0,0 +1,77 @@ +/* + * drivers/net/team/team_mode_broadcast.c - Broadcast mode for team + * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/if_team.h> + +static bool bc_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *cur; + struct team_port *last = NULL; + struct sk_buff *skb2; + bool ret; + bool sum_ret = false; + + list_for_each_entry_rcu(cur, &team->port_list, list) { + if (team_port_txable(cur)) { + if (last) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + ret = !team_dev_queue_xmit(team, last, + skb2); + if (!sum_ret) + sum_ret = ret; + } + } + last = cur; + } + } + if (last) { + ret = !team_dev_queue_xmit(team, last, skb); + if (!sum_ret) + sum_ret = ret; + } + return sum_ret; +} + +static const struct team_mode_ops bc_mode_ops = { + .transmit = bc_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode bc_mode = { + .kind = "broadcast", + .owner = THIS_MODULE, + .ops = &bc_mode_ops, +}; + +static int __init bc_init_module(void) +{ + return team_mode_register(&bc_mode); +} + +static void __exit bc_cleanup_module(void) +{ + team_mode_unregister(&bc_mode); +} + +module_init(bc_init_module); +module_exit(bc_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); +MODULE_DESCRIPTION("Broadcast mode for team"); +MODULE_ALIAS("team-mode-broadcast"); diff --git a/kernel/drivers/net/team/team_mode_loadbalance.c b/kernel/drivers/net/team/team_mode_loadbalance.c new file mode 100644 index 000000000..a1536d0d8 --- /dev/null +++ b/kernel/drivers/net/team/team_mode_loadbalance.c @@ -0,0 +1,682 @@ +/* + * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team + * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/filter.h> +#include <linux/if_team.h> + +struct lb_priv; + +typedef struct team_port *lb_select_tx_port_func_t(struct team *, + struct lb_priv *, + struct sk_buff *, + unsigned char); + +#define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */ + +struct lb_stats { + u64 tx_bytes; +}; + +struct lb_pcpu_stats { + struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE]; + struct u64_stats_sync syncp; +}; + +struct lb_stats_info { + struct lb_stats stats; + struct lb_stats last_stats; + struct team_option_inst_info *opt_inst_info; +}; + +struct lb_port_mapping { + struct team_port __rcu *port; + struct team_option_inst_info *opt_inst_info; +}; + +struct lb_priv_ex { + struct team *team; + struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; + struct sock_fprog_kern *orig_fprog; + struct { + unsigned int refresh_interval; /* in tenths of second */ + struct delayed_work refresh_dw; + struct lb_stats_info info[LB_TX_HASHTABLE_SIZE]; + } stats; +}; + +struct lb_priv { + struct bpf_prog __rcu *fp; + lb_select_tx_port_func_t __rcu *select_tx_port_func; + struct lb_pcpu_stats __percpu *pcpu_stats; + struct lb_priv_ex *ex; /* priv extension */ +}; + +static struct lb_priv *get_lb_priv(struct team *team) +{ + return (struct lb_priv *) &team->mode_priv; +} + +struct lb_port_priv { + struct lb_stats __percpu *pcpu_stats; + struct lb_stats_info stats_info; +}; + +static struct lb_port_priv *get_lb_port_priv(struct team_port *port) +{ + return (struct lb_port_priv *) &port->mode_priv; +} + +#define LB_HTPM_PORT_BY_HASH(lp_priv, hash) \ + (lb_priv)->ex->tx_hash_to_port_mapping[hash].port + +#define LB_HTPM_OPT_INST_INFO_BY_HASH(lp_priv, hash) \ + (lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info + +static void lb_tx_hash_to_port_mapping_null_port(struct team *team, + struct team_port *port) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + bool changed = false; + int i; + + for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) { + struct lb_port_mapping *pm; + + pm = &lb_priv->ex->tx_hash_to_port_mapping[i]; + if (rcu_access_pointer(pm->port) == port) { + RCU_INIT_POINTER(pm->port, NULL); + team_option_inst_set_change(pm->opt_inst_info); + changed = true; + } + } + if (changed) + team_options_change_check(team); +} + +/* Basic tx selection based solely by hash */ +static struct team_port *lb_hash_select_tx_port(struct team *team, + struct lb_priv *lb_priv, + struct sk_buff *skb, + unsigned char hash) +{ + int port_index = team_num_to_port_index(team, hash); + + return team_get_port_by_index_rcu(team, port_index); +} + +/* Hash to port mapping select tx port */ +static struct team_port *lb_htpm_select_tx_port(struct team *team, + struct lb_priv *lb_priv, + struct sk_buff *skb, + unsigned char hash) +{ + return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); +} + +struct lb_select_tx_port { + char *name; + lb_select_tx_port_func_t *func; +}; + +static const struct lb_select_tx_port lb_select_tx_port_list[] = { + { + .name = "hash", + .func = lb_hash_select_tx_port, + }, + { + .name = "hash_to_port_mapping", + .func = lb_htpm_select_tx_port, + }, +}; +#define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list) + +static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func) +{ + int i; + + for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { + const struct lb_select_tx_port *item; + + item = &lb_select_tx_port_list[i]; + if (item->func == func) + return item->name; + } + return NULL; +} + +static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) +{ + int i; + + for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { + const struct lb_select_tx_port *item; + + item = &lb_select_tx_port_list[i]; + if (!strcmp(item->name, name)) + return item->func; + } + return NULL; +} + +static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, + struct sk_buff *skb) +{ + struct bpf_prog *fp; + uint32_t lhash; + unsigned char *c; + + fp = rcu_dereference_bh(lb_priv->fp); + if (unlikely(!fp)) + return 0; + lhash = BPF_PROG_RUN(fp, skb); + c = (char *) &lhash; + return c[0] ^ c[1] ^ c[2] ^ c[3]; +} + +static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv, + struct lb_port_priv *lb_port_priv, + unsigned char hash) +{ + struct lb_pcpu_stats *pcpu_stats; + struct lb_stats *port_stats; + struct lb_stats *hash_stats; + + pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats); + port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats); + hash_stats = &pcpu_stats->hash_stats[hash]; + u64_stats_update_begin(&pcpu_stats->syncp); + port_stats->tx_bytes += tx_bytes; + hash_stats->tx_bytes += tx_bytes; + u64_stats_update_end(&pcpu_stats->syncp); +} + +static bool lb_transmit(struct team *team, struct sk_buff *skb) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + lb_select_tx_port_func_t *select_tx_port_func; + struct team_port *port; + unsigned char hash; + unsigned int tx_bytes = skb->len; + + hash = lb_get_skb_hash(lb_priv, skb); + select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func); + port = select_tx_port_func(team, lb_priv, skb, hash); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash); + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + + if (!lb_priv->ex->orig_fprog) { + ctx->data.bin_val.len = 0; + ctx->data.bin_val.ptr = NULL; + return 0; + } + ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len * + sizeof(struct sock_filter); + ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter; + return 0; +} + +static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len, + const void *data) +{ + struct sock_fprog_kern *fprog; + struct sock_filter *filter = (struct sock_filter *) data; + + if (data_len % sizeof(struct sock_filter)) + return -EINVAL; + fprog = kmalloc(sizeof(*fprog), GFP_KERNEL); + if (!fprog) + return -ENOMEM; + fprog->filter = kmemdup(filter, data_len, GFP_KERNEL); + if (!fprog->filter) { + kfree(fprog); + return -ENOMEM; + } + fprog->len = data_len / sizeof(struct sock_filter); + *pfprog = fprog; + return 0; +} + +static void __fprog_destroy(struct sock_fprog_kern *fprog) +{ + kfree(fprog->filter); + kfree(fprog); +} + +static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct bpf_prog *fp = NULL; + struct bpf_prog *orig_fp = NULL; + struct sock_fprog_kern *fprog = NULL; + int err; + + if (ctx->data.bin_val.len) { + err = __fprog_create(&fprog, ctx->data.bin_val.len, + ctx->data.bin_val.ptr); + if (err) + return err; + err = bpf_prog_create(&fp, fprog); + if (err) { + __fprog_destroy(fprog); + return err; + } + } + + if (lb_priv->ex->orig_fprog) { + /* Clear old filter data */ + __fprog_destroy(lb_priv->ex->orig_fprog); + orig_fp = rcu_dereference_protected(lb_priv->fp, + lockdep_is_held(&team->lock)); + } + + rcu_assign_pointer(lb_priv->fp, fp); + lb_priv->ex->orig_fprog = fprog; + + if (orig_fp) { + synchronize_rcu(); + bpf_prog_destroy(orig_fp); + } + return 0; +} + +static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + lb_select_tx_port_func_t *func; + char *name; + + func = rcu_dereference_protected(lb_priv->select_tx_port_func, + lockdep_is_held(&team->lock)); + name = lb_select_tx_port_get_name(func); + BUG_ON(!name); + ctx->data.str_val = name; + return 0; +} + +static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + lb_select_tx_port_func_t *func; + + func = lb_select_tx_port_get_func(ctx->data.str_val); + if (!func) + return -EINVAL; + rcu_assign_pointer(lb_priv->select_tx_port_func, func); + return 0; +} + +static int lb_tx_hash_to_port_mapping_init(struct team *team, + struct team_option_inst_info *info) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + unsigned char hash = info->array_index; + + LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info; + return 0; +} + +static int lb_tx_hash_to_port_mapping_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct team_port *port; + unsigned char hash = ctx->info->array_index; + + port = LB_HTPM_PORT_BY_HASH(lb_priv, hash); + ctx->data.u32_val = port ? port->dev->ifindex : 0; + return 0; +} + +static int lb_tx_hash_to_port_mapping_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct team_port *port; + unsigned char hash = ctx->info->array_index; + + list_for_each_entry(port, &team->port_list, list) { + if (ctx->data.u32_val == port->dev->ifindex && + team_port_enabled(port)) { + rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash), + port); + return 0; + } + } + return -ENODEV; +} + +static int lb_hash_stats_init(struct team *team, + struct team_option_inst_info *info) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + unsigned char hash = info->array_index; + + lb_priv->ex->stats.info[hash].opt_inst_info = info; + return 0; +} + +static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + unsigned char hash = ctx->info->array_index; + + ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats; + ctx->data.bin_val.len = sizeof(struct lb_stats); + return 0; +} + +static int lb_port_stats_init(struct team *team, + struct team_option_inst_info *info) +{ + struct team_port *port = info->port; + struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); + + lb_port_priv->stats_info.opt_inst_info = info; + return 0; +} + +static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); + + ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats; + ctx->data.bin_val.len = sizeof(struct lb_stats); + return 0; +} + +static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info) +{ + memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats)); + memset(&s_info->stats, 0, sizeof(struct lb_stats)); +} + +static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info, + struct team *team) +{ + if (memcmp(&s_info->last_stats, &s_info->stats, + sizeof(struct lb_stats))) { + team_option_inst_set_change(s_info->opt_inst_info); + return true; + } + return false; +} + +static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, + struct lb_stats *cpu_stats, + struct u64_stats_sync *syncp) +{ + unsigned int start; + struct lb_stats tmp; + + do { + start = u64_stats_fetch_begin_irq(syncp); + tmp.tx_bytes = cpu_stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(syncp, start)); + acc_stats->tx_bytes += tmp.tx_bytes; +} + +static void lb_stats_refresh(struct work_struct *work) +{ + struct team *team; + struct lb_priv *lb_priv; + struct lb_priv_ex *lb_priv_ex; + struct lb_pcpu_stats *pcpu_stats; + struct lb_stats *stats; + struct lb_stats_info *s_info; + struct team_port *port; + bool changed = false; + int i; + int j; + + lb_priv_ex = container_of(work, struct lb_priv_ex, + stats.refresh_dw.work); + + team = lb_priv_ex->team; + lb_priv = get_lb_priv(team); + + if (!mutex_trylock(&team->lock)) { + schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); + return; + } + + for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) { + s_info = &lb_priv->ex->stats.info[j]; + __lb_stats_info_refresh_prepare(s_info); + for_each_possible_cpu(i) { + pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + stats = &pcpu_stats->hash_stats[j]; + __lb_one_cpu_stats_add(&s_info->stats, stats, + &pcpu_stats->syncp); + } + changed |= __lb_stats_info_refresh_check(s_info, team); + } + + list_for_each_entry(port, &team->port_list, list) { + struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); + + s_info = &lb_port_priv->stats_info; + __lb_stats_info_refresh_prepare(s_info); + for_each_possible_cpu(i) { + pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i); + __lb_one_cpu_stats_add(&s_info->stats, stats, + &pcpu_stats->syncp); + } + changed |= __lb_stats_info_refresh_check(s_info, team); + } + + if (changed) + team_options_change_check(team); + + schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, + (lb_priv_ex->stats.refresh_interval * HZ) / 10); + + mutex_unlock(&team->lock); +} + +static int lb_stats_refresh_interval_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + + ctx->data.u32_val = lb_priv->ex->stats.refresh_interval; + return 0; +} + +static int lb_stats_refresh_interval_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + unsigned int interval; + + interval = ctx->data.u32_val; + if (lb_priv->ex->stats.refresh_interval == interval) + return 0; + lb_priv->ex->stats.refresh_interval = interval; + if (interval) + schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0); + else + cancel_delayed_work(&lb_priv->ex->stats.refresh_dw); + return 0; +} + +static const struct team_option lb_options[] = { + { + .name = "bpf_hash_func", + .type = TEAM_OPTION_TYPE_BINARY, + .getter = lb_bpf_func_get, + .setter = lb_bpf_func_set, + }, + { + .name = "lb_tx_method", + .type = TEAM_OPTION_TYPE_STRING, + .getter = lb_tx_method_get, + .setter = lb_tx_method_set, + }, + { + .name = "lb_tx_hash_to_port_mapping", + .array_size = LB_TX_HASHTABLE_SIZE, + .type = TEAM_OPTION_TYPE_U32, + .init = lb_tx_hash_to_port_mapping_init, + .getter = lb_tx_hash_to_port_mapping_get, + .setter = lb_tx_hash_to_port_mapping_set, + }, + { + .name = "lb_hash_stats", + .array_size = LB_TX_HASHTABLE_SIZE, + .type = TEAM_OPTION_TYPE_BINARY, + .init = lb_hash_stats_init, + .getter = lb_hash_stats_get, + }, + { + .name = "lb_port_stats", + .per_port = true, + .type = TEAM_OPTION_TYPE_BINARY, + .init = lb_port_stats_init, + .getter = lb_port_stats_get, + }, + { + .name = "lb_stats_refresh_interval", + .type = TEAM_OPTION_TYPE_U32, + .getter = lb_stats_refresh_interval_get, + .setter = lb_stats_refresh_interval_set, + }, +}; + +static int lb_init(struct team *team) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + lb_select_tx_port_func_t *func; + int i, err; + + /* set default tx port selector */ + func = lb_select_tx_port_get_func("hash"); + BUG_ON(!func); + rcu_assign_pointer(lb_priv->select_tx_port_func, func); + + lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL); + if (!lb_priv->ex) + return -ENOMEM; + lb_priv->ex->team = team; + + lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats); + if (!lb_priv->pcpu_stats) { + err = -ENOMEM; + goto err_alloc_pcpu_stats; + } + + for_each_possible_cpu(i) { + struct lb_pcpu_stats *team_lb_stats; + team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + u64_stats_init(&team_lb_stats->syncp); + } + + + INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); + + err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); + if (err) + goto err_options_register; + return 0; + +err_options_register: + free_percpu(lb_priv->pcpu_stats); +err_alloc_pcpu_stats: + kfree(lb_priv->ex); + return err; +} + +static void lb_exit(struct team *team) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + + team_options_unregister(team, lb_options, + ARRAY_SIZE(lb_options)); + cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); + free_percpu(lb_priv->pcpu_stats); + kfree(lb_priv->ex); +} + +static int lb_port_enter(struct team *team, struct team_port *port) +{ + struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); + + lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats); + if (!lb_port_priv->pcpu_stats) + return -ENOMEM; + return 0; +} + +static void lb_port_leave(struct team *team, struct team_port *port) +{ + struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); + + free_percpu(lb_port_priv->pcpu_stats); +} + +static void lb_port_disabled(struct team *team, struct team_port *port) +{ + lb_tx_hash_to_port_mapping_null_port(team, port); +} + +static const struct team_mode_ops lb_mode_ops = { + .init = lb_init, + .exit = lb_exit, + .port_enter = lb_port_enter, + .port_leave = lb_port_leave, + .port_disabled = lb_port_disabled, + .transmit = lb_transmit, +}; + +static const struct team_mode lb_mode = { + .kind = "loadbalance", + .owner = THIS_MODULE, + .priv_size = sizeof(struct lb_priv), + .port_priv_size = sizeof(struct lb_port_priv), + .ops = &lb_mode_ops, +}; + +static int __init lb_init_module(void) +{ + return team_mode_register(&lb_mode); +} + +static void __exit lb_cleanup_module(void) +{ + team_mode_unregister(&lb_mode); +} + +module_init(lb_init_module); +module_exit(lb_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); +MODULE_DESCRIPTION("Load-balancing mode for team"); +MODULE_ALIAS("team-mode-loadbalance"); diff --git a/kernel/drivers/net/team/team_mode_random.c b/kernel/drivers/net/team/team_mode_random.c new file mode 100644 index 000000000..cd2f692b8 --- /dev/null +++ b/kernel/drivers/net/team/team_mode_random.c @@ -0,0 +1,67 @@ +/* + * drivers/net/team/team_mode_random.c - Random mode for team + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/if_team.h> + +static bool rnd_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *port; + int port_index; + + port_index = prandom_u32_max(team->en_port_count); + port = team_get_port_by_index_rcu(team, port_index); + if (unlikely(!port)) + goto drop; + port = team_get_first_port_txable_rcu(team, port); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static const struct team_mode_ops rnd_mode_ops = { + .transmit = rnd_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode rnd_mode = { + .kind = "random", + .owner = THIS_MODULE, + .ops = &rnd_mode_ops, +}; + +static int __init rnd_init_module(void) +{ + return team_mode_register(&rnd_mode); +} + +static void __exit rnd_cleanup_module(void) +{ + team_mode_unregister(&rnd_mode); +} + +module_init(rnd_init_module); +module_exit(rnd_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("Random mode for team"); +MODULE_ALIAS("team-mode-random"); diff --git a/kernel/drivers/net/team/team_mode_roundrobin.c b/kernel/drivers/net/team/team_mode_roundrobin.c new file mode 100644 index 000000000..53665850b --- /dev/null +++ b/kernel/drivers/net/team/team_mode_roundrobin.c @@ -0,0 +1,79 @@ +/* + * drivers/net/team/team_mode_roundrobin.c - Round-robin mode for team + * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/if_team.h> + +struct rr_priv { + unsigned int sent_packets; +}; + +static struct rr_priv *rr_priv(struct team *team) +{ + return (struct rr_priv *) &team->mode_priv; +} + +static bool rr_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *port; + int port_index; + + port_index = team_num_to_port_index(team, + rr_priv(team)->sent_packets++); + port = team_get_port_by_index_rcu(team, port_index); + if (unlikely(!port)) + goto drop; + port = team_get_first_port_txable_rcu(team, port); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static const struct team_mode_ops rr_mode_ops = { + .transmit = rr_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode rr_mode = { + .kind = "roundrobin", + .owner = THIS_MODULE, + .priv_size = sizeof(struct rr_priv), + .ops = &rr_mode_ops, +}; + +static int __init rr_init_module(void) +{ + return team_mode_register(&rr_mode); +} + +static void __exit rr_cleanup_module(void) +{ + team_mode_unregister(&rr_mode); +} + +module_init(rr_init_module); +module_exit(rr_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); +MODULE_DESCRIPTION("Round-robin mode for team"); +MODULE_ALIAS("team-mode-roundrobin"); |