summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/net/veth.c
diff options
context:
space:
mode:
authorYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 12:17:53 -0700
committerYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 15:44:42 -0700
commit9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/drivers/net/veth.c
parent98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as the OPNFV kvm base.

It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is:

commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200

Prepare v4.1.3-rt3

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

We lose all the git history this way, and that is not good. We should apply another OPNFV project repo in the future.

Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/drivers/net/veth.c')
-rw-r--r--kernel/drivers/net/veth.c526
1 files changed, 526 insertions, 0 deletions
diff --git a/kernel/drivers/net/veth.c b/kernel/drivers/net/veth.c
new file mode 100644
index 000000000..c8186ffda
--- /dev/null
+++ b/kernel/drivers/net/veth.c
@@ -0,0 +1,526 @@
+/*
+ * drivers/net/veth.c
+ *
+ * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/u64_stats_sync.h>
+
+#include <net/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <linux/veth.h>
+#include <linux/module.h>
+
+#define DRV_NAME "veth" /* reported via ethtool drvinfo, used as the rtnl link kind and as the prefix of the default "veth%d" name template */
+#define DRV_VERSION "1.0" /* version string reported via ethtool drvinfo */
+
+#define MIN_MTU 68 /* Min L3 MTU */
+#define MAX_MTU 65535 /* Max L3 MTU (arbitrary) */
+
+struct pcpu_vstats { /* per-CPU transmit counters; allocated in veth_dev_init() and reached through dev->vstats */
+ u64 packets; /* frames for which dev_forward_skb() returned NET_RX_SUCCESS in veth_xmit() */
+ u64 bytes; /* sum of skb->len (sampled before forwarding) for those frames */
+ struct u64_stats_sync syncp; /* brackets the writer in veth_xmit() and the reader in veth_stats_one() */
+};
+
+struct veth_priv { /* per-device private area; its size is given to the rtnl core as veth_link_ops.priv_size */
+ struct net_device __rcu *peer; /* other end of the pair: set by veth_newlink(), reset to NULL by veth_dellink() */
+ atomic64_t dropped; /* frames veth_xmit() could not hand to the peer */
+};
+
+/*
+ * ethtool interface
+ */
+
+static struct { /* names of the ethtool statistics; entry i labels data[i] written by veth_get_ethtool_stats() */
+ const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+ { "peer_ifindex" },
+};
+
+static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) /*
+ * ethtool get_settings handler. Fills @cmd with fixed values that do not
+ * depend on @dev: no supported/advertised modes, speed SPEED_10000, full
+ * duplex, PORT_TP, internal transceiver, autonegotiation disabled.
+ *
+ * Returns 0 unconditionally.
+ */
+{
+ cmd->supported = 0;
+ cmd->advertising = 0;
+ ethtool_cmd_speed_set(cmd, SPEED_10000);
+ cmd->duplex = DUPLEX_FULL;
+ cmd->port = PORT_TP;
+ cmd->phy_address = 0;
+ cmd->transceiver = XCVR_INTERNAL;
+ cmd->autoneg = AUTONEG_DISABLE;
+ cmd->maxtxpkt = 0;
+ cmd->maxrxpkt = 0;
+ return 0;
+}
+
+static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) /*
+ * ethtool get_drvinfo handler: copies DRV_NAME and DRV_VERSION into
+ * info->driver and info->version, each bounded by the size of the
+ * destination field. No other field of @info is written here; @dev is
+ * not used.
+ */
+{
+ strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+}
+
+static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) /*
+ * ethtool get_strings handler: for ETH_SS_STATS, copies the whole
+ * ethtool_stats_keys table into @buf. For any other @stringset the
+ * switch has no matching case, so @buf is left untouched.
+ */
+{
+ switch(stringset) {
+ case ETH_SS_STATS:
+ memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+ break;
+ }
+}
+
+static int veth_get_sset_count(struct net_device *dev, int sset) /*
+ * ethtool get_sset_count handler.
+ *
+ * Returns the number of entries in ethtool_stats_keys for ETH_SS_STATS,
+ * and -EOPNOTSUPP for every other string set. @dev is not used.
+ */
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(ethtool_stats_keys);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void veth_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data) /*
+ * ethtool get_ethtool_stats handler: writes the single statistic named
+ * by ethtool_stats_keys[0] ("peer_ifindex") into data[0]. The value is
+ * the peer's ifindex, or 0 when the device currently has no peer.
+ * @stats is not used.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer); /* NOTE(review): rtnl_dereference() suggests the caller holds RTNL - confirm */
+
+ data[0] = peer ? peer->ifindex : 0;
+}
+
+static const struct ethtool_ops veth_ethtool_ops = { /* ethtool callbacks; attached to each device by veth_setup() */
+ .get_settings = veth_get_settings,
+ .get_drvinfo = veth_get_drvinfo,
+ .get_link = ethtool_op_get_link, /* helper defined outside this file */
+ .get_strings = veth_get_strings,
+ .get_sset_count = veth_get_sset_count,
+ .get_ethtool_stats = veth_get_ethtool_stats,
+};
+
+static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) /*
+ * ndo_start_xmit handler: hands @skb to the peer device through
+ * dev_forward_skb().
+ *
+ * When forwarding returns NET_RX_SUCCESS the per-CPU byte/packet
+ * counters of @dev are updated. When there is no peer, or forwarding
+ * returns anything else, the device-wide "dropped" counter is
+ * incremented instead.
+ *
+ * Always returns NETDEV_TX_OK, so a drop is never reported to the
+ * caller as a transmit failure.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *rcv;
+ int length = skb->len; /* sampled up front; skb is not read again after dev_forward_skb() */
+
+ rcu_read_lock(); /* priv->peer is __rcu; the read-side section covers every use of rcv below */
+ rcv = rcu_dereference(priv->peer);
+ if (unlikely(!rcv)) {
+ kfree_skb(skb); /* no peer: the frame is freed here, then counted as dropped */
+ goto drop;
+ }
+ /* don't change ip_summed == CHECKSUM_PARTIAL, as that
+ * will cause bad checksum on forwarded packets
+ */
+ if (skb->ip_summed == CHECKSUM_NONE &&
+ rcv->features & NETIF_F_RXCSUM)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+ struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->bytes += length;
+ stats->packets++;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+drop: /* reached by goto from the no-peer path and by falling into this else on forward failure.
+ * NOTE(review): on forward failure the skb is not freed in this function -
+ * presumably dev_forward_skb() consumes it; confirm.
+ */
+ atomic64_inc(&priv->dropped);
+ }
+ rcu_read_unlock();
+ return NETDEV_TX_OK;
+}
+
+/*
+ * general routines
+ */
+
+static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) /*
+ * Sums the per-CPU packet and byte counters of @dev over every possible
+ * CPU into result->packets and result->bytes (both are zeroed first;
+ * result->syncp is not touched).
+ *
+ * Returns the current value of the device's "dropped" counter.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ int cpu;
+
+ result->packets = 0;
+ result->bytes = 0;
+ for_each_possible_cpu(cpu) {
+ struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
+ u64 packets, bytes;
+ unsigned int start;
+
+ do { /* snapshot both counters; repeated whenever the retry check below asks for it */
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ packets = stats->packets;
+ bytes = stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ result->packets += packets;
+ result->bytes += bytes;
+ }
+ return atomic64_read(&priv->dropped);
+}
+
+static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot) /*
+ * ndo_get_stats64 handler.
+ *
+ * The tx_* fields of @tot come from @dev's own counters. The rx_* fields
+ * come from the peer's counters, i.e. what the peer transmitted (or
+ * dropped) is reported as received (or rx-dropped) here. When the device
+ * has no peer, the rx_* fields are not written at all.
+ *
+ * Returns @tot.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
+ struct pcpu_vstats one; /* scratch accumulator, reused for dev and then for peer */
+
+ tot->tx_dropped = veth_stats_one(&one, dev);
+ tot->tx_bytes = one.bytes;
+ tot->tx_packets = one.packets;
+
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ if (peer) {
+ tot->rx_dropped = veth_stats_one(&one, peer);
+ tot->rx_bytes = one.bytes;
+ tot->rx_packets = one.packets;
+ }
+ rcu_read_unlock();
+
+ return tot;
+}
+
+/* fake multicast ability: installed as .ndo_set_rx_mode in veth_netdev_ops;
+ * the body is intentionally empty.
+ */
+static void veth_set_multicast_list(struct net_device *dev)
+{
+}
+
+static int veth_open(struct net_device *dev) /*
+ * ndo_open handler.
+ *
+ * Returns -ENOTCONN when the device has no peer. Otherwise returns 0;
+ * carrier is switched on for both ends only if the peer already has
+ * IFF_UP set, and is left unchanged if it does not.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer);
+
+ if (!peer)
+ return -ENOTCONN;
+
+ if (peer->flags & IFF_UP) {
+ netif_carrier_on(dev);
+ netif_carrier_on(peer);
+ }
+ return 0;
+}
+
+static int veth_close(struct net_device *dev) /*
+ * ndo_stop handler: switches carrier off on @dev and, when a peer
+ * exists, on the peer as well. Returns 0 unconditionally.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer);
+
+ netif_carrier_off(dev);
+ if (peer)
+ netif_carrier_off(peer);
+
+ return 0;
+}
+
+static int is_valid_veth_mtu(int new_mtu) /*
+ * Returns non-zero when MIN_MTU <= @new_mtu <= MAX_MTU (both bounds
+ * inclusive), 0 otherwise.
+ */
+{
+ return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
+}
+
+static int veth_change_mtu(struct net_device *dev, int new_mtu) /*
+ * ndo_change_mtu handler: stores @new_mtu in dev->mtu when it passes
+ * is_valid_veth_mtu(). Only @dev is updated; the peer's MTU is not
+ * touched here.
+ *
+ * Returns 0 on success, -EINVAL when @new_mtu is out of range.
+ */
+{
+ if (!is_valid_veth_mtu(new_mtu))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static int veth_dev_init(struct net_device *dev) /*
+ * ndo_init handler: allocates the per-CPU pcpu_vstats counters and
+ * stores them in dev->vstats (released again in veth_dev_free()).
+ *
+ * Returns 0 on success, -ENOMEM when the allocation fails.
+ */
+{
+ dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
+ if (!dev->vstats)
+ return -ENOMEM;
+ return 0;
+}
+
+static void veth_dev_free(struct net_device *dev) /*
+ * Installed as dev->destructor by veth_setup(). Releases the per-CPU
+ * counters allocated in veth_dev_init() and then the net_device itself;
+ * @dev must not be used after this returns.
+ */
+{
+ free_percpu(dev->vstats);
+ free_netdev(dev);
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void veth_poll_controller(struct net_device *dev) /*
+ * ndo_poll_controller handler; only built when
+ * CONFIG_NET_POLL_CONTROLLER is defined. Deliberately a no-op, for the
+ * reason given in the body.
+ */
+{
+ /* veth only receives frames when its peer sends one.
+ * Since it's a synchronous operation, we are guaranteed
+ * never to have pending data when we poll for it, so
+ * there is nothing to do here.
+ *
+ * We need this though so netpoll recognizes us as an interface that
+ * supports polling, which enables bridge devices in virt setups to
+ * still use netconsole.
+ */
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+static int veth_get_iflink(const struct net_device *dev) /*
+ * ndo_get_iflink handler: returns the peer's ifindex, or 0 when the
+ * device currently has no peer. The peer pointer is only dereferenced
+ * inside the RCU read-side section; the value returned is a copy.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
+ int iflink;
+
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ iflink = peer ? peer->ifindex : 0;
+ rcu_read_unlock();
+
+ return iflink;
+}
+
+static const struct net_device_ops veth_netdev_ops = { /* net_device callbacks; attached to each device by veth_setup() */
+ .ndo_init = veth_dev_init,
+ .ndo_open = veth_open,
+ .ndo_stop = veth_close,
+ .ndo_start_xmit = veth_xmit,
+ .ndo_change_mtu = veth_change_mtu,
+ .ndo_get_stats64 = veth_get_stats64,
+ .ndo_set_rx_mode = veth_set_multicast_list, /* empty stub */
+ .ndo_set_mac_address = eth_mac_addr, /* helper defined outside this file */
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = veth_poll_controller, /* no-op */
+#endif
+ .ndo_get_iflink = veth_get_iflink,
+};
+
+#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+ NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | \
+ NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT | NETIF_F_UFO | \
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) /* feature mask veth_setup() applies to features, hw_features and hw_enc_features */
+
+static void veth_setup(struct net_device *dev) /*
+ * rtnl_link_ops.setup callback: starts from ether_setup() and then
+ * installs the veth callbacks, private flags, feature masks and
+ * destructor on @dev.
+ */
+{
+ ether_setup(dev);
+
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ dev->netdev_ops = &veth_netdev_ops;
+ dev->ethtool_ops = &veth_ethtool_ops;
+ dev->features |= NETIF_F_LLTX;
+ dev->features |= VETH_FEATURES;
+ dev->vlan_features = dev->features & /* same as features, minus the four VLAN tag offload bits */
+ ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->destructor = veth_dev_free; /* frees the per-CPU stats and the net_device */
+
+ dev->hw_features = VETH_FEATURES; /* note: NETIF_F_LLTX is set in features only, not in these two masks */
+ dev->hw_enc_features = VETH_FEATURES;
+}
+
+/*
+ * netlink interface
+ */
+
+static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) /*
+ * rtnl_link_ops.validate callback; also called directly by
+ * veth_newlink() on the peer's attribute table with @data == NULL.
+ * @data is not examined.
+ *
+ * Returns:
+ *   -EINVAL         IFLA_ADDRESS is present but not ETH_ALEN bytes long,
+ *                   or IFLA_MTU is present and fails is_valid_veth_mtu()
+ *   -EADDRNOTAVAIL  IFLA_ADDRESS is present but fails is_valid_ether_addr()
+ *   0               otherwise (including when neither attribute is present)
+ */
+{
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ return -EADDRNOTAVAIL;
+ }
+ if (tb[IFLA_MTU]) {
+ if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct rtnl_link_ops veth_link_ops; /* forward declaration: veth_newlink() passes &veth_link_ops to rtnl_create_link() before the definition appears */
+
+static int veth_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[]) /*
+ * rtnl_link_ops.newlink callback: creates and registers the peer device,
+ * registers @dev, and finally points the two devices at each other.
+ *
+ * @src_net: namespace passed on to rtnl_link_get_net() for the peer
+ * @dev:     the not-yet-registered device for this end of the pair
+ * @tb:      link attributes for @dev
+ * @data:    veth-specific attributes; data[VETH_INFO_PEER], when present,
+ *           carries a struct ifinfomsg followed by the peer's attributes
+ *
+ * Returns 0 on success or a negative errno. On failure the peer is
+ * either freed (its registration failed) or unregistered (a later step
+ * failed); @dev itself is not released here.
+ */
+{
+ int err;
+ struct net_device *peer;
+ struct veth_priv *priv;
+ char ifname[IFNAMSIZ];
+ struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; /* tbp: attribute table actually used for the peer */
+ unsigned char name_assign_type;
+ struct ifinfomsg *ifmp; /* peer's ifinfomsg header, or NULL when no VETH_INFO_PEER was given */
+ struct net *net;
+
+ /*
+ * create and register peer first
+ *
+ * With VETH_INFO_PEER the peer gets its own parsed and validated
+ * attribute table; without it the peer reuses dev's table (tb).
+ */
+ if (data != NULL && data[VETH_INFO_PEER] != NULL) {
+ struct nlattr *nla_peer;
+
+ nla_peer = data[VETH_INFO_PEER];
+ ifmp = nla_data(nla_peer);
+ err = rtnl_nla_parse_ifla(peer_tb, /* the attributes start right after the ifinfomsg header */
+ nla_data(nla_peer) + sizeof(struct ifinfomsg),
+ nla_len(nla_peer) - sizeof(struct ifinfomsg));
+ if (err < 0)
+ return err;
+
+ err = veth_validate(peer_tb, NULL);
+ if (err < 0)
+ return err;
+
+ tbp = peer_tb;
+ } else {
+ ifmp = NULL;
+ tbp = tb;
+ }
+
+ if (tbp[IFLA_IFNAME]) {
+ nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
+ name_assign_type = NET_NAME_USER;
+ } else {
+ snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); /* yields the literal template "veth%d"; presumably expanded to a unit number at registration - confirm */
+ name_assign_type = NET_NAME_ENUM;
+ }
+
+ net = rtnl_link_get_net(src_net, tbp);
+ if (IS_ERR(net))
+ return PTR_ERR(net);
+
+ peer = rtnl_create_link(net, ifname, name_assign_type,
+ &veth_link_ops, tbp);
+ if (IS_ERR(peer)) {
+ put_net(net);
+ return PTR_ERR(peer);
+ }
+
+ if (tbp[IFLA_ADDRESS] == NULL)
+ eth_hw_addr_random(peer);
+
+ if (ifmp && (dev->ifindex != 0)) /* the peer's requested ifindex is applied only when dev itself already has a non-zero ifindex */
+ peer->ifindex = ifmp->ifi_index;
+
+ err = register_netdevice(peer);
+ put_net(net); /* net is released whether or not registration succeeded */
+ net = NULL;
+ if (err < 0)
+ goto err_register_peer;
+
+ netif_carrier_off(peer);
+
+ err = rtnl_configure_link(peer, ifmp);
+ if (err < 0)
+ goto err_configure_peer;
+
+ /*
+ * register dev last
+ *
+ * note, that since we've registered new device the dev's name
+ * should be re-allocated
+ */
+
+ if (tb[IFLA_ADDRESS] == NULL)
+ eth_hw_addr_random(dev);
+
+ if (tb[IFLA_IFNAME])
+ nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+ else
+ snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto err_register_dev;
+
+ netif_carrier_off(dev);
+
+ /*
+ * tie the devices together
+ */
+
+ priv = netdev_priv(dev);
+ rcu_assign_pointer(priv->peer, peer);
+
+ priv = netdev_priv(peer);
+ rcu_assign_pointer(priv->peer, dev);
+ return 0;
+
+err_register_dev:
+ /* nothing to do for dev here; fall through to undo the peer */
+err_configure_peer:
+ unregister_netdevice(peer); /* peer was registered above, so it is unregistered rather than freed directly */
+ return err;
+
+err_register_peer:
+ free_netdev(peer); /* registration failed, so the peer is freed directly */
+ return err;
+}
+
+static void veth_dellink(struct net_device *dev, struct list_head *head) /*
+ * rtnl_link_ops.dellink callback: detaches @dev from its peer and queues
+ * both ends of the pair for unregistration on @head. Each side's peer
+ * pointer is reset to NULL before that device is queued.
+ */
+{
+ struct veth_priv *priv;
+ struct net_device *peer;
+
+ priv = netdev_priv(dev);
+ peer = rtnl_dereference(priv->peer); /* fetched before the pointer is cleared below */
+
+ /* Note : dellink() is called from default_device_exit_batch(),
+ * before a rcu_synchronize() point. The devices are guaranteed
+ * not to be freed before one RCU grace period has elapsed.
+ */
+ RCU_INIT_POINTER(priv->peer, NULL);
+ unregister_netdevice_queue(dev, head);
+
+ if (peer) {
+ priv = netdev_priv(peer);
+ RCU_INIT_POINTER(priv->peer, NULL);
+ unregister_netdevice_queue(peer, head);
+ }
+}
+
+static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { /* netlink policy for the veth-specific attributes (veth_link_ops.policy) */
+ [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, /* veth_newlink() reads a struct ifinfomsg from the start of this payload; NOTE(review): with no .type given, .len is presumably a minimum payload length - confirm */
+};
+
+static struct net *veth_get_link_net(const struct net_device *dev) /*
+ * rtnl_link_ops.get_link_net callback: returns the network namespace of
+ * the peer when there is one, otherwise the namespace of @dev itself.
+ */
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer = rtnl_dereference(priv->peer);
+
+ return peer ? dev_net(peer) : dev_net(dev);
+}
+
+static struct rtnl_link_ops veth_link_ops = { /* rtnetlink link type description; registered in veth_init(), unregistered in veth_exit() */
+ .kind = DRV_NAME, /* "veth" */
+ .priv_size = sizeof(struct veth_priv),
+ .setup = veth_setup,
+ .validate = veth_validate,
+ .newlink = veth_newlink,
+ .dellink = veth_dellink,
+ .policy = veth_policy,
+ .maxtype = VETH_INFO_MAX, /* matches the veth_policy array bound (VETH_INFO_MAX + 1 entries) */
+ .get_link_net = veth_get_link_net,
+};
+
+/*
+ * init/fini
+ */
+
+static __init int veth_init(void) /*
+ * Module init hook (see module_init() below): registers veth_link_ops
+ * with rtnetlink and returns the result of rtnl_link_register().
+ */
+{
+ return rtnl_link_register(&veth_link_ops);
+}
+
+static __exit void veth_exit(void) /*
+ * Module exit hook (see module_exit() below): unregisters veth_link_ops
+ * from rtnetlink.
+ */
+{
+ rtnl_link_unregister(&veth_link_ops);
+}
+
+module_init(veth_init); /* veth_init registers the rtnl link ops */
+module_exit(veth_exit); /* veth_exit unregisters them */
+
+MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME); /* rtnl-link module alias for kind "veth" */