From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001
From: José Pekkarinen
Date: Mon, 11 Apr 2016 10:41:07 +0300
Subject: These changes are the raw update to linux-4.4.6-rt14.
 Kernel sources are taken from kernel.org, and rt patch from the rt wiki
 download page.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic (I70131fb85).
The colliding changes have been removed because their logic was already
present in the source.

Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen
---
 kernel/drivers/net/ethernet/marvell/Kconfig       |   5 +-
 kernel/drivers/net/ethernet/marvell/mv643xx_eth.c | 108 +++--
 kernel/drivers/net/ethernet/marvell/mvneta.c      | 498 ++++++++++++++++------
 kernel/drivers/net/ethernet/marvell/mvpp2.c       | 292 +++++++++----
 kernel/drivers/net/ethernet/marvell/sky2.c        |  12 +
 5 files changed, 655 insertions(+), 260 deletions(-)

diff --git a/kernel/drivers/net/ethernet/marvell/Kconfig b/kernel/drivers/net/ethernet/marvell/Kconfig
index d323a695d..a1c862b46 100644
--- a/kernel/drivers/net/ethernet/marvell/Kconfig
+++ b/kernel/drivers/net/ethernet/marvell/Kconfig
@@ -7,9 +7,7 @@ config NET_VENDOR_MARVELL
 	default y
 	depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET
 	---help---
-	  If you have a network (Ethernet) card belonging to this class, say Y
-	  and read the Ethernet-HOWTO, available from
-	  .
+	  If you have a network (Ethernet) card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
@@ -46,6 +44,7 @@ config MVNETA
 	tristate "Marvell Armada 370/38x/XP network interface support"
 	depends on PLAT_ORION
 	select MVMDIO
+	select FIXED_PHY
 	---help---
 	  This driver supports the network interface units in the Marvell
 	  ARMADA XP, ARMADA 370 and ARMADA 38x SoC family.
diff --git a/kernel/drivers/net/ethernet/marvell/mv643xx_eth.c b/kernel/drivers/net/ethernet/marvell/mv643xx_eth.c index 1c75829eb..4182290fd 100644 --- a/kernel/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/kernel/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -759,11 +759,23 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq, desc->l4i_chk = 0; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(dev->dev.parent, data, - length, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) { - WARN(1, "dma_map_single failed!\n"); - return -ENOMEM; + + if (length <= 8 && (uintptr_t)data & 0x7) { + /* Copy unaligned small data fragment to TSO header data area */ + memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE, + data, length); + desc->buf_ptr = txq->tso_hdrs_dma + + txq->tx_curr_desc * TSO_HEADER_SIZE; + } else { + /* Alignment is okay, map buffer and hand off to hardware */ + txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE; + desc->buf_ptr = dma_map_single(dev->dev.parent, data, + length, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev->dev.parent, + desc->buf_ptr))) { + WARN(1, "dma_map_single failed!\n"); + return -ENOMEM; + } } cmd_sts = BUFFER_OWNED_BY_DMA; @@ -779,7 +791,8 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq, } static inline void -txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) +txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length, + u32 *first_cmd_sts, bool first_desc) { struct mv643xx_eth_private *mp = txq_to_mp(txq); int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -788,6 +801,7 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) int ret; u32 cmd_csum = 0; u16 l4i_chk = 0; + u32 cmd_sts; tx_index = txq->tx_curr_desc; desc = &txq->tx_desc_area[tx_index]; @@ -803,9 +817,17 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) desc->byte_cnt = hdr_len; desc->buf_ptr = txq->tso_hdrs_dma + txq->tx_curr_desc * TSO_HEADER_SIZE; - desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC | + cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC | GEN_CRC; + /* Defer updating the first command descriptor until all + * following descriptors have been written. 
+ */ + if (first_desc) + *first_cmd_sts = cmd_sts; + else + desc->cmd_sts = cmd_sts; + txq->tx_curr_desc++; if (txq->tx_curr_desc == txq->tx_ring_size) txq->tx_curr_desc = 0; @@ -819,6 +841,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, int desc_count = 0; struct tso_t tso; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tx_desc *first_tx_desc; + u32 first_cmd_sts = 0; /* Count needed descriptors */ if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) { @@ -826,11 +850,14 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, return -EBUSY; } + first_tx_desc = &txq->tx_desc_area[txq->tx_curr_desc]; + /* Initialize the TSO handler, and prepare the first payload */ tso_start(skb, &tso); total_len = skb->len - hdr_len; while (total_len > 0) { + bool first_desc = (desc_count == 0); char *hdr; data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); @@ -840,7 +867,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, /* prepare packet headers: MAC + IP + TCP */ hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE; tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); - txq_put_hdr_tso(skb, txq, data_left); + txq_put_hdr_tso(skb, txq, data_left, &first_cmd_sts, + first_desc); while (data_left > 0) { int size; @@ -860,6 +888,10 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, __skb_queue_tail(&txq->tx_skb, skb); skb_tx_timestamp(skb); + /* ensure all other descriptors are written before first cmd_sts */ + wmb(); + first_tx_desc->cmd_sts = first_cmd_sts; + /* clear TX_END status */ mp->work_tx_end &= ~(1 << txq->index); @@ -1586,7 +1618,6 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev, sizeof(drvinfo->version)); strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); - drvinfo->n_stats = ARRAY_SIZE(mv643xx_eth_stats); } static int mv643xx_eth_nway_reset(struct net_device *dev) @@ -1845,32 +1876,19 @@ static void mv643xx_eth_program_multicast_filter(struct net_device *dev) struct netdev_hw_addr *ha; int i; - if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { - int port_num; - u32 accept; + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) + goto promiscuous; -oom: - port_num = mp->port_num; - accept = 0x01010101; - for (i = 0; i < 0x100; i += 4) { - wrl(mp, SPECIAL_MCAST_TABLE(port_num) + i, accept); - wrl(mp, OTHER_MCAST_TABLE(port_num) + i, accept); - } - return; - } - - mc_spec = kmalloc(0x200, GFP_ATOMIC); - if (mc_spec == NULL) - goto oom; - mc_other = mc_spec + (0x100 >> 2); - - memset(mc_spec, 0, 0x100); - memset(mc_other, 0, 0x100); + /* Allocate both mc_spec and mc_other tables */ + mc_spec = kcalloc(128, sizeof(u32), GFP_ATOMIC); + if (!mc_spec) + goto promiscuous; + mc_other = &mc_spec[64]; netdev_for_each_mc_addr(ha, dev) { u8 *a = ha->addr; u32 *table; - int entry; + u8 entry; if (memcmp(a, "\x01\x00\x5e\x00\x00", 5) == 0) { table = mc_spec; @@ -1883,12 +1901,23 @@ oom: table[entry >> 2] |= 1 << (8 * (entry & 3)); } - for (i = 0; i < 0x100; i += 4) { - wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i, mc_spec[i >> 2]); - wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i, mc_other[i >> 2]); + for (i = 0; i < 64; i++) { + wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32), + mc_spec[i]); + wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32), + mc_other[i]); } kfree(mc_spec); + return; + +promiscuous: + for (i = 0; i < 64; i++) { + wrl(mp, 
SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32), + 0x01010101u); + wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32), + 0x01010101u); + } } static void mv643xx_eth_set_rx_mode(struct net_device *dev) @@ -2788,8 +2817,10 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) for_each_available_child_of_node(np, pnp) { ret = mv643xx_eth_shared_of_add_port(pdev, pnp); - if (ret) + if (ret) { + of_node_put(pnp); return ret; + } } return 0; } @@ -3125,9 +3156,8 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mib_counters_clear(mp); - init_timer(&mp->mib_counters_timer); - mp->mib_counters_timer.data = (unsigned long)mp; - mp->mib_counters_timer.function = mib_counters_timer_wrapper; + setup_timer(&mp->mib_counters_timer, mib_counters_timer_wrapper, + (unsigned long)mp); mp->mib_counters_timer.expires = jiffies + 30 * HZ; spin_lock_init(&mp->mib_counters_lock); @@ -3136,9 +3166,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) netif_napi_add(dev, &mp->napi, mv643xx_eth_poll, NAPI_POLL_WEIGHT); - init_timer(&mp->rx_oom); - mp->rx_oom.data = (unsigned long)mp; - mp->rx_oom.function = oom_timer_wrapper; + setup_timer(&mp->rx_oom, oom_timer_wrapper, (unsigned long)mp); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); diff --git a/kernel/drivers/net/ethernet/marvell/mvneta.c b/kernel/drivers/net/ethernet/marvell/mvneta.c index 4d608f011..ed622fa29 100644 --- a/kernel/drivers/net/ethernet/marvell/mvneta.c +++ b/kernel/drivers/net/ethernet/marvell/mvneta.c @@ -32,10 +32,11 @@ #include #include #include +#include /* Registers */ #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) -#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1) +#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) @@ -61,6 +62,7 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) #define MVNETA_DEF_RXQ(q) ((q) << 1) @@ -100,6 +102,8 @@ #define MVNETA_TXQ_CMD 0x2448 #define MVNETA_TXQ_DISABLE_SHIFT 8 #define MVNETA_TXQ_ENABLE_MASK 0x000000ff +#define MVNETA_RX_DISCARD_FRAME_COUNT 0x2484 +#define MVNETA_OVERRUN_FRAME_COUNT 0x2488 #define MVNETA_GMAC_CLOCK_DIVIDER 0x24f4 #define MVNETA_GMAC_1MS_CLOCK_ENABLE BIT(31) #define MVNETA_ACC_MODE 0x2500 @@ -156,7 +160,7 @@ #define MVNETA_INTR_ENABLE 0x25b8 #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 -#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF +#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff #define MVNETA_RXQ_CMD 0x2680 #define MVNETA_RXQ_DISABLE_SHIFT 8 @@ -191,7 +195,7 @@ #define MVNETA_GMAC_AN_FLOW_CTRL_EN BIT(11) #define MVNETA_GMAC_CONFIG_FULL_DUPLEX BIT(12) #define MVNETA_GMAC_AN_DUPLEX_EN BIT(13) -#define MVNETA_MIB_COUNTERS_BASE 0x3080 +#define MVNETA_MIB_COUNTERS_BASE 0x3000 #define MVNETA_MIB_LATE_COLLISION 0x7c #define MVNETA_DA_FILT_SPEC_MCAST 0x3400 #define MVNETA_DA_FILT_OTH_MCAST 0x3500 @@ -239,6 +243,7 @@ #define MVNETA_VLAN_TAG_LEN 4 #define MVNETA_CPU_D_CACHE_LINE_SIZE 32 +#define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 #define MVNETA_ACC_MODE_EXT 1 @@ -277,6 +282,50 @@ #define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) +struct mvneta_statistic { + unsigned short offset; + unsigned 
short type; + const char name[ETH_GSTRING_LEN]; +}; + +#define T_REG_32 32 +#define T_REG_64 64 + +static const struct mvneta_statistic mvneta_statistics[] = { + { 0x3000, T_REG_64, "good_octets_received", }, + { 0x3010, T_REG_32, "good_frames_received", }, + { 0x3008, T_REG_32, "bad_octets_received", }, + { 0x3014, T_REG_32, "bad_frames_received", }, + { 0x3018, T_REG_32, "broadcast_frames_received", }, + { 0x301c, T_REG_32, "multicast_frames_received", }, + { 0x3050, T_REG_32, "unrec_mac_control_received", }, + { 0x3058, T_REG_32, "good_fc_received", }, + { 0x305c, T_REG_32, "bad_fc_received", }, + { 0x3060, T_REG_32, "undersize_received", }, + { 0x3064, T_REG_32, "fragments_received", }, + { 0x3068, T_REG_32, "oversize_received", }, + { 0x306c, T_REG_32, "jabber_received", }, + { 0x3070, T_REG_32, "mac_receive_error", }, + { 0x3074, T_REG_32, "bad_crc_event", }, + { 0x3078, T_REG_32, "collision", }, + { 0x307c, T_REG_32, "late_collision", }, + { 0x2484, T_REG_32, "rx_discard", }, + { 0x2488, T_REG_32, "rx_overrun", }, + { 0x3020, T_REG_32, "frames_64_octets", }, + { 0x3024, T_REG_32, "frames_65_to_127_octets", }, + { 0x3028, T_REG_32, "frames_128_to_255_octets", }, + { 0x302c, T_REG_32, "frames_256_to_511_octets", }, + { 0x3030, T_REG_32, "frames_512_to_1023_octets", }, + { 0x3034, T_REG_32, "frames_1024_to_max_octets", }, + { 0x3038, T_REG_64, "good_octets_sent", }, + { 0x3040, T_REG_32, "good_frames_sent", }, + { 0x3044, T_REG_32, "excessive_collision", }, + { 0x3048, T_REG_32, "multicast_frames_sent", }, + { 0x304c, T_REG_32, "broadcast_frames_sent", }, + { 0x3054, T_REG_32, "fc_sent", }, + { 0x300c, T_REG_32, "internal_mac_transmit_err", }, +}; + struct mvneta_pcpu_stats { struct u64_stats_sync syncp; u64 rx_packets; @@ -285,23 +334,34 @@ struct mvneta_pcpu_stats { u64 tx_bytes; }; +struct mvneta_pcpu_port { + /* Pointer to the shared port */ + struct mvneta_port *pp; + + /* Pointer to the CPU-local NAPI struct */ + struct napi_struct napi; + + /* Cause of the previous interrupt */ + u32 cause_rx_tx; +}; + struct mvneta_port { + struct mvneta_pcpu_port __percpu *ports; + struct mvneta_pcpu_stats __percpu *stats; + int pkt_size; unsigned int frag_size; void __iomem *base; struct mvneta_rx_queue *rxqs; struct mvneta_tx_queue *txqs; struct net_device *dev; - - u32 cause_rx_tx; - struct napi_struct napi; + struct notifier_block cpu_notifier; /* Core clock */ struct clk *clk; u8 mcast_count[256]; u16 tx_ring_size; u16 rx_ring_size; - struct mvneta_pcpu_stats *stats; struct mii_bus *mii_bus; struct phy_device *phy_dev; @@ -312,6 +372,8 @@ struct mvneta_port { unsigned int speed; unsigned int tx_csum_limit; int use_inband_status:1; + + u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; }; /* The mvneta_tx_desc and mvneta_rx_desc structures describe the @@ -468,7 +530,7 @@ struct mvneta_rx_queue { /* The hardware supports eight (8) rx queues, but we are only allowing * the first one to be used. Therefore, let's just allocate one queue. 
*/ -static int rxq_number = 1; +static int rxq_number = 8; static int txq_number = 8; static int rxq_def; @@ -518,6 +580,8 @@ static void mvneta_mib_counters_clear(struct mvneta_port *pp) /* Perform dummy reads from MIB counters */ for (i = 0; i < MVNETA_MIB_LATE_COLLISION; i += 4) dummy = mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i)); + dummy = mvreg_read(pp, MVNETA_RX_DISCARD_FRAME_COUNT); + dummy = mvreg_read(pp, MVNETA_OVERRUN_FRAME_COUNT); } /* Get System Network Statistics */ @@ -746,7 +810,6 @@ static void mvneta_port_up(struct mvneta_port *pp) u32 q_map; /* Enable all initialized TXs. */ - mvneta_mib_counters_clear(pp); q_map = 0; for (queue = 0; queue < txq_number; queue++) { struct mvneta_tx_queue *txq = &pp->txqs[queue]; @@ -756,14 +819,7 @@ static void mvneta_port_up(struct mvneta_port *pp) mvreg_write(pp, MVNETA_TXQ_CMD, q_map); /* Enable all initialized RXQs. */ - q_map = 0; - for (queue = 0; queue < rxq_number; queue++) { - struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; - if (rxq->descs != NULL) - q_map |= (1 << queue); - } - - mvreg_write(pp, MVNETA_RXQ_CMD, q_map); + mvreg_write(pp, MVNETA_RXQ_CMD, BIT(rxq_def)); } /* Stop the Ethernet port activity */ @@ -949,7 +1005,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Set CPU queue access map - all CPUs have access to all RX * queues and to all TX queues */ - for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) + for_each_present_cpu(cpu) mvreg_write(pp, MVNETA_CPU_MAP(cpu), (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); @@ -1030,6 +1086,8 @@ static void mvneta_defaults_set(struct mvneta_port *pp) mvreg_write(pp, MVNETA_INTR_ENABLE, (MVNETA_RXQ_INTR_ENABLE_ALL_MASK | MVNETA_TXQ_INTR_ENABLE_ALL_MASK)); + + mvneta_mib_counters_clear(pp); } /* Set max sizes for tx queues */ @@ -1366,7 +1424,7 @@ static void *mvneta_frag_alloc(const struct mvneta_port *pp) static void mvneta_frag_free(const struct mvneta_port *pp, void *data) { if (likely(pp->frag_size <= PAGE_SIZE)) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } @@ -1426,17 +1484,6 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb) return MVNETA_TX_L4_CSUM_NOT; } -/* Returns rx queue pointer (find last set bit) according to causeRxTx - * value - */ -static struct mvneta_rx_queue *mvneta_rx_policy(struct mvneta_port *pp, - u32 cause) -{ - int queue = fls(cause >> 8) - 1; - - return (queue < 0 || queue >= rxq_number) ? 
NULL : &pp->rxqs[queue]; -} - /* Drop packets received by the RXQ and free buffers */ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) @@ -1448,9 +1495,9 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc = rxq->descs + i; void *data = (void *)rx_desc->buf_cookie; - mvneta_frag_free(pp, data); dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + mvneta_frag_free(pp, data); } if (rx_done) @@ -1461,8 +1508,9 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, static int mvneta_rx(struct mvneta_port *pp, int rx_todo, struct mvneta_rx_queue *rxq) { + struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); struct net_device *dev = pp->dev; - int rx_done, rx_filled; + int rx_done; u32 rcvd_pkts = 0; u32 rcvd_bytes = 0; @@ -1473,21 +1521,21 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, rx_todo = rx_done; rx_done = 0; - rx_filled = 0; /* Fairness NAPI loop */ while (rx_done < rx_todo) { struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); struct sk_buff *skb; unsigned char *data; + dma_addr_t phys_addr; u32 rx_status; int rx_bytes, err; rx_done++; - rx_filled++; rx_status = rx_desc->status; rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); data = (unsigned char *)rx_desc->buf_cookie; + phys_addr = rx_desc->buf_phys_addr; if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { @@ -1515,7 +1563,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, skb->protocol = eth_type_trans(skb, dev); mvneta_rx_csum(pp, rx_status, skb); - napi_gro_receive(&pp->napi, skb); + napi_gro_receive(&port->napi, skb); rcvd_pkts++; rcvd_bytes += rx_bytes; @@ -1524,13 +1572,25 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, continue; } - skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - if (!skb) + /* Refill processing */ + err = mvneta_rx_refill(pp, rx_desc); + if (err) { + netdev_err(dev, "Linux processing - Can't refill\n"); + rxq->missed++; goto err_drop_frame; + } + + skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - dma_unmap_single(dev->dev.parent, rx_desc->buf_phys_addr, + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. 
+ */ + dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + rcvd_pkts++; rcvd_bytes += rx_bytes; @@ -1542,15 +1602,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, mvneta_rx_csum(pp, rx_status, skb); - napi_gro_receive(&pp->napi, skb); - - /* Refill processing */ - err = mvneta_rx_refill(pp, rx_desc); - if (err) { - netdev_err(dev, "Linux processing - Can't refill\n"); - rxq->missed++; - rx_filled--; - } + napi_gro_receive(&port->napi, skb); } if (rcvd_pkts) { @@ -1563,7 +1615,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } /* Update rxq management counters */ - mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled); + mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); return rx_done; } @@ -2061,12 +2113,10 @@ static void mvneta_set_rx_mode(struct net_device *dev) /* Interrupt handling - the callback for request_irq() */ static irqreturn_t mvneta_isr(int irq, void *dev_id) { - struct mvneta_port *pp = (struct mvneta_port *)dev_id; - - /* Mask all interrupts */ - mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); + struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id; - napi_schedule(&pp->napi); + disable_percpu_irq(port->pp->dev->irq); + napi_schedule(&port->napi); return IRQ_HANDLED; } @@ -2104,11 +2154,11 @@ static int mvneta_poll(struct napi_struct *napi, int budget) { int rx_done = 0; u32 cause_rx_tx; - unsigned long flags; struct mvneta_port *pp = netdev_priv(napi->dev); + struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); if (!netif_running(pp->dev)) { - napi_complete(napi); + napi_complete(&port->napi); return rx_done; } @@ -2135,47 +2185,17 @@ static int mvneta_poll(struct napi_struct *napi, int budget) /* For the case where the last mvneta_poll did not process all * RX packets */ - cause_rx_tx |= pp->cause_rx_tx; - if (rxq_number > 1) { - while ((cause_rx_tx & MVNETA_RX_INTR_MASK_ALL) && (budget > 0)) { - int count; - struct mvneta_rx_queue *rxq; - /* get rx queue number from cause_rx_tx */ - rxq = mvneta_rx_policy(pp, cause_rx_tx); - if (!rxq) - break; - - /* process the packet in that rx queue */ - count = mvneta_rx(pp, budget, rxq); - rx_done += count; - budget -= count; - if (budget > 0) { - /* set off the rx bit of the - * corresponding bit in the cause rx - * tx register, so that next iteration - * will find the next rx queue where - * packets are received on - */ - cause_rx_tx &= ~((1 << rxq->id) << 8); - } - } - } else { - rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]); - budget -= rx_done; - } + cause_rx_tx |= port->cause_rx_tx; + rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]); + budget -= rx_done; if (budget > 0) { cause_rx_tx = 0; - napi_complete(napi); - local_irq_save(flags); - mvreg_write(pp, MVNETA_INTR_NEW_MASK, - MVNETA_RX_INTR_MASK(rxq_number) | - MVNETA_TX_INTR_MASK(txq_number) | - MVNETA_MISCINTR_INTR_MASK); - local_irq_restore(flags); + napi_complete(&port->napi); + enable_percpu_irq(pp->dev->irq, 0); } - pp->cause_rx_tx = cause_rx_tx; + port->cause_rx_tx = cause_rx_tx; return rx_done; } @@ -2379,26 +2399,19 @@ static void mvneta_cleanup_txqs(struct mvneta_port *pp) /* Cleanup all Rx queues */ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { - int queue; - - for (queue = 0; queue < rxq_number; queue++) - mvneta_rxq_deinit(pp, &pp->rxqs[queue]); + mvneta_rxq_deinit(pp, &pp->rxqs[rxq_def]); } /* Init all Rx queues */ static int mvneta_setup_rxqs(struct mvneta_port *pp) { - int queue; - - for (queue = 0; queue < 
rxq_number; queue++) { - int err = mvneta_rxq_init(pp, &pp->rxqs[queue]); - if (err) { - netdev_err(pp->dev, "%s: can't create rxq=%d\n", - __func__, queue); - mvneta_cleanup_rxqs(pp); - return err; - } + int err = mvneta_rxq_init(pp, &pp->rxqs[rxq_def]); + if (err) { + netdev_err(pp->dev, "%s: can't create rxq=%d\n", + __func__, rxq_def); + mvneta_cleanup_rxqs(pp); + return err; } return 0; @@ -2424,6 +2437,8 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) static void mvneta_start_dev(struct mvneta_port *pp) { + unsigned int cpu; + mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); @@ -2431,7 +2446,11 @@ static void mvneta_start_dev(struct mvneta_port *pp) mvneta_port_enable(pp); /* Enable polling on the port */ - napi_enable(&pp->napi); + for_each_present_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + napi_enable(&port->napi); + } /* Unmask interrupts */ mvreg_write(pp, MVNETA_INTR_NEW_MASK, @@ -2449,9 +2468,15 @@ static void mvneta_start_dev(struct mvneta_port *pp) static void mvneta_stop_dev(struct mvneta_port *pp) { + unsigned int cpu; + phy_stop(pp->phy_dev); - napi_disable(&pp->napi); + for_each_present_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + napi_disable(&port->napi); + } netif_carrier_off(pp->dev); @@ -2691,6 +2716,125 @@ static void mvneta_mdio_remove(struct mvneta_port *pp) pp->phy_dev = NULL; } +static void mvneta_percpu_enable(void *arg) +{ + struct mvneta_port *pp = arg; + + enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE); +} + +static void mvneta_percpu_disable(void *arg) +{ + struct mvneta_port *pp = arg; + + disable_percpu_irq(pp->dev->irq); +} + +static void mvneta_percpu_elect(struct mvneta_port *pp) +{ + int online_cpu_idx, cpu, i = 0; + + online_cpu_idx = rxq_def % num_online_cpus(); + + for_each_online_cpu(cpu) { + if (i == online_cpu_idx) + /* Enable per-CPU interrupt on the one CPU we + * just elected + */ + smp_call_function_single(cpu, mvneta_percpu_enable, + pp, true); + else + /* Disable per-CPU interrupt on all the other CPU */ + smp_call_function_single(cpu, mvneta_percpu_disable, + pp, true); + i++; + } +}; + +static int mvneta_percpu_notifier(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + struct mvneta_port *pp = container_of(nfb, struct mvneta_port, + cpu_notifier); + int cpu = (unsigned long)hcpu, other_cpu; + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + netif_tx_stop_all_queues(pp->dev); + + /* We have to synchronise on tha napi of each CPU + * except the one just being waked up + */ + for_each_online_cpu(other_cpu) { + if (other_cpu != cpu) { + struct mvneta_pcpu_port *other_port = + per_cpu_ptr(pp->ports, other_cpu); + + napi_synchronize(&other_port->napi); + } + } + + /* Mask all ethernet port interrupts */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); + mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); + napi_enable(&port->napi); + + /* Enable per-CPU interrupt on the one CPU we care + * about. 
+ */ + mvneta_percpu_elect(pp); + + /* Unmask all ethernet port interrupts */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK(rxq_number) | + MVNETA_TX_INTR_MASK(txq_number) | + MVNETA_MISCINTR_INTR_MASK); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | + MVNETA_CAUSE_PSC_SYNC_CHANGE); + netif_tx_start_all_queues(pp->dev); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + netif_tx_stop_all_queues(pp->dev); + /* Mask all ethernet port interrupts */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); + mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); + + napi_synchronize(&port->napi); + napi_disable(&port->napi); + /* Disable per-CPU interrupts on the CPU that is + * brought down. + */ + smp_call_function_single(cpu, mvneta_percpu_disable, + pp, true); + + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + /* Check if a new CPU must be elected now this on is down */ + mvneta_percpu_elect(pp); + /* Unmask all ethernet port interrupts */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK(rxq_number) | + MVNETA_TX_INTR_MASK(txq_number) | + MVNETA_MISCINTR_INTR_MASK); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | + MVNETA_CAUSE_PSC_SYNC_CHANGE); + netif_tx_start_all_queues(pp->dev); + break; + } + + return NOTIFY_OK; +} + static int mvneta_open(struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); @@ -2709,13 +2853,29 @@ static int mvneta_open(struct net_device *dev) goto err_cleanup_rxqs; /* Connect to port interrupt line */ - ret = request_irq(pp->dev->irq, mvneta_isr, 0, - MVNETA_DRIVER_NAME, pp); + ret = request_percpu_irq(pp->dev->irq, mvneta_isr, + MVNETA_DRIVER_NAME, pp->ports); if (ret) { netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq); goto err_cleanup_txqs; } + /* Even though the documentation says that request_percpu_irq + * doesn't enable the interrupts automatically, it actually + * does so on the local CPU. + * + * Make sure it's disabled. + */ + mvneta_percpu_disable(pp); + + /* Elect a CPU to handle our RX queue interrupt */ + mvneta_percpu_elect(pp); + + /* Register a CPU notifier to handle the case where our CPU + * might be taken offline. 
+ */ + register_cpu_notifier(&pp->cpu_notifier); + /* In default link is down */ netif_carrier_off(pp->dev); @@ -2730,7 +2890,7 @@ static int mvneta_open(struct net_device *dev) return 0; err_free_irq: - free_irq(pp->dev->irq, pp); + free_percpu_irq(pp->dev->irq, pp->ports); err_cleanup_txqs: mvneta_cleanup_txqs(pp); err_cleanup_rxqs: @@ -2742,10 +2902,14 @@ err_cleanup_rxqs: static int mvneta_stop(struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); + int cpu; mvneta_stop_dev(pp); mvneta_mdio_remove(pp); - free_irq(dev->irq, pp); + unregister_cpu_notifier(&pp->cpu_notifier); + for_each_present_cpu(cpu) + smp_call_function_single(cpu, mvneta_percpu_disable, pp, true); + free_percpu_irq(dev->irq, pp->ports); mvneta_cleanup_rxqs(pp); mvneta_cleanup_txqs(pp); @@ -2875,6 +3039,65 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev, return 0; } +static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) + memcpy(data + i * ETH_GSTRING_LEN, + mvneta_statistics[i].name, ETH_GSTRING_LEN); + } +} + +static void mvneta_ethtool_update_stats(struct mvneta_port *pp) +{ + const struct mvneta_statistic *s; + void __iomem *base = pp->base; + u32 high, low, val; + int i; + + for (i = 0, s = mvneta_statistics; + s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics); + s++, i++) { + val = 0; + + switch (s->type) { + case T_REG_32: + val = readl_relaxed(base + s->offset); + break; + case T_REG_64: + /* Docs say to read low 32-bit then high */ + low = readl_relaxed(base + s->offset); + high = readl_relaxed(base + s->offset + 4); + val = (u64)high << 32 | low; + break; + } + + pp->ethtool_stats[i] += val; + } +} + +static void mvneta_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mvneta_port *pp = netdev_priv(dev); + int i; + + mvneta_ethtool_update_stats(pp); + + for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) + *data++ = pp->ethtool_stats[i]; +} + +static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(mvneta_statistics); + return -EOPNOTSUPP; +} + static const struct net_device_ops mvneta_netdev_ops = { .ndo_open = mvneta_open, .ndo_stop = mvneta_stop, @@ -2896,6 +3119,9 @@ const struct ethtool_ops mvneta_eth_tool_ops = { .get_drvinfo = mvneta_ethtool_get_drvinfo, .get_ringparam = mvneta_ethtool_get_ringparam, .set_ringparam = mvneta_ethtool_set_ringparam, + .get_strings = mvneta_ethtool_get_strings, + .get_ethtool_stats = mvneta_ethtool_get_stats, + .get_sset_count = mvneta_ethtool_get_sset_count, }; /* Initialize hw */ @@ -2971,6 +3197,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); } /* Power up the port */ @@ -3030,16 +3257,10 @@ static int mvneta_probe(struct platform_device *pdev) char hw_mac_addr[ETH_ALEN]; const char *mac_from; const char *managed; + int tx_csum_limit; int phy_mode; int err; - - /* Our multiqueue support is not complete, so for now, only - * allow the usage of the first RX queue - */ - if (rxq_def != 0) { - dev_err(&pdev->dev, "Invalid rxq_def argument: %d\n", rxq_def); - return -EINVAL; - } + int cpu; dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number); if (!dev) @@ -3091,6 +3312,7 @@ static int mvneta_probe(struct platform_device *pdev) err = 
of_property_read_string(dn, "managed", &managed); pp->use_inband_status = (err == 0 && strcmp(managed, "in-band-status") == 0); + pp->cpu_notifier.notifier_call = mvneta_percpu_notifier; pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { @@ -3107,11 +3329,18 @@ static int mvneta_probe(struct platform_device *pdev) goto err_clk; } + /* Alloc per-cpu port structure */ + pp->ports = alloc_percpu(struct mvneta_pcpu_port); + if (!pp->ports) { + err = -ENOMEM; + goto err_clk; + } + /* Alloc per-cpu stats */ pp->stats = netdev_alloc_pcpu_stats(struct mvneta_pcpu_stats); if (!pp->stats) { err = -ENOMEM; - goto err_clk; + goto err_free_ports; } dt_mac_addr = of_get_mac_address(dn); @@ -3129,8 +3358,21 @@ static int mvneta_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(dn, "marvell,armada-370-neta")) - pp->tx_csum_limit = 1600; + if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) { + if (tx_csum_limit < 0 || + tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + dev_info(&pdev->dev, + "Wrong TX csum limit in DT, set to %dB\n", + MVNETA_TX_CSUM_DEF_SIZE); + } + } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + } else { + tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE; + } + + pp->tx_csum_limit = tx_csum_limit; pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; @@ -3152,7 +3394,12 @@ static int mvneta_probe(struct platform_device *pdev) if (dram_target_info) mvneta_conf_mbus_windows(pp, dram_target_info); - netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT); + for_each_present_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + netif_napi_add(dev, &port->napi, mvneta_poll, NAPI_POLL_WEIGHT); + port->pp = pp; + } dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->hw_features |= dev->features; @@ -3175,12 +3422,16 @@ static int mvneta_probe(struct platform_device *pdev) struct phy_device *phy = of_phy_find_device(dn); mvneta_fixed_link_update(pp, phy); + + put_device(&phy->dev); } return 0; err_free_stats: free_percpu(pp->stats); +err_free_ports: + free_percpu(pp->ports); err_clk: clk_disable_unprepare(pp->clk); err_put_phy_node: @@ -3200,6 +3451,7 @@ static int mvneta_remove(struct platform_device *pdev) unregister_netdev(dev); clk_disable_unprepare(pp->clk); + free_percpu(pp->ports); free_percpu(pp->stats); irq_dispose_mapping(dev->irq); of_node_put(pp->phy_node); diff --git a/kernel/drivers/net/ethernet/marvell/mvpp2.c b/kernel/drivers/net/ethernet/marvell/mvpp2.c index 3e8b1bfb1..a4beccf1f 100644 --- a/kernel/drivers/net/ethernet/marvell/mvpp2.c +++ b/kernel/drivers/net/ethernet/marvell/mvpp2.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -299,6 +301,7 @@ /* Coalescing */ #define MVPP2_TXDONE_COAL_PKTS_THRESH 15 +#define MVPP2_TXDONE_HRTIMER_PERIOD_NS 1000000UL #define MVPP2_RX_COAL_PKTS 32 #define MVPP2_RX_COAL_USEC 100 @@ -660,6 +663,14 @@ struct mvpp2_pcpu_stats { u64 tx_bytes; }; +/* Per-CPU port control */ +struct mvpp2_port_pcpu { + struct hrtimer tx_done_timer; + bool timer_scheduled; + /* Tasklet for egress finalization */ + struct tasklet_struct tx_done_tasklet; +}; + struct mvpp2_port { u8 id; @@ -679,6 +690,9 @@ struct mvpp2_port { u32 pending_cause_rx; struct napi_struct napi; + /* Per-CPU port control */ + struct mvpp2_port_pcpu __percpu *pcpu; + /* Flags */ unsigned long flags; @@ -776,6 +790,9 @@ struct mvpp2_txq_pcpu { /* 
Array of transmitted skb */ struct sk_buff **tx_skb; + /* Array of transmitted buffers' physical addresses */ + dma_addr_t *tx_buffs; + /* Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -913,8 +930,6 @@ struct mvpp2_bm_pool { /* Occupied buffers indicator */ atomic_t in_use; int in_use_thresh; - - spinlock_t lock; }; struct mvpp2_buff_hdr { @@ -963,9 +978,13 @@ static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu) } static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, - struct sk_buff *skb) + struct sk_buff *skb, + struct mvpp2_tx_desc *tx_desc) { txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb; + if (skb) + txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] = + tx_desc->buf_phys_addr; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; @@ -3376,7 +3395,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev, bm_pool->pkt_size = 0; bm_pool->buf_num = 0; atomic_set(&bm_pool->in_use, 0); - spin_lock_init(&bm_pool->lock); return 0; } @@ -3395,16 +3413,23 @@ static void mvpp2_bm_pool_bufsize_set(struct mvpp2 *priv, } /* Free all buffers from the pool */ -static void mvpp2_bm_bufs_free(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool) +static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv, + struct mvpp2_bm_pool *bm_pool) { int i; for (i = 0; i < bm_pool->buf_num; i++) { + dma_addr_t buf_phys_addr; u32 vaddr; /* Get buffer virtual address (indirect access) */ - mvpp2_read(priv, MVPP2_BM_PHY_ALLOC_REG(bm_pool->id)); + buf_phys_addr = mvpp2_read(priv, + MVPP2_BM_PHY_ALLOC_REG(bm_pool->id)); vaddr = mvpp2_read(priv, MVPP2_BM_VIRT_ALLOC_REG); + + dma_unmap_single(dev, buf_phys_addr, + bm_pool->buf_size, DMA_FROM_DEVICE); + if (!vaddr) break; dev_kfree_skb_any((struct sk_buff *)vaddr); @@ -3421,7 +3446,7 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev, { u32 val; - mvpp2_bm_bufs_free(priv, bm_pool); + mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool); if (bm_pool->buf_num) { WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id); return 0; @@ -3647,7 +3672,6 @@ static struct mvpp2_bm_pool * mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, int pkt_size) { - unsigned long flags = 0; struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool]; int num; @@ -3656,8 +3680,6 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, return NULL; } - spin_lock_irqsave(&new_pool->lock, flags); - if (new_pool->type == MVPP2_BM_FREE) new_pool->type = type; @@ -3677,7 +3699,8 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, MVPP2_BM_LONG_BUF_NUM : MVPP2_BM_SHORT_BUF_NUM; else - mvpp2_bm_bufs_free(port->priv, new_pool); + mvpp2_bm_bufs_free(port->dev->dev.parent, + port->priv, new_pool); new_pool->pkt_size = pkt_size; @@ -3686,8 +3709,6 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, if (num != pkts_num) { WARN(1, "pool %d: %d of %d allocated\n", new_pool->id, num, pkts_num); - /* We need to undo the bufs_add() allocations */ - spin_unlock_irqrestore(&new_pool->lock, flags); return NULL; } } @@ -3695,15 +3716,12 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, mvpp2_bm_pool_bufsize_set(port->priv, new_pool, MVPP2_RX_BUF_SIZE(new_pool->pkt_size)); - spin_unlock_irqrestore(&new_pool->lock, flags); - return new_pool; } /* Initialize pools for swf */ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) { - unsigned long flags = 
0; int rxq; if (!port->pool_long) { @@ -3714,9 +3732,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) if (!port->pool_long) return -ENOMEM; - spin_lock_irqsave(&port->pool_long->lock, flags); port->pool_long->port_map |= (1 << port->id); - spin_unlock_irqrestore(&port->pool_long->lock, flags); for (rxq = 0; rxq < rxq_number; rxq++) mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id); @@ -3730,9 +3746,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) if (!port->pool_short) return -ENOMEM; - spin_lock_irqsave(&port->pool_short->lock, flags); port->pool_short->port_map |= (1 << port->id); - spin_unlock_irqrestore(&port->pool_short->lock, flags); for (rxq = 0; rxq < rxq_number; rxq++) mvpp2_rxq_short_pool_set(port, rxq, @@ -3750,7 +3764,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) int pkt_size = MVPP2_RX_PKT_SIZE(mtu); /* Update BM pool with new buffer size */ - mvpp2_bm_bufs_free(port->priv, port_pool); + mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool); if (port_pool->buf_num) { WARN(1, "cannot free all buffers in pool %d\n", port_pool->id); return -EIO; @@ -3806,7 +3820,6 @@ static void mvpp2_interrupts_unmask(void *arg) mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), (MVPP2_CAUSE_MISC_SUM_MASK | - MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK | MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK)); } @@ -4382,23 +4395,6 @@ static void mvpp2_rx_time_coal_set(struct mvpp2_port *port, rxq->time_coal = usec; } -/* Set threshold for TX_DONE pkts coalescing */ -static void mvpp2_tx_done_pkts_coal_set(void *arg) -{ - struct mvpp2_port *port = arg; - int queue; - u32 val; - - for (queue = 0; queue < txq_number; queue++) { - struct mvpp2_tx_queue *txq = port->txqs[queue]; - - val = (txq->done_pkts_coal << MVPP2_TRANSMITTED_THRESH_OFFSET) & - MVPP2_TRANSMITTED_THRESH_MASK; - mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id); - mvpp2_write(port->priv, MVPP2_TXQ_THRESH_REG, val); - } -} - /* Free Tx queue skbuffs */ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, @@ -4407,17 +4403,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, int i; for (i = 0; i < num; i++) { - struct mvpp2_tx_desc *tx_desc = txq->descs + - txq_pcpu->txq_get_index; + dma_addr_t buf_phys_addr = + txq_pcpu->tx_buffs[txq_pcpu->txq_get_index]; struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; mvpp2_txq_inc_get(txq_pcpu); + dma_unmap_single(port->dev->dev.parent, buf_phys_addr, + skb_headlen(skb), DMA_TO_DEVICE); if (!skb) continue; - - dma_unmap_single(port->dev->dev.parent, tx_desc->buf_phys_addr, - tx_desc->data_size, DMA_TO_DEVICE); dev_kfree_skb_any(skb); } } @@ -4433,7 +4428,7 @@ static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port, static inline struct mvpp2_tx_queue *mvpp2_get_tx_queue(struct mvpp2_port *port, u32 cause) { - int queue = fls(cause >> 16) - 1; + int queue = fls(cause) - 1; return port->txqs[queue]; } @@ -4460,6 +4455,29 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, netif_tx_wake_queue(nq); } +static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause) +{ + struct mvpp2_tx_queue *txq; + struct mvpp2_txq_pcpu *txq_pcpu; + unsigned int tx_todo = 0; + + while (cause) { + txq = mvpp2_get_tx_queue(port, cause); + if (!txq) + break; + + txq_pcpu = this_cpu_ptr(txq->pcpu); + + if (txq_pcpu->count) { + mvpp2_txq_done(port, txq, txq_pcpu); + tx_todo += txq_pcpu->count; + } + + cause &= ~(1 << txq->log_id); + } + return tx_todo; +} 
+ /* Rx/Tx queue initialization/cleanup methods */ /* Allocate and initialize descriptors for aggr TXQ */ @@ -4649,12 +4667,13 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->tx_skb = kmalloc(txq_pcpu->size * sizeof(*txq_pcpu->tx_skb), GFP_KERNEL); - if (!txq_pcpu->tx_skb) { - dma_free_coherent(port->dev->dev.parent, - txq->size * MVPP2_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); - return -ENOMEM; - } + if (!txq_pcpu->tx_skb) + goto error; + + txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size * + sizeof(dma_addr_t), GFP_KERNEL); + if (!txq_pcpu->tx_buffs) + goto error; txq_pcpu->count = 0; txq_pcpu->reserved_num = 0; @@ -4663,6 +4682,19 @@ static int mvpp2_txq_init(struct mvpp2_port *port, } return 0; + +error: + for_each_present_cpu(cpu) { + txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); + kfree(txq_pcpu->tx_skb); + kfree(txq_pcpu->tx_buffs); + } + + dma_free_coherent(port->dev->dev.parent, + txq->size * MVPP2_DESC_ALIGNED_SIZE, + txq->descs, txq->descs_phys); + + return -ENOMEM; } /* Free allocated TXQ resources */ @@ -4675,6 +4707,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); kfree(txq_pcpu->tx_skb); + kfree(txq_pcpu->tx_buffs); } if (txq->descs) @@ -4805,7 +4838,6 @@ static int mvpp2_setup_txqs(struct mvpp2_port *port) goto err_cleanup; } - on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1); on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1); return 0; @@ -4887,6 +4919,49 @@ static void mvpp2_link_event(struct net_device *dev) } } +static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu) +{ + ktime_t interval; + + if (!port_pcpu->timer_scheduled) { + port_pcpu->timer_scheduled = true; + interval = ktime_set(0, MVPP2_TXDONE_HRTIMER_PERIOD_NS); + hrtimer_start(&port_pcpu->tx_done_timer, interval, + HRTIMER_MODE_REL_PINNED); + } +} + +static void mvpp2_tx_proc_cb(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); + unsigned int tx_todo, cause; + + if (!netif_running(dev)) + return; + port_pcpu->timer_scheduled = false; + + /* Process all the Tx queues */ + cause = (1 << txq_number) - 1; + tx_todo = mvpp2_tx_done(port, cause); + + /* Set the timer in case not all the packets were processed */ + if (tx_todo) + mvpp2_timer_set(port_pcpu); +} + +static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) +{ + struct mvpp2_port_pcpu *port_pcpu = container_of(timer, + struct mvpp2_port_pcpu, + tx_done_timer); + + tasklet_schedule(&port_pcpu->tx_done_tasklet); + + return HRTIMER_NORESTART; +} + /* Main RX/TX processing routines */ /* Display more error info */ @@ -5024,7 +5099,8 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, struct mvpp2_rx_queue *rxq) { struct net_device *dev = port->dev; - int rx_received, rx_filled, i; + int rx_received; + int rx_done = 0; u32 rcvd_pkts = 0; u32 rcvd_bytes = 0; @@ -5033,17 +5109,18 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, if (rx_todo > rx_received) rx_todo = rx_received; - rx_filled = 0; - for (i = 0; i < rx_todo; i++) { + while (rx_done < rx_todo) { struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq); struct mvpp2_bm_pool *bm_pool; struct sk_buff *skb; + dma_addr_t phys_addr; u32 bm, rx_status; int pool, rx_bytes, err; - rx_filled++; + rx_done++; rx_status = rx_desc->status; rx_bytes = rx_desc->data_size - MVPP2_MH_SIZE; + phys_addr = rx_desc->buf_phys_addr; bm = 
mvpp2_bm_cookie_build(rx_desc); pool = mvpp2_bm_cookie_pool_get(bm); @@ -5060,8 +5137,10 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, * comprised by the RX descriptor. */ if (rx_status & MVPP2_RXD_ERR_SUMMARY) { + err_drop_frame: dev->stats.rx_errors++; mvpp2_rx_error(port, rx_desc); + /* Return the buffer to the pool */ mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr, rx_desc->buf_cookie); continue; @@ -5069,6 +5148,15 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, skb = (struct sk_buff *)rx_desc->buf_cookie; + err = mvpp2_rx_refill(port, bm_pool, bm, 0); + if (err) { + netdev_err(port->dev, "failed to refill BM pools\n"); + goto err_drop_frame; + } + + dma_unmap_single(dev->dev.parent, phys_addr, + bm_pool->buf_size, DMA_FROM_DEVICE); + rcvd_pkts++; rcvd_bytes += rx_bytes; atomic_inc(&bm_pool->in_use); @@ -5079,12 +5167,6 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, mvpp2_rx_csum(port, rx_status, skb); napi_gro_receive(&port->napi, skb); - - err = mvpp2_rx_refill(port, bm_pool, bm, 0); - if (err) { - netdev_err(port->dev, "failed to refill BM pools\n"); - rx_filled--; - } } if (rcvd_pkts) { @@ -5098,7 +5180,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo, /* Update Rx queue management counters */ wmb(); - mvpp2_rxq_status_update(port, rxq->id, rx_todo, rx_filled); + mvpp2_rxq_status_update(port, rxq->id, rx_done, rx_done); return rx_todo; } @@ -5144,11 +5226,11 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, if (i == (skb_shinfo(skb)->nr_frags - 1)) { /* Last descriptor */ tx_desc->command = MVPP2_TXD_L_DESC; - mvpp2_txq_inc_put(txq_pcpu, skb); + mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc); } else { /* Descriptor in the middle: Not First, Not Last */ tx_desc->command = 0; - mvpp2_txq_inc_put(txq_pcpu, NULL); + mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc); } } @@ -5214,12 +5296,12 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) /* First and Last descriptor */ tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC; tx_desc->command = tx_cmd; - mvpp2_txq_inc_put(txq_pcpu, skb); + mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc); } else { /* First but not Last */ tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_PADDING_DISABLE; tx_desc->command = tx_cmd; - mvpp2_txq_inc_put(txq_pcpu, NULL); + mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc); /* Continue with other skb fragments */ if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { @@ -5255,6 +5337,17 @@ out: dev_kfree_skb_any(skb); } + /* Finalize TX processing */ + if (txq_pcpu->count >= txq->done_pkts_coal) + mvpp2_txq_done(port, txq, txq_pcpu); + + /* Set the timer in case not all frags were processed */ + if (txq_pcpu->count <= frags && txq_pcpu->count > 0) { + struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); + + mvpp2_timer_set(port_pcpu); + } + return NETDEV_TX_OK; } @@ -5268,10 +5361,11 @@ static inline void mvpp2_cause_error(struct net_device *dev, int cause) netdev_err(dev, "tx fifo underrun error\n"); } -static void mvpp2_txq_done_percpu(void *arg) +static int mvpp2_poll(struct napi_struct *napi, int budget) { - struct mvpp2_port *port = arg; - u32 cause_rx_tx, cause_tx, cause_misc; + u32 cause_rx_tx, cause_rx, cause_misc; + int rx_done = 0; + struct mvpp2_port *port = netdev_priv(napi->dev); /* Rx/Tx cause register * @@ -5285,7 +5379,7 @@ static void mvpp2_txq_done_percpu(void *arg) */ cause_rx_tx = mvpp2_read(port->priv, MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); - cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; + 
cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK; if (cause_misc) { @@ -5297,26 +5391,6 @@ static void mvpp2_txq_done_percpu(void *arg) cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); } - /* Release TX descriptors */ - if (cause_tx) { - struct mvpp2_tx_queue *txq = mvpp2_get_tx_queue(port, cause_tx); - struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); - - if (txq_pcpu->count) - mvpp2_txq_done(port, txq, txq_pcpu); - } -} - -static int mvpp2_poll(struct napi_struct *napi, int budget) -{ - u32 cause_rx_tx, cause_rx; - int rx_done = 0; - struct mvpp2_port *port = netdev_priv(napi->dev); - - on_each_cpu(mvpp2_txq_done_percpu, port, 1); - - cause_rx_tx = mvpp2_read(port->priv, - MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK; /* Process RX packets */ @@ -5561,6 +5635,8 @@ err_cleanup_rxqs: static int mvpp2_stop(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port_pcpu *port_pcpu; + int cpu; mvpp2_stop_dev(port); mvpp2_phy_disconnect(port); @@ -5569,6 +5645,13 @@ static int mvpp2_stop(struct net_device *dev) on_each_cpu(mvpp2_interrupts_mask, port, 1); free_irq(port->irq, port); + for_each_present_cpu(cpu) { + port_pcpu = per_cpu_ptr(port->pcpu, cpu); + + hrtimer_cancel(&port_pcpu->tx_done_timer); + port_pcpu->timer_scheduled = false; + tasklet_kill(&port_pcpu->tx_done_tasklet); + } mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); @@ -5784,7 +5867,6 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev, txq->done_pkts_coal = c->tx_max_coalesced_frames; } - on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1); return 0; } @@ -6035,6 +6117,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, { struct device_node *phy_node; struct mvpp2_port *port; + struct mvpp2_port_pcpu *port_pcpu; struct net_device *dev; struct resource *res; const char *dt_mac_addr; @@ -6044,7 +6127,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, int features; int phy_mode; int priv_common_regs_num = 2; - int err, i; + int err, i, cpu; dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number, rxq_number); @@ -6135,6 +6218,24 @@ static int mvpp2_port_probe(struct platform_device *pdev, } mvpp2_port_power_up(port); + port->pcpu = alloc_percpu(struct mvpp2_port_pcpu); + if (!port->pcpu) { + err = -ENOMEM; + goto err_free_txq_pcpu; + } + + for_each_present_cpu(cpu) { + port_pcpu = per_cpu_ptr(port->pcpu, cpu); + + hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb; + port_pcpu->timer_scheduled = false; + + tasklet_init(&port_pcpu->tx_done_tasklet, mvpp2_tx_proc_cb, + (unsigned long)dev); + } + netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT); features = NETIF_F_SG | NETIF_F_IP_CSUM; dev->features = features | NETIF_F_RXCSUM; @@ -6144,7 +6245,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); - goto err_free_txq_pcpu; + goto err_free_port_pcpu; } netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr); @@ -6153,6 +6254,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, priv->port_list[id] = port; return 0; +err_free_port_pcpu: + free_percpu(port->pcpu); err_free_txq_pcpu: for (i = 0; i < txq_number; i++) free_percpu(port->txqs[i]->pcpu); @@ -6171,6 +6274,7 @@ static void mvpp2_port_remove(struct mvpp2_port 
*port) int i; unregister_netdev(port->dev); + free_percpu(port->pcpu); free_percpu(port->stats); for (i = 0; i < txq_number; i++) free_percpu(port->txqs[i]->pcpu); diff --git a/kernel/drivers/net/ethernet/marvell/sky2.c b/kernel/drivers/net/ethernet/marvell/sky2.c index d9f449883..5606a0430 100644 --- a/kernel/drivers/net/ethernet/marvell/sky2.c +++ b/kernel/drivers/net/ethernet/marvell/sky2.c @@ -4819,6 +4819,18 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN); + /* if the address is invalid, use a random value */ + if (!is_valid_ether_addr(dev->dev_addr)) { + struct sockaddr sa = { AF_UNSPEC }; + + netdev_warn(dev, + "Invalid MAC address, defaulting to random\n"); + eth_hw_addr_random(dev); + memcpy(sa.sa_data, dev->dev_addr, ETH_ALEN); + if (sky2_set_mac_address(dev, &sa)) + netdev_warn(dev, "Failed to set MAC address.\n"); + } + return dev; } -- cgit 1.2.3-korg