diff options
author | Yang Zhang <yang.z.zhang@intel.com> | 2015-08-28 09:58:54 +0800 |
---|---|---|
committer | Yang Zhang <yang.z.zhang@intel.com> | 2015-09-01 12:44:00 +0800 |
commit | e44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch) | |
tree | 66b09f592c55df2878107a468a91d21506104d3f /qemu/hw/net | |
parent | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff) |
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/hw/net')
57 files changed, 37619 insertions, 0 deletions
diff --git a/qemu/hw/net/Makefile.objs b/qemu/hw/net/Makefile.objs new file mode 100644 index 000000000..98801739e --- /dev/null +++ b/qemu/hw/net/Makefile.objs @@ -0,0 +1,42 @@ +common-obj-$(CONFIG_DP8393X) += dp8393x.o +common-obj-$(CONFIG_XEN_BACKEND) += xen_nic.o + +# PCI network cards +common-obj-$(CONFIG_NE2000_PCI) += ne2000.o +common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o +common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o +common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o +common-obj-$(CONFIG_E1000_PCI) += e1000.o +common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o +common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet_tx_pkt.o vmxnet_rx_pkt.o +common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o + +common-obj-$(CONFIG_SMC91C111) += smc91c111.o +common-obj-$(CONFIG_LAN9118) += lan9118.o +common-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o +common-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o +common-obj-$(CONFIG_XGMAC) += xgmac.o +common-obj-$(CONFIG_MIPSNET) += mipsnet.o +common-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o +common-obj-$(CONFIG_ALLWINNER_EMAC) += allwinner_emac.o + +common-obj-$(CONFIG_CADENCE) += cadence_gem.o +common-obj-$(CONFIG_STELLARIS_ENET) += stellaris_enet.o +common-obj-$(CONFIG_LANCE) += lance.o + +obj-$(CONFIG_ETRAXFS) += etraxfs_eth.o +obj-$(CONFIG_COLDFIRE) += mcf_fec.o +obj-$(CONFIG_MILKYMIST) += milkymist-minimac2.o +obj-$(CONFIG_PSERIES) += spapr_llan.o +obj-$(CONFIG_XILINX_ETHLITE) += xilinx_ethlite.o + +obj-$(CONFIG_VIRTIO) += virtio-net.o +obj-y += vhost_net.o + +obj-$(CONFIG_ETSEC) += fsl_etsec/etsec.o fsl_etsec/registers.o \ + fsl_etsec/rings.o fsl_etsec/miim.o + +common-obj-$(CONFIG_ROCKER) += rocker/rocker.o rocker/rocker_fp.o \ + rocker/rocker_desc.o rocker/rocker_world.o \ + rocker/rocker_of_dpa.o +obj-$(call lnot,$(CONFIG_ROCKER)) += rocker/qmp-norocker.o diff --git a/qemu/hw/net/allwinner_emac.c b/qemu/hw/net/allwinner_emac.c new file mode 100644 index 000000000..0407dee6d --- /dev/null +++ b/qemu/hw/net/allwinner_emac.c @@ -0,0 +1,533 @@ +/* + * Emulation of Allwinner EMAC Fast Ethernet controller and + * Realtek RTL8201CP PHY + * + * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com> + * + * This model is based on reverse-engineering of Linux kernel driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "hw/sysbus.h" +#include "net/net.h" +#include "qemu/fifo8.h" +#include "hw/net/allwinner_emac.h" +#include <zlib.h> + +static uint8_t padding[60]; + +static void mii_set_link(RTL8201CPState *mii, bool link_ok) +{ + if (link_ok) { + mii->bmsr |= MII_BMSR_LINK_ST | MII_BMSR_AN_COMP; + mii->anlpar |= MII_ANAR_TXFD | MII_ANAR_10FD | MII_ANAR_10 | + MII_ANAR_CSMACD; + } else { + mii->bmsr &= ~(MII_BMSR_LINK_ST | MII_BMSR_AN_COMP); + mii->anlpar = MII_ANAR_TX; + } +} + +static void mii_reset(RTL8201CPState *mii, bool link_ok) +{ + mii->bmcr = MII_BMCR_FD | MII_BMCR_AUTOEN | MII_BMCR_SPEED; + mii->bmsr = MII_BMSR_100TX_FD | MII_BMSR_100TX_HD | MII_BMSR_10T_FD | + MII_BMSR_10T_HD | MII_BMSR_MFPS | MII_BMSR_AUTONEG; + mii->anar = MII_ANAR_TXFD | MII_ANAR_TX | MII_ANAR_10FD | MII_ANAR_10 | + MII_ANAR_CSMACD; + mii->anlpar = MII_ANAR_TX; + + mii_set_link(mii, link_ok); +} + +static uint16_t RTL8201CP_mdio_read(AwEmacState *s, uint8_t addr, uint8_t reg) +{ + RTL8201CPState *mii = &s->mii; + uint16_t ret = 0xffff; + + if (addr == s->phy_addr) { + switch (reg) { + case MII_BMCR: + return mii->bmcr; + case MII_BMSR: + return mii->bmsr; + case MII_PHYID1: + return RTL8201CP_PHYID1; + case MII_PHYID2: + return RTL8201CP_PHYID2; + case MII_ANAR: + return mii->anar; + case MII_ANLPAR: + return mii->anlpar; + case MII_ANER: + case MII_NSR: + case MII_LBREMR: + case MII_REC: + case MII_SNRDR: + case MII_TEST: + qemu_log_mask(LOG_UNIMP, + "allwinner_emac: read from unimpl. mii reg 0x%x\n", + reg); + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: read from invalid mii reg 0x%x\n", + reg); + return 0; + } + } + return ret; +} + +static void RTL8201CP_mdio_write(AwEmacState *s, uint8_t addr, uint8_t reg, + uint16_t value) +{ + RTL8201CPState *mii = &s->mii; + NetClientState *nc; + + if (addr == s->phy_addr) { + switch (reg) { + case MII_BMCR: + if (value & MII_BMCR_RESET) { + nc = qemu_get_queue(s->nic); + mii_reset(mii, !nc->link_down); + } else { + mii->bmcr = value; + } + break; + case MII_ANAR: + mii->anar = value; + break; + case MII_BMSR: + case MII_PHYID1: + case MII_PHYID2: + case MII_ANLPAR: + case MII_ANER: + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: write to read-only mii reg 0x%x\n", + reg); + break; + case MII_NSR: + case MII_LBREMR: + case MII_REC: + case MII_SNRDR: + case MII_TEST: + qemu_log_mask(LOG_UNIMP, + "allwinner_emac: write to unimpl. mii reg 0x%x\n", + reg); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: write to invalid mii reg 0x%x\n", + reg); + } + } +} + +static void aw_emac_update_irq(AwEmacState *s) +{ + qemu_set_irq(s->irq, (s->int_sta & s->int_ctl) != 0); +} + +static void aw_emac_tx_reset(AwEmacState *s, int chan) +{ + fifo8_reset(&s->tx_fifo[chan]); + s->tx_length[chan] = 0; +} + +static void aw_emac_rx_reset(AwEmacState *s) +{ + fifo8_reset(&s->rx_fifo); + s->rx_num_packets = 0; + s->rx_packet_size = 0; + s->rx_packet_pos = 0; +} + +static void fifo8_push_word(Fifo8 *fifo, uint32_t val) +{ + fifo8_push(fifo, val); + fifo8_push(fifo, val >> 8); + fifo8_push(fifo, val >> 16); + fifo8_push(fifo, val >> 24); +} + +static uint32_t fifo8_pop_word(Fifo8 *fifo) +{ + uint32_t ret; + + ret = fifo8_pop(fifo); + ret |= fifo8_pop(fifo) << 8; + ret |= fifo8_pop(fifo) << 16; + ret |= fifo8_pop(fifo) << 24; + + return ret; +} + +static int aw_emac_can_receive(NetClientState *nc) +{ + AwEmacState *s = qemu_get_nic_opaque(nc); + + /* + * To avoid packet drops, allow reception only when there is space + * for a full frame: 1522 + 8 (rx headers) + 2 (padding). + */ + return (s->ctl & EMAC_CTL_RX_EN) && (fifo8_num_free(&s->rx_fifo) >= 1532); +} + +static ssize_t aw_emac_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + AwEmacState *s = qemu_get_nic_opaque(nc); + Fifo8 *fifo = &s->rx_fifo; + size_t padded_size, total_size; + uint32_t crc; + + padded_size = size > 60 ? size : 60; + total_size = QEMU_ALIGN_UP(RX_HDR_SIZE + padded_size + CRC_SIZE, 4); + + if (!(s->ctl & EMAC_CTL_RX_EN) || (fifo8_num_free(fifo) < total_size)) { + return -1; + } + + fifo8_push_word(fifo, EMAC_UNDOCUMENTED_MAGIC); + fifo8_push_word(fifo, EMAC_RX_HEADER(padded_size + CRC_SIZE, + EMAC_RX_IO_DATA_STATUS_OK)); + fifo8_push_all(fifo, buf, size); + crc = crc32(~0, buf, size); + + if (padded_size != size) { + fifo8_push_all(fifo, padding, padded_size - size); + crc = crc32(crc, padding, padded_size - size); + } + + fifo8_push_word(fifo, crc); + fifo8_push_all(fifo, padding, QEMU_ALIGN_UP(padded_size, 4) - padded_size); + s->rx_num_packets++; + + s->int_sta |= EMAC_INT_RX; + aw_emac_update_irq(s); + + return size; +} + +static void aw_emac_reset(DeviceState *dev) +{ + AwEmacState *s = AW_EMAC(dev); + NetClientState *nc = qemu_get_queue(s->nic); + + s->ctl = 0; + s->tx_mode = 0; + s->int_ctl = 0; + s->int_sta = 0; + s->tx_channel = 0; + s->phy_target = 0; + + aw_emac_tx_reset(s, 0); + aw_emac_tx_reset(s, 1); + aw_emac_rx_reset(s); + + mii_reset(&s->mii, !nc->link_down); +} + +static uint64_t aw_emac_read(void *opaque, hwaddr offset, unsigned size) +{ + AwEmacState *s = opaque; + Fifo8 *fifo = &s->rx_fifo; + NetClientState *nc; + uint64_t ret; + + switch (offset) { + case EMAC_CTL_REG: + return s->ctl; + case EMAC_TX_MODE_REG: + return s->tx_mode; + case EMAC_TX_INS_REG: + return s->tx_channel; + case EMAC_RX_CTL_REG: + return s->rx_ctl; + case EMAC_RX_IO_DATA_REG: + if (!s->rx_num_packets) { + qemu_log_mask(LOG_GUEST_ERROR, + "Read IO data register when no packet available"); + return 0; + } + + ret = fifo8_pop_word(fifo); + + switch (s->rx_packet_pos) { + case 0: /* Word is magic header */ + s->rx_packet_pos += 4; + break; + case 4: /* Word is rx info header */ + s->rx_packet_pos += 4; + s->rx_packet_size = QEMU_ALIGN_UP(extract32(ret, 0, 16), 4); + break; + default: /* Word is packet data */ + s->rx_packet_pos += 4; + s->rx_packet_size -= 4; + + if (!s->rx_packet_size) { + s->rx_packet_pos = 0; + s->rx_num_packets--; + nc = qemu_get_queue(s->nic); + if (aw_emac_can_receive(nc)) { + qemu_flush_queued_packets(nc); + } + } + } + return ret; + case EMAC_RX_FBC_REG: + return s->rx_num_packets; + case EMAC_INT_CTL_REG: + return s->int_ctl; + case EMAC_INT_STA_REG: + return s->int_sta; + case EMAC_MAC_MRDD_REG: + return RTL8201CP_mdio_read(s, + extract32(s->phy_target, PHY_ADDR_SHIFT, 8), + extract32(s->phy_target, PHY_REG_SHIFT, 8)); + default: + qemu_log_mask(LOG_UNIMP, + "allwinner_emac: read access to unknown register 0x" + TARGET_FMT_plx "\n", offset); + ret = 0; + } + + return ret; +} + +static void aw_emac_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + AwEmacState *s = opaque; + Fifo8 *fifo; + NetClientState *nc = qemu_get_queue(s->nic); + int chan; + + switch (offset) { + case EMAC_CTL_REG: + if (value & EMAC_CTL_RESET) { + aw_emac_reset(DEVICE(s)); + value &= ~EMAC_CTL_RESET; + } + s->ctl = value; + if (aw_emac_can_receive(nc)) { + qemu_flush_queued_packets(nc); + } + break; + case EMAC_TX_MODE_REG: + s->tx_mode = value; + break; + case EMAC_TX_CTL0_REG: + case EMAC_TX_CTL1_REG: + chan = (offset == EMAC_TX_CTL0_REG ? 0 : 1); + if ((value & 1) && (s->ctl & EMAC_CTL_TX_EN)) { + uint32_t len, ret; + const uint8_t *data; + + fifo = &s->tx_fifo[chan]; + len = s->tx_length[chan]; + + if (len > fifo8_num_used(fifo)) { + len = fifo8_num_used(fifo); + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: TX length > fifo data length\n"); + } + if (len > 0) { + data = fifo8_pop_buf(fifo, len, &ret); + qemu_send_packet(nc, data, ret); + aw_emac_tx_reset(s, chan); + /* Raise TX interrupt */ + s->int_sta |= EMAC_INT_TX_CHAN(chan); + aw_emac_update_irq(s); + } + } + break; + case EMAC_TX_INS_REG: + s->tx_channel = value < NUM_TX_FIFOS ? value : 0; + break; + case EMAC_TX_PL0_REG: + case EMAC_TX_PL1_REG: + chan = (offset == EMAC_TX_PL0_REG ? 0 : 1); + if (value > TX_FIFO_SIZE) { + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: invalid TX frame length %d\n", + (int)value); + value = TX_FIFO_SIZE; + } + s->tx_length[chan] = value; + break; + case EMAC_TX_IO_DATA_REG: + fifo = &s->tx_fifo[s->tx_channel]; + if (fifo8_num_free(fifo) < 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "allwinner_emac: TX data overruns fifo\n"); + break; + } + fifo8_push_word(fifo, value); + break; + case EMAC_RX_CTL_REG: + s->rx_ctl = value; + break; + case EMAC_RX_FBC_REG: + if (value == 0) { + aw_emac_rx_reset(s); + } + break; + case EMAC_INT_CTL_REG: + s->int_ctl = value; + aw_emac_update_irq(s); + break; + case EMAC_INT_STA_REG: + s->int_sta &= ~value; + aw_emac_update_irq(s); + break; + case EMAC_MAC_MADR_REG: + s->phy_target = value; + break; + case EMAC_MAC_MWTD_REG: + RTL8201CP_mdio_write(s, extract32(s->phy_target, PHY_ADDR_SHIFT, 8), + extract32(s->phy_target, PHY_REG_SHIFT, 8), value); + break; + default: + qemu_log_mask(LOG_UNIMP, + "allwinner_emac: write access to unknown register 0x" + TARGET_FMT_plx "\n", offset); + } +} + +static void aw_emac_set_link(NetClientState *nc) +{ + AwEmacState *s = qemu_get_nic_opaque(nc); + + mii_set_link(&s->mii, !nc->link_down); +} + +static const MemoryRegionOps aw_emac_mem_ops = { + .read = aw_emac_read, + .write = aw_emac_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static NetClientInfo net_aw_emac_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = aw_emac_can_receive, + .receive = aw_emac_receive, + .link_status_changed = aw_emac_set_link, +}; + +static void aw_emac_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + AwEmacState *s = AW_EMAC(obj); + + memory_region_init_io(&s->iomem, OBJECT(s), &aw_emac_mem_ops, s, + "aw_emac", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); +} + +static void aw_emac_realize(DeviceState *dev, Error **errp) +{ + AwEmacState *s = AW_EMAC(dev); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_aw_emac_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + fifo8_create(&s->rx_fifo, RX_FIFO_SIZE); + fifo8_create(&s->tx_fifo[0], TX_FIFO_SIZE); + fifo8_create(&s->tx_fifo[1], TX_FIFO_SIZE); +} + +static Property aw_emac_properties[] = { + DEFINE_NIC_PROPERTIES(AwEmacState, conf), + DEFINE_PROP_UINT8("phy-addr", AwEmacState, phy_addr, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_mii = { + .name = "rtl8201cp", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(bmcr, RTL8201CPState), + VMSTATE_UINT16(bmsr, RTL8201CPState), + VMSTATE_UINT16(anar, RTL8201CPState), + VMSTATE_UINT16(anlpar, RTL8201CPState), + VMSTATE_END_OF_LIST() + } +}; + +static int aw_emac_post_load(void *opaque, int version_id) +{ + AwEmacState *s = opaque; + + aw_emac_set_link(qemu_get_queue(s->nic)); + + return 0; +} + +static const VMStateDescription vmstate_aw_emac = { + .name = "allwinner_emac", + .version_id = 1, + .minimum_version_id = 1, + .post_load = aw_emac_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(mii, AwEmacState, 1, vmstate_mii, RTL8201CPState), + VMSTATE_UINT32(ctl, AwEmacState), + VMSTATE_UINT32(tx_mode, AwEmacState), + VMSTATE_UINT32(rx_ctl, AwEmacState), + VMSTATE_UINT32(int_ctl, AwEmacState), + VMSTATE_UINT32(int_sta, AwEmacState), + VMSTATE_UINT32(phy_target, AwEmacState), + VMSTATE_FIFO8(rx_fifo, AwEmacState), + VMSTATE_UINT32(rx_num_packets, AwEmacState), + VMSTATE_UINT32(rx_packet_size, AwEmacState), + VMSTATE_UINT32(rx_packet_pos, AwEmacState), + VMSTATE_STRUCT_ARRAY(tx_fifo, AwEmacState, NUM_TX_FIFOS, 1, + vmstate_fifo8, Fifo8), + VMSTATE_UINT32_ARRAY(tx_length, AwEmacState, NUM_TX_FIFOS), + VMSTATE_UINT32(tx_channel, AwEmacState), + VMSTATE_END_OF_LIST() + } +}; + +static void aw_emac_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = aw_emac_realize; + dc->props = aw_emac_properties; + dc->reset = aw_emac_reset; + dc->vmsd = &vmstate_aw_emac; +} + +static const TypeInfo aw_emac_info = { + .name = TYPE_AW_EMAC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AwEmacState), + .instance_init = aw_emac_init, + .class_init = aw_emac_class_init, +}; + +static void aw_emac_register_types(void) +{ + type_register_static(&aw_emac_info); +} + +type_init(aw_emac_register_types) diff --git a/qemu/hw/net/cadence_gem.c b/qemu/hw/net/cadence_gem.c new file mode 100644 index 000000000..494a346cf --- /dev/null +++ b/qemu/hw/net/cadence_gem.c @@ -0,0 +1,1248 @@ +/* + * QEMU Cadence GEM emulation + * + * Copyright (c) 2011 Xilinx, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <zlib.h> /* For crc32 */ + +#include "hw/net/cadence_gem.h" +#include "net/checksum.h" + +#ifdef CADENCE_GEM_ERR_DEBUG +#define DB_PRINT(...) do { \ + fprintf(stderr, ": %s: ", __func__); \ + fprintf(stderr, ## __VA_ARGS__); \ + } while (0); +#else + #define DB_PRINT(...) +#endif + +#define GEM_NWCTRL (0x00000000/4) /* Network Control reg */ +#define GEM_NWCFG (0x00000004/4) /* Network Config reg */ +#define GEM_NWSTATUS (0x00000008/4) /* Network Status reg */ +#define GEM_USERIO (0x0000000C/4) /* User IO reg */ +#define GEM_DMACFG (0x00000010/4) /* DMA Control reg */ +#define GEM_TXSTATUS (0x00000014/4) /* TX Status reg */ +#define GEM_RXQBASE (0x00000018/4) /* RX Q Base address reg */ +#define GEM_TXQBASE (0x0000001C/4) /* TX Q Base address reg */ +#define GEM_RXSTATUS (0x00000020/4) /* RX Status reg */ +#define GEM_ISR (0x00000024/4) /* Interrupt Status reg */ +#define GEM_IER (0x00000028/4) /* Interrupt Enable reg */ +#define GEM_IDR (0x0000002C/4) /* Interrupt Disable reg */ +#define GEM_IMR (0x00000030/4) /* Interrupt Mask reg */ +#define GEM_PHYMNTNC (0x00000034/4) /* Phy Maintenance reg */ +#define GEM_RXPAUSE (0x00000038/4) /* RX Pause Time reg */ +#define GEM_TXPAUSE (0x0000003C/4) /* TX Pause Time reg */ +#define GEM_TXPARTIALSF (0x00000040/4) /* TX Partial Store and Forward */ +#define GEM_RXPARTIALSF (0x00000044/4) /* RX Partial Store and Forward */ +#define GEM_HASHLO (0x00000080/4) /* Hash Low address reg */ +#define GEM_HASHHI (0x00000084/4) /* Hash High address reg */ +#define GEM_SPADDR1LO (0x00000088/4) /* Specific addr 1 low reg */ +#define GEM_SPADDR1HI (0x0000008C/4) /* Specific addr 1 high reg */ +#define GEM_SPADDR2LO (0x00000090/4) /* Specific addr 2 low reg */ +#define GEM_SPADDR2HI (0x00000094/4) /* Specific addr 2 high reg */ +#define GEM_SPADDR3LO (0x00000098/4) /* Specific addr 3 low reg */ +#define GEM_SPADDR3HI (0x0000009C/4) /* Specific addr 3 high reg */ +#define GEM_SPADDR4LO (0x000000A0/4) /* Specific addr 4 low reg */ +#define GEM_SPADDR4HI (0x000000A4/4) /* Specific addr 4 high reg */ +#define GEM_TIDMATCH1 (0x000000A8/4) /* Type ID1 Match reg */ +#define GEM_TIDMATCH2 (0x000000AC/4) /* Type ID2 Match reg */ +#define GEM_TIDMATCH3 (0x000000B0/4) /* Type ID3 Match reg */ +#define GEM_TIDMATCH4 (0x000000B4/4) /* Type ID4 Match reg */ +#define GEM_WOLAN (0x000000B8/4) /* Wake on LAN reg */ +#define GEM_IPGSTRETCH (0x000000BC/4) /* IPG Stretch reg */ +#define GEM_SVLAN (0x000000C0/4) /* Stacked VLAN reg */ +#define GEM_MODID (0x000000FC/4) /* Module ID reg */ +#define GEM_OCTTXLO (0x00000100/4) /* Octects transmitted Low reg */ +#define GEM_OCTTXHI (0x00000104/4) /* Octects transmitted High reg */ +#define GEM_TXCNT (0x00000108/4) /* Error-free Frames transmitted */ +#define GEM_TXBCNT (0x0000010C/4) /* Error-free Broadcast Frames */ +#define GEM_TXMCNT (0x00000110/4) /* Error-free Multicast Frame */ +#define GEM_TXPAUSECNT (0x00000114/4) /* Pause Frames Transmitted */ +#define GEM_TX64CNT (0x00000118/4) /* Error-free 64 TX */ +#define GEM_TX65CNT (0x0000011C/4) /* Error-free 65-127 TX */ +#define GEM_TX128CNT (0x00000120/4) /* Error-free 128-255 TX */ +#define GEM_TX256CNT (0x00000124/4) /* Error-free 256-511 */ +#define GEM_TX512CNT (0x00000128/4) /* Error-free 512-1023 TX */ +#define GEM_TX1024CNT (0x0000012C/4) /* Error-free 1024-1518 TX */ +#define GEM_TX1519CNT (0x00000130/4) /* Error-free larger than 1519 TX */ +#define GEM_TXURUNCNT (0x00000134/4) /* TX under run error counter */ +#define GEM_SINGLECOLLCNT (0x00000138/4) /* Single Collision Frames */ +#define GEM_MULTCOLLCNT (0x0000013C/4) /* Multiple Collision Frames */ +#define GEM_EXCESSCOLLCNT (0x00000140/4) /* Excessive Collision Frames */ +#define GEM_LATECOLLCNT (0x00000144/4) /* Late Collision Frames */ +#define GEM_DEFERTXCNT (0x00000148/4) /* Deferred Transmission Frames */ +#define GEM_CSENSECNT (0x0000014C/4) /* Carrier Sense Error Counter */ +#define GEM_OCTRXLO (0x00000150/4) /* Octects Received register Low */ +#define GEM_OCTRXHI (0x00000154/4) /* Octects Received register High */ +#define GEM_RXCNT (0x00000158/4) /* Error-free Frames Received */ +#define GEM_RXBROADCNT (0x0000015C/4) /* Error-free Broadcast Frames RX */ +#define GEM_RXMULTICNT (0x00000160/4) /* Error-free Multicast Frames RX */ +#define GEM_RXPAUSECNT (0x00000164/4) /* Pause Frames Received Counter */ +#define GEM_RX64CNT (0x00000168/4) /* Error-free 64 byte Frames RX */ +#define GEM_RX65CNT (0x0000016C/4) /* Error-free 65-127B Frames RX */ +#define GEM_RX128CNT (0x00000170/4) /* Error-free 128-255B Frames RX */ +#define GEM_RX256CNT (0x00000174/4) /* Error-free 256-512B Frames RX */ +#define GEM_RX512CNT (0x00000178/4) /* Error-free 512-1023B Frames RX */ +#define GEM_RX1024CNT (0x0000017C/4) /* Error-free 1024-1518B Frames RX */ +#define GEM_RX1519CNT (0x00000180/4) /* Error-free 1519-max Frames RX */ +#define GEM_RXUNDERCNT (0x00000184/4) /* Undersize Frames Received */ +#define GEM_RXOVERCNT (0x00000188/4) /* Oversize Frames Received */ +#define GEM_RXJABCNT (0x0000018C/4) /* Jabbers Received Counter */ +#define GEM_RXFCSCNT (0x00000190/4) /* Frame Check seq. Error Counter */ +#define GEM_RXLENERRCNT (0x00000194/4) /* Length Field Error Counter */ +#define GEM_RXSYMERRCNT (0x00000198/4) /* Symbol Error Counter */ +#define GEM_RXALIGNERRCNT (0x0000019C/4) /* Alignment Error Counter */ +#define GEM_RXRSCERRCNT (0x000001A0/4) /* Receive Resource Error Counter */ +#define GEM_RXORUNCNT (0x000001A4/4) /* Receive Overrun Counter */ +#define GEM_RXIPCSERRCNT (0x000001A8/4) /* IP header Checksum Error Counter */ +#define GEM_RXTCPCCNT (0x000001AC/4) /* TCP Checksum Error Counter */ +#define GEM_RXUDPCCNT (0x000001B0/4) /* UDP Checksum Error Counter */ + +#define GEM_1588S (0x000001D0/4) /* 1588 Timer Seconds */ +#define GEM_1588NS (0x000001D4/4) /* 1588 Timer Nanoseconds */ +#define GEM_1588ADJ (0x000001D8/4) /* 1588 Timer Adjust */ +#define GEM_1588INC (0x000001DC/4) /* 1588 Timer Increment */ +#define GEM_PTPETXS (0x000001E0/4) /* PTP Event Frame Transmitted (s) */ +#define GEM_PTPETXNS (0x000001E4/4) /* PTP Event Frame Transmitted (ns) */ +#define GEM_PTPERXS (0x000001E8/4) /* PTP Event Frame Received (s) */ +#define GEM_PTPERXNS (0x000001EC/4) /* PTP Event Frame Received (ns) */ +#define GEM_PTPPTXS (0x000001E0/4) /* PTP Peer Frame Transmitted (s) */ +#define GEM_PTPPTXNS (0x000001E4/4) /* PTP Peer Frame Transmitted (ns) */ +#define GEM_PTPPRXS (0x000001E8/4) /* PTP Peer Frame Received (s) */ +#define GEM_PTPPRXNS (0x000001EC/4) /* PTP Peer Frame Received (ns) */ + +/* Design Configuration Registers */ +#define GEM_DESCONF (0x00000280/4) +#define GEM_DESCONF2 (0x00000284/4) +#define GEM_DESCONF3 (0x00000288/4) +#define GEM_DESCONF4 (0x0000028C/4) +#define GEM_DESCONF5 (0x00000290/4) +#define GEM_DESCONF6 (0x00000294/4) +#define GEM_DESCONF7 (0x00000298/4) + +/*****************************************/ +#define GEM_NWCTRL_TXSTART 0x00000200 /* Transmit Enable */ +#define GEM_NWCTRL_TXENA 0x00000008 /* Transmit Enable */ +#define GEM_NWCTRL_RXENA 0x00000004 /* Receive Enable */ +#define GEM_NWCTRL_LOCALLOOP 0x00000002 /* Local Loopback */ + +#define GEM_NWCFG_STRIP_FCS 0x00020000 /* Strip FCS field */ +#define GEM_NWCFG_LERR_DISC 0x00010000 /* Discard RX frames with len err */ +#define GEM_NWCFG_BUFF_OFST_M 0x0000C000 /* Receive buffer offset mask */ +#define GEM_NWCFG_BUFF_OFST_S 14 /* Receive buffer offset shift */ +#define GEM_NWCFG_UCAST_HASH 0x00000080 /* accept unicast if hash match */ +#define GEM_NWCFG_MCAST_HASH 0x00000040 /* accept multicast if hash match */ +#define GEM_NWCFG_BCAST_REJ 0x00000020 /* Reject broadcast packets */ +#define GEM_NWCFG_PROMISC 0x00000010 /* Accept all packets */ + +#define GEM_DMACFG_RBUFSZ_M 0x00FF0000 /* DMA RX Buffer Size mask */ +#define GEM_DMACFG_RBUFSZ_S 16 /* DMA RX Buffer Size shift */ +#define GEM_DMACFG_RBUFSZ_MUL 64 /* DMA RX Buffer Size multiplier */ +#define GEM_DMACFG_TXCSUM_OFFL 0x00000800 /* Transmit checksum offload */ + +#define GEM_TXSTATUS_TXCMPL 0x00000020 /* Transmit Complete */ +#define GEM_TXSTATUS_USED 0x00000001 /* sw owned descriptor encountered */ + +#define GEM_RXSTATUS_FRMRCVD 0x00000002 /* Frame received */ +#define GEM_RXSTATUS_NOBUF 0x00000001 /* Buffer unavailable */ + +/* GEM_ISR GEM_IER GEM_IDR GEM_IMR */ +#define GEM_INT_TXCMPL 0x00000080 /* Transmit Complete */ +#define GEM_INT_TXUSED 0x00000008 +#define GEM_INT_RXUSED 0x00000004 +#define GEM_INT_RXCMPL 0x00000002 + +#define GEM_PHYMNTNC_OP_R 0x20000000 /* read operation */ +#define GEM_PHYMNTNC_OP_W 0x10000000 /* write operation */ +#define GEM_PHYMNTNC_ADDR 0x0F800000 /* Address bits */ +#define GEM_PHYMNTNC_ADDR_SHFT 23 +#define GEM_PHYMNTNC_REG 0x007C0000 /* register bits */ +#define GEM_PHYMNTNC_REG_SHIFT 18 + +/* Marvell PHY definitions */ +#define BOARD_PHY_ADDRESS 23 /* PHY address we will emulate a device at */ + +#define PHY_REG_CONTROL 0 +#define PHY_REG_STATUS 1 +#define PHY_REG_PHYID1 2 +#define PHY_REG_PHYID2 3 +#define PHY_REG_ANEGADV 4 +#define PHY_REG_LINKPABIL 5 +#define PHY_REG_ANEGEXP 6 +#define PHY_REG_NEXTP 7 +#define PHY_REG_LINKPNEXTP 8 +#define PHY_REG_100BTCTRL 9 +#define PHY_REG_1000BTSTAT 10 +#define PHY_REG_EXTSTAT 15 +#define PHY_REG_PHYSPCFC_CTL 16 +#define PHY_REG_PHYSPCFC_ST 17 +#define PHY_REG_INT_EN 18 +#define PHY_REG_INT_ST 19 +#define PHY_REG_EXT_PHYSPCFC_CTL 20 +#define PHY_REG_RXERR 21 +#define PHY_REG_EACD 22 +#define PHY_REG_LED 24 +#define PHY_REG_LED_OVRD 25 +#define PHY_REG_EXT_PHYSPCFC_CTL2 26 +#define PHY_REG_EXT_PHYSPCFC_ST 27 +#define PHY_REG_CABLE_DIAG 28 + +#define PHY_REG_CONTROL_RST 0x8000 +#define PHY_REG_CONTROL_LOOP 0x4000 +#define PHY_REG_CONTROL_ANEG 0x1000 + +#define PHY_REG_STATUS_LINK 0x0004 +#define PHY_REG_STATUS_ANEGCMPL 0x0020 + +#define PHY_REG_INT_ST_ANEGCMPL 0x0800 +#define PHY_REG_INT_ST_LINKC 0x0400 +#define PHY_REG_INT_ST_ENERGY 0x0010 + +/***********************************************************************/ +#define GEM_RX_REJECT (-1) +#define GEM_RX_PROMISCUOUS_ACCEPT (-2) +#define GEM_RX_BROADCAST_ACCEPT (-3) +#define GEM_RX_MULTICAST_HASH_ACCEPT (-4) +#define GEM_RX_UNICAST_HASH_ACCEPT (-5) + +#define GEM_RX_SAR_ACCEPT 0 + +/***********************************************************************/ + +#define DESC_1_USED 0x80000000 +#define DESC_1_LENGTH 0x00001FFF + +#define DESC_1_TX_WRAP 0x40000000 +#define DESC_1_TX_LAST 0x00008000 + +#define DESC_0_RX_WRAP 0x00000002 +#define DESC_0_RX_OWNERSHIP 0x00000001 + +#define R_DESC_1_RX_SAR_SHIFT 25 +#define R_DESC_1_RX_SAR_LENGTH 2 +#define R_DESC_1_RX_SAR_MATCH (1 << 27) +#define R_DESC_1_RX_UNICAST_HASH (1 << 29) +#define R_DESC_1_RX_MULTICAST_HASH (1 << 30) +#define R_DESC_1_RX_BROADCAST (1 << 31) + +#define DESC_1_RX_SOF 0x00004000 +#define DESC_1_RX_EOF 0x00008000 + +static inline unsigned tx_desc_get_buffer(unsigned *desc) +{ + return desc[0]; +} + +static inline unsigned tx_desc_get_used(unsigned *desc) +{ + return (desc[1] & DESC_1_USED) ? 1 : 0; +} + +static inline void tx_desc_set_used(unsigned *desc) +{ + desc[1] |= DESC_1_USED; +} + +static inline unsigned tx_desc_get_wrap(unsigned *desc) +{ + return (desc[1] & DESC_1_TX_WRAP) ? 1 : 0; +} + +static inline unsigned tx_desc_get_last(unsigned *desc) +{ + return (desc[1] & DESC_1_TX_LAST) ? 1 : 0; +} + +static inline unsigned tx_desc_get_length(unsigned *desc) +{ + return desc[1] & DESC_1_LENGTH; +} + +static inline void print_gem_tx_desc(unsigned *desc) +{ + DB_PRINT("TXDESC:\n"); + DB_PRINT("bufaddr: 0x%08x\n", *desc); + DB_PRINT("used_hw: %d\n", tx_desc_get_used(desc)); + DB_PRINT("wrap: %d\n", tx_desc_get_wrap(desc)); + DB_PRINT("last: %d\n", tx_desc_get_last(desc)); + DB_PRINT("length: %d\n", tx_desc_get_length(desc)); +} + +static inline unsigned rx_desc_get_buffer(unsigned *desc) +{ + return desc[0] & ~0x3UL; +} + +static inline unsigned rx_desc_get_wrap(unsigned *desc) +{ + return desc[0] & DESC_0_RX_WRAP ? 1 : 0; +} + +static inline unsigned rx_desc_get_ownership(unsigned *desc) +{ + return desc[0] & DESC_0_RX_OWNERSHIP ? 1 : 0; +} + +static inline void rx_desc_set_ownership(unsigned *desc) +{ + desc[0] |= DESC_0_RX_OWNERSHIP; +} + +static inline void rx_desc_set_sof(unsigned *desc) +{ + desc[1] |= DESC_1_RX_SOF; +} + +static inline void rx_desc_set_eof(unsigned *desc) +{ + desc[1] |= DESC_1_RX_EOF; +} + +static inline void rx_desc_set_length(unsigned *desc, unsigned len) +{ + desc[1] &= ~DESC_1_LENGTH; + desc[1] |= len; +} + +static inline void rx_desc_set_broadcast(unsigned *desc) +{ + desc[1] |= R_DESC_1_RX_BROADCAST; +} + +static inline void rx_desc_set_unicast_hash(unsigned *desc) +{ + desc[1] |= R_DESC_1_RX_UNICAST_HASH; +} + +static inline void rx_desc_set_multicast_hash(unsigned *desc) +{ + desc[1] |= R_DESC_1_RX_MULTICAST_HASH; +} + +static inline void rx_desc_set_sar(unsigned *desc, int sar_idx) +{ + desc[1] = deposit32(desc[1], R_DESC_1_RX_SAR_SHIFT, R_DESC_1_RX_SAR_LENGTH, + sar_idx); + desc[1] |= R_DESC_1_RX_SAR_MATCH; +} + +/* The broadcast MAC address: 0xFFFFFFFFFFFF */ +static const uint8_t broadcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + +/* + * gem_init_register_masks: + * One time initialization. + * Set masks to identify which register bits have magical clear properties + */ +static void gem_init_register_masks(CadenceGEMState *s) +{ + /* Mask of register bits which are read only */ + memset(&s->regs_ro[0], 0, sizeof(s->regs_ro)); + s->regs_ro[GEM_NWCTRL] = 0xFFF80000; + s->regs_ro[GEM_NWSTATUS] = 0xFFFFFFFF; + s->regs_ro[GEM_DMACFG] = 0xFE00F000; + s->regs_ro[GEM_TXSTATUS] = 0xFFFFFE08; + s->regs_ro[GEM_RXQBASE] = 0x00000003; + s->regs_ro[GEM_TXQBASE] = 0x00000003; + s->regs_ro[GEM_RXSTATUS] = 0xFFFFFFF0; + s->regs_ro[GEM_ISR] = 0xFFFFFFFF; + s->regs_ro[GEM_IMR] = 0xFFFFFFFF; + s->regs_ro[GEM_MODID] = 0xFFFFFFFF; + + /* Mask of register bits which are clear on read */ + memset(&s->regs_rtc[0], 0, sizeof(s->regs_rtc)); + s->regs_rtc[GEM_ISR] = 0xFFFFFFFF; + + /* Mask of register bits which are write 1 to clear */ + memset(&s->regs_w1c[0], 0, sizeof(s->regs_w1c)); + s->regs_w1c[GEM_TXSTATUS] = 0x000001F7; + s->regs_w1c[GEM_RXSTATUS] = 0x0000000F; + + /* Mask of register bits which are write only */ + memset(&s->regs_wo[0], 0, sizeof(s->regs_wo)); + s->regs_wo[GEM_NWCTRL] = 0x00073E60; + s->regs_wo[GEM_IER] = 0x07FFFFFF; + s->regs_wo[GEM_IDR] = 0x07FFFFFF; +} + +/* + * phy_update_link: + * Make the emulated PHY link state match the QEMU "interface" state. + */ +static void phy_update_link(CadenceGEMState *s) +{ + DB_PRINT("down %d\n", qemu_get_queue(s->nic)->link_down); + + /* Autonegotiation status mirrors link status. */ + if (qemu_get_queue(s->nic)->link_down) { + s->phy_regs[PHY_REG_STATUS] &= ~(PHY_REG_STATUS_ANEGCMPL | + PHY_REG_STATUS_LINK); + s->phy_regs[PHY_REG_INT_ST] |= PHY_REG_INT_ST_LINKC; + } else { + s->phy_regs[PHY_REG_STATUS] |= (PHY_REG_STATUS_ANEGCMPL | + PHY_REG_STATUS_LINK); + s->phy_regs[PHY_REG_INT_ST] |= (PHY_REG_INT_ST_LINKC | + PHY_REG_INT_ST_ANEGCMPL | + PHY_REG_INT_ST_ENERGY); + } +} + +static int gem_can_receive(NetClientState *nc) +{ + CadenceGEMState *s; + + s = qemu_get_nic_opaque(nc); + + /* Do nothing if receive is not enabled. */ + if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_RXENA)) { + if (s->can_rx_state != 1) { + s->can_rx_state = 1; + DB_PRINT("can't receive - no enable\n"); + } + return 0; + } + + if (rx_desc_get_ownership(s->rx_desc) == 1) { + if (s->can_rx_state != 2) { + s->can_rx_state = 2; + DB_PRINT("can't receive - busy buffer descriptor 0x%x\n", + s->rx_desc_addr); + } + return 0; + } + + if (s->can_rx_state != 0) { + s->can_rx_state = 0; + DB_PRINT("can receive 0x%x\n", s->rx_desc_addr); + } + return 1; +} + +/* + * gem_update_int_status: + * Raise or lower interrupt based on current status. + */ +static void gem_update_int_status(CadenceGEMState *s) +{ + if (s->regs[GEM_ISR]) { + DB_PRINT("asserting int. (0x%08x)\n", s->regs[GEM_ISR]); + qemu_set_irq(s->irq, 1); + } +} + +/* + * gem_receive_updatestats: + * Increment receive statistics. + */ +static void gem_receive_updatestats(CadenceGEMState *s, const uint8_t *packet, + unsigned bytes) +{ + uint64_t octets; + + /* Total octets (bytes) received */ + octets = ((uint64_t)(s->regs[GEM_OCTRXLO]) << 32) | + s->regs[GEM_OCTRXHI]; + octets += bytes; + s->regs[GEM_OCTRXLO] = octets >> 32; + s->regs[GEM_OCTRXHI] = octets; + + /* Error-free Frames received */ + s->regs[GEM_RXCNT]++; + + /* Error-free Broadcast Frames counter */ + if (!memcmp(packet, broadcast_addr, 6)) { + s->regs[GEM_RXBROADCNT]++; + } + + /* Error-free Multicast Frames counter */ + if (packet[0] == 0x01) { + s->regs[GEM_RXMULTICNT]++; + } + + if (bytes <= 64) { + s->regs[GEM_RX64CNT]++; + } else if (bytes <= 127) { + s->regs[GEM_RX65CNT]++; + } else if (bytes <= 255) { + s->regs[GEM_RX128CNT]++; + } else if (bytes <= 511) { + s->regs[GEM_RX256CNT]++; + } else if (bytes <= 1023) { + s->regs[GEM_RX512CNT]++; + } else if (bytes <= 1518) { + s->regs[GEM_RX1024CNT]++; + } else { + s->regs[GEM_RX1519CNT]++; + } +} + +/* + * Get the MAC Address bit from the specified position + */ +static unsigned get_bit(const uint8_t *mac, unsigned bit) +{ + unsigned byte; + + byte = mac[bit / 8]; + byte >>= (bit & 0x7); + byte &= 1; + + return byte; +} + +/* + * Calculate a GEM MAC Address hash index + */ +static unsigned calc_mac_hash(const uint8_t *mac) +{ + int index_bit, mac_bit; + unsigned hash_index; + + hash_index = 0; + mac_bit = 5; + for (index_bit = 5; index_bit >= 0; index_bit--) { + hash_index |= (get_bit(mac, mac_bit) ^ + get_bit(mac, mac_bit + 6) ^ + get_bit(mac, mac_bit + 12) ^ + get_bit(mac, mac_bit + 18) ^ + get_bit(mac, mac_bit + 24) ^ + get_bit(mac, mac_bit + 30) ^ + get_bit(mac, mac_bit + 36) ^ + get_bit(mac, mac_bit + 42)) << index_bit; + mac_bit--; + } + + return hash_index; +} + +/* + * gem_mac_address_filter: + * Accept or reject this destination address? + * Returns: + * GEM_RX_REJECT: reject + * >= 0: Specific address accept (which matched SAR is returned) + * others for various other modes of accept: + * GEM_RM_PROMISCUOUS_ACCEPT, GEM_RX_BROADCAST_ACCEPT, + * GEM_RX_MULTICAST_HASH_ACCEPT or GEM_RX_UNICAST_HASH_ACCEPT + */ +static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) +{ + uint8_t *gem_spaddr; + int i; + + /* Promiscuous mode? */ + if (s->regs[GEM_NWCFG] & GEM_NWCFG_PROMISC) { + return GEM_RX_PROMISCUOUS_ACCEPT; + } + + if (!memcmp(packet, broadcast_addr, 6)) { + /* Reject broadcast packets? */ + if (s->regs[GEM_NWCFG] & GEM_NWCFG_BCAST_REJ) { + return GEM_RX_REJECT; + } + return GEM_RX_BROADCAST_ACCEPT; + } + + /* Accept packets -w- hash match? */ + if ((packet[0] == 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_MCAST_HASH)) || + (packet[0] != 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_UCAST_HASH))) { + unsigned hash_index; + + hash_index = calc_mac_hash(packet); + if (hash_index < 32) { + if (s->regs[GEM_HASHLO] & (1<<hash_index)) { + return packet[0] == 0x01 ? GEM_RX_MULTICAST_HASH_ACCEPT : + GEM_RX_UNICAST_HASH_ACCEPT; + } + } else { + hash_index -= 32; + if (s->regs[GEM_HASHHI] & (1<<hash_index)) { + return packet[0] == 0x01 ? GEM_RX_MULTICAST_HASH_ACCEPT : + GEM_RX_UNICAST_HASH_ACCEPT; + } + } + } + + /* Check all 4 specific addresses */ + gem_spaddr = (uint8_t *)&(s->regs[GEM_SPADDR1LO]); + for (i = 3; i >= 0; i--) { + if (s->sar_active[i] && !memcmp(packet, gem_spaddr + 8 * i, 6)) { + return GEM_RX_SAR_ACCEPT + i; + } + } + + /* No address match; reject the packet */ + return GEM_RX_REJECT; +} + +static void gem_get_rx_desc(CadenceGEMState *s) +{ + DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr); + /* read current descriptor */ + cpu_physical_memory_read(s->rx_desc_addr, + (uint8_t *)s->rx_desc, sizeof(s->rx_desc)); + + /* Descriptor owned by software ? */ + if (rx_desc_get_ownership(s->rx_desc) == 1) { + DB_PRINT("descriptor 0x%x owned by sw.\n", + (unsigned)s->rx_desc_addr); + s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF; + s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]); + /* Handle interrupt consequences */ + gem_update_int_status(s); + } +} + +/* + * gem_receive: + * Fit a packet handed to us by QEMU into the receive descriptor ring. + */ +static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + CadenceGEMState *s; + unsigned rxbufsize, bytes_to_copy; + unsigned rxbuf_offset; + uint8_t rxbuf[2048]; + uint8_t *rxbuf_ptr; + bool first_desc = true; + int maf; + + s = qemu_get_nic_opaque(nc); + + /* Is this destination MAC address "for us" ? */ + maf = gem_mac_address_filter(s, buf); + if (maf == GEM_RX_REJECT) { + return -1; + } + + /* Discard packets with receive length error enabled ? */ + if (s->regs[GEM_NWCFG] & GEM_NWCFG_LERR_DISC) { + unsigned type_len; + + /* Fish the ethertype / length field out of the RX packet */ + type_len = buf[12] << 8 | buf[13]; + /* It is a length field, not an ethertype */ + if (type_len < 0x600) { + if (size < type_len) { + /* discard */ + return -1; + } + } + } + + /* + * Determine configured receive buffer offset (probably 0) + */ + rxbuf_offset = (s->regs[GEM_NWCFG] & GEM_NWCFG_BUFF_OFST_M) >> + GEM_NWCFG_BUFF_OFST_S; + + /* The configure size of each receive buffer. Determines how many + * buffers needed to hold this packet. + */ + rxbufsize = ((s->regs[GEM_DMACFG] & GEM_DMACFG_RBUFSZ_M) >> + GEM_DMACFG_RBUFSZ_S) * GEM_DMACFG_RBUFSZ_MUL; + bytes_to_copy = size; + + /* Pad to minimum length. Assume FCS field is stripped, logic + * below will increment it to the real minimum of 64 when + * not FCS stripping + */ + if (size < 60) { + size = 60; + } + + /* Strip of FCS field ? (usually yes) */ + if (s->regs[GEM_NWCFG] & GEM_NWCFG_STRIP_FCS) { + rxbuf_ptr = (void *)buf; + } else { + unsigned crc_val; + + /* The application wants the FCS field, which QEMU does not provide. + * We must try and calculate one. + */ + + memcpy(rxbuf, buf, size); + memset(rxbuf + size, 0, sizeof(rxbuf) - size); + rxbuf_ptr = rxbuf; + crc_val = cpu_to_le32(crc32(0, rxbuf, MAX(size, 60))); + memcpy(rxbuf + size, &crc_val, sizeof(crc_val)); + + bytes_to_copy += 4; + size += 4; + } + + DB_PRINT("config bufsize: %d packet size: %ld\n", rxbufsize, size); + + while (bytes_to_copy) { + /* Do nothing if receive is not enabled. */ + if (!gem_can_receive(nc)) { + assert(!first_desc); + return -1; + } + + DB_PRINT("copy %d bytes to 0x%x\n", MIN(bytes_to_copy, rxbufsize), + rx_desc_get_buffer(s->rx_desc)); + + /* Copy packet data to emulated DMA buffer */ + cpu_physical_memory_write(rx_desc_get_buffer(s->rx_desc) + rxbuf_offset, + rxbuf_ptr, MIN(bytes_to_copy, rxbufsize)); + rxbuf_ptr += MIN(bytes_to_copy, rxbufsize); + bytes_to_copy -= MIN(bytes_to_copy, rxbufsize); + + /* Update the descriptor. */ + if (first_desc) { + rx_desc_set_sof(s->rx_desc); + first_desc = false; + } + if (bytes_to_copy == 0) { + rx_desc_set_eof(s->rx_desc); + rx_desc_set_length(s->rx_desc, size); + } + rx_desc_set_ownership(s->rx_desc); + + switch (maf) { + case GEM_RX_PROMISCUOUS_ACCEPT: + break; + case GEM_RX_BROADCAST_ACCEPT: + rx_desc_set_broadcast(s->rx_desc); + break; + case GEM_RX_UNICAST_HASH_ACCEPT: + rx_desc_set_unicast_hash(s->rx_desc); + break; + case GEM_RX_MULTICAST_HASH_ACCEPT: + rx_desc_set_multicast_hash(s->rx_desc); + break; + case GEM_RX_REJECT: + abort(); + default: /* SAR */ + rx_desc_set_sar(s->rx_desc, maf); + } + + /* Descriptor write-back. */ + cpu_physical_memory_write(s->rx_desc_addr, + (uint8_t *)s->rx_desc, sizeof(s->rx_desc)); + + /* Next descriptor */ + if (rx_desc_get_wrap(s->rx_desc)) { + DB_PRINT("wrapping RX descriptor list\n"); + s->rx_desc_addr = s->regs[GEM_RXQBASE]; + } else { + DB_PRINT("incrementing RX descriptor list\n"); + s->rx_desc_addr += 8; + } + gem_get_rx_desc(s); + } + + /* Count it */ + gem_receive_updatestats(s, buf, size); + + s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_FRMRCVD; + s->regs[GEM_ISR] |= GEM_INT_RXCMPL & ~(s->regs[GEM_IMR]); + + /* Handle interrupt consequences */ + gem_update_int_status(s); + + return size; +} + +/* + * gem_transmit_updatestats: + * Increment transmit statistics. + */ +static void gem_transmit_updatestats(CadenceGEMState *s, const uint8_t *packet, + unsigned bytes) +{ + uint64_t octets; + + /* Total octets (bytes) transmitted */ + octets = ((uint64_t)(s->regs[GEM_OCTTXLO]) << 32) | + s->regs[GEM_OCTTXHI]; + octets += bytes; + s->regs[GEM_OCTTXLO] = octets >> 32; + s->regs[GEM_OCTTXHI] = octets; + + /* Error-free Frames transmitted */ + s->regs[GEM_TXCNT]++; + + /* Error-free Broadcast Frames counter */ + if (!memcmp(packet, broadcast_addr, 6)) { + s->regs[GEM_TXBCNT]++; + } + + /* Error-free Multicast Frames counter */ + if (packet[0] == 0x01) { + s->regs[GEM_TXMCNT]++; + } + + if (bytes <= 64) { + s->regs[GEM_TX64CNT]++; + } else if (bytes <= 127) { + s->regs[GEM_TX65CNT]++; + } else if (bytes <= 255) { + s->regs[GEM_TX128CNT]++; + } else if (bytes <= 511) { + s->regs[GEM_TX256CNT]++; + } else if (bytes <= 1023) { + s->regs[GEM_TX512CNT]++; + } else if (bytes <= 1518) { + s->regs[GEM_TX1024CNT]++; + } else { + s->regs[GEM_TX1519CNT]++; + } +} + +/* + * gem_transmit: + * Fish packets out of the descriptor ring and feed them to QEMU + */ +static void gem_transmit(CadenceGEMState *s) +{ + unsigned desc[2]; + hwaddr packet_desc_addr; + uint8_t tx_packet[2048]; + uint8_t *p; + unsigned total_bytes; + + /* Do nothing if transmit is not enabled. */ + if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_TXENA)) { + return; + } + + DB_PRINT("\n"); + + /* The packet we will hand off to QEMU. + * Packets scattered across multiple descriptors are gathered to this + * one contiguous buffer first. + */ + p = tx_packet; + total_bytes = 0; + + /* read current descriptor */ + packet_desc_addr = s->tx_desc_addr; + + DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); + cpu_physical_memory_read(packet_desc_addr, + (uint8_t *)desc, sizeof(desc)); + /* Handle all descriptors owned by hardware */ + while (tx_desc_get_used(desc) == 0) { + + /* Do nothing if transmit is not enabled. */ + if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_TXENA)) { + return; + } + print_gem_tx_desc(desc); + + /* The real hardware would eat this (and possibly crash). + * For QEMU let's lend a helping hand. + */ + if ((tx_desc_get_buffer(desc) == 0) || + (tx_desc_get_length(desc) == 0)) { + DB_PRINT("Invalid TX descriptor @ 0x%x\n", + (unsigned)packet_desc_addr); + break; + } + + /* Gather this fragment of the packet from "dma memory" to our contig. + * buffer. + */ + cpu_physical_memory_read(tx_desc_get_buffer(desc), p, + tx_desc_get_length(desc)); + p += tx_desc_get_length(desc); + total_bytes += tx_desc_get_length(desc); + + /* Last descriptor for this packet; hand the whole thing off */ + if (tx_desc_get_last(desc)) { + unsigned desc_first[2]; + + /* Modify the 1st descriptor of this packet to be owned by + * the processor. + */ + cpu_physical_memory_read(s->tx_desc_addr, (uint8_t *)desc_first, + sizeof(desc_first)); + tx_desc_set_used(desc_first); + cpu_physical_memory_write(s->tx_desc_addr, (uint8_t *)desc_first, + sizeof(desc_first)); + /* Advance the hardware current descriptor past this packet */ + if (tx_desc_get_wrap(desc)) { + s->tx_desc_addr = s->regs[GEM_TXQBASE]; + } else { + s->tx_desc_addr = packet_desc_addr + 8; + } + DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr); + + s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL; + s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]); + + /* Handle interrupt consequences */ + gem_update_int_status(s); + + /* Is checksum offload enabled? */ + if (s->regs[GEM_DMACFG] & GEM_DMACFG_TXCSUM_OFFL) { + net_checksum_calculate(tx_packet, total_bytes); + } + + /* Update MAC statistics */ + gem_transmit_updatestats(s, tx_packet, total_bytes); + + /* Send the packet somewhere */ + if (s->phy_loop || (s->regs[GEM_NWCTRL] & GEM_NWCTRL_LOCALLOOP)) { + gem_receive(qemu_get_queue(s->nic), tx_packet, total_bytes); + } else { + qemu_send_packet(qemu_get_queue(s->nic), tx_packet, + total_bytes); + } + + /* Prepare for next packet */ + p = tx_packet; + total_bytes = 0; + } + + /* read next descriptor */ + if (tx_desc_get_wrap(desc)) { + packet_desc_addr = s->regs[GEM_TXQBASE]; + } else { + packet_desc_addr += 8; + } + DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); + cpu_physical_memory_read(packet_desc_addr, + (uint8_t *)desc, sizeof(desc)); + } + + if (tx_desc_get_used(desc)) { + s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED; + s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]); + gem_update_int_status(s); + } +} + +static void gem_phy_reset(CadenceGEMState *s) +{ + memset(&s->phy_regs[0], 0, sizeof(s->phy_regs)); + s->phy_regs[PHY_REG_CONTROL] = 0x1140; + s->phy_regs[PHY_REG_STATUS] = 0x7969; + s->phy_regs[PHY_REG_PHYID1] = 0x0141; + s->phy_regs[PHY_REG_PHYID2] = 0x0CC2; + s->phy_regs[PHY_REG_ANEGADV] = 0x01E1; + s->phy_regs[PHY_REG_LINKPABIL] = 0xCDE1; + s->phy_regs[PHY_REG_ANEGEXP] = 0x000F; + s->phy_regs[PHY_REG_NEXTP] = 0x2001; + s->phy_regs[PHY_REG_LINKPNEXTP] = 0x40E6; + s->phy_regs[PHY_REG_100BTCTRL] = 0x0300; + s->phy_regs[PHY_REG_1000BTSTAT] = 0x7C00; + s->phy_regs[PHY_REG_EXTSTAT] = 0x3000; + s->phy_regs[PHY_REG_PHYSPCFC_CTL] = 0x0078; + s->phy_regs[PHY_REG_PHYSPCFC_ST] = 0xBC00; + s->phy_regs[PHY_REG_EXT_PHYSPCFC_CTL] = 0x0C60; + s->phy_regs[PHY_REG_LED] = 0x4100; + s->phy_regs[PHY_REG_EXT_PHYSPCFC_CTL2] = 0x000A; + s->phy_regs[PHY_REG_EXT_PHYSPCFC_ST] = 0x848B; + + phy_update_link(s); +} + +static void gem_reset(DeviceState *d) +{ + int i; + CadenceGEMState *s = CADENCE_GEM(d); + + DB_PRINT("\n"); + + /* Set post reset register values */ + memset(&s->regs[0], 0, sizeof(s->regs)); + s->regs[GEM_NWCFG] = 0x00080000; + s->regs[GEM_NWSTATUS] = 0x00000006; + s->regs[GEM_DMACFG] = 0x00020784; + s->regs[GEM_IMR] = 0x07ffffff; + s->regs[GEM_TXPAUSE] = 0x0000ffff; + s->regs[GEM_TXPARTIALSF] = 0x000003ff; + s->regs[GEM_RXPARTIALSF] = 0x000003ff; + s->regs[GEM_MODID] = 0x00020118; + s->regs[GEM_DESCONF] = 0x02500111; + s->regs[GEM_DESCONF2] = 0x2ab13fff; + s->regs[GEM_DESCONF5] = 0x002f2145; + s->regs[GEM_DESCONF6] = 0x00000200; + + for (i = 0; i < 4; i++) { + s->sar_active[i] = false; + } + + gem_phy_reset(s); + + gem_update_int_status(s); +} + +static uint16_t gem_phy_read(CadenceGEMState *s, unsigned reg_num) +{ + DB_PRINT("reg: %d value: 0x%04x\n", reg_num, s->phy_regs[reg_num]); + return s->phy_regs[reg_num]; +} + +static void gem_phy_write(CadenceGEMState *s, unsigned reg_num, uint16_t val) +{ + DB_PRINT("reg: %d value: 0x%04x\n", reg_num, val); + + switch (reg_num) { + case PHY_REG_CONTROL: + if (val & PHY_REG_CONTROL_RST) { + /* Phy reset */ + gem_phy_reset(s); + val &= ~(PHY_REG_CONTROL_RST | PHY_REG_CONTROL_LOOP); + s->phy_loop = 0; + } + if (val & PHY_REG_CONTROL_ANEG) { + /* Complete autonegotiation immediately */ + val &= ~PHY_REG_CONTROL_ANEG; + s->phy_regs[PHY_REG_STATUS] |= PHY_REG_STATUS_ANEGCMPL; + } + if (val & PHY_REG_CONTROL_LOOP) { + DB_PRINT("PHY placed in loopback\n"); + s->phy_loop = 1; + } else { + s->phy_loop = 0; + } + break; + } + s->phy_regs[reg_num] = val; +} + +/* + * gem_read32: + * Read a GEM register. + */ +static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) +{ + CadenceGEMState *s; + uint32_t retval; + + s = (CadenceGEMState *)opaque; + + offset >>= 2; + retval = s->regs[offset]; + + DB_PRINT("offset: 0x%04x read: 0x%08x\n", (unsigned)offset*4, retval); + + switch (offset) { + case GEM_ISR: + DB_PRINT("lowering irq on ISR read\n"); + qemu_set_irq(s->irq, 0); + break; + case GEM_PHYMNTNC: + if (retval & GEM_PHYMNTNC_OP_R) { + uint32_t phy_addr, reg_num; + + phy_addr = (retval & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT; + if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) { + reg_num = (retval & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT; + retval &= 0xFFFF0000; + retval |= gem_phy_read(s, reg_num); + } else { + retval |= 0xFFFF; /* No device at this address */ + } + } + break; + } + + /* Squash read to clear bits */ + s->regs[offset] &= ~(s->regs_rtc[offset]); + + /* Do not provide write only bits */ + retval &= ~(s->regs_wo[offset]); + + DB_PRINT("0x%08x\n", retval); + return retval; +} + +/* + * gem_write32: + * Write a GEM register. + */ +static void gem_write(void *opaque, hwaddr offset, uint64_t val, + unsigned size) +{ + CadenceGEMState *s = (CadenceGEMState *)opaque; + uint32_t readonly; + + DB_PRINT("offset: 0x%04x write: 0x%08x ", (unsigned)offset, (unsigned)val); + offset >>= 2; + + /* Squash bits which are read only in write value */ + val &= ~(s->regs_ro[offset]); + /* Preserve (only) bits which are read only and wtc in register */ + readonly = s->regs[offset] & (s->regs_ro[offset] | s->regs_w1c[offset]); + + /* Copy register write to backing store */ + s->regs[offset] = (val & ~s->regs_w1c[offset]) | readonly; + + /* do w1c */ + s->regs[offset] &= ~(s->regs_w1c[offset] & val); + + /* Handle register write side effects */ + switch (offset) { + case GEM_NWCTRL: + if (val & GEM_NWCTRL_RXENA) { + gem_get_rx_desc(s); + } + if (val & GEM_NWCTRL_TXSTART) { + gem_transmit(s); + } + if (!(val & GEM_NWCTRL_TXENA)) { + /* Reset to start of Q when transmit disabled. */ + s->tx_desc_addr = s->regs[GEM_TXQBASE]; + } + if (gem_can_receive(qemu_get_queue(s->nic))) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + + case GEM_TXSTATUS: + gem_update_int_status(s); + break; + case GEM_RXQBASE: + s->rx_desc_addr = val; + break; + case GEM_TXQBASE: + s->tx_desc_addr = val; + break; + case GEM_RXSTATUS: + gem_update_int_status(s); + break; + case GEM_IER: + s->regs[GEM_IMR] &= ~val; + gem_update_int_status(s); + break; + case GEM_IDR: + s->regs[GEM_IMR] |= val; + gem_update_int_status(s); + break; + case GEM_SPADDR1LO: + case GEM_SPADDR2LO: + case GEM_SPADDR3LO: + case GEM_SPADDR4LO: + s->sar_active[(offset - GEM_SPADDR1LO) / 2] = false; + break; + case GEM_SPADDR1HI: + case GEM_SPADDR2HI: + case GEM_SPADDR3HI: + case GEM_SPADDR4HI: + s->sar_active[(offset - GEM_SPADDR1HI) / 2] = true; + break; + case GEM_PHYMNTNC: + if (val & GEM_PHYMNTNC_OP_W) { + uint32_t phy_addr, reg_num; + + phy_addr = (val & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT; + if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) { + reg_num = (val & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT; + gem_phy_write(s, reg_num, val); + } + } + break; + } + + DB_PRINT("newval: 0x%08x\n", s->regs[offset]); +} + +static const MemoryRegionOps gem_ops = { + .read = gem_read, + .write = gem_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void gem_set_link(NetClientState *nc) +{ + DB_PRINT("\n"); + phy_update_link(qemu_get_nic_opaque(nc)); +} + +static NetClientInfo net_gem_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = gem_can_receive, + .receive = gem_receive, + .link_status_changed = gem_set_link, +}; + +static int gem_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + CadenceGEMState *s = CADENCE_GEM(dev); + + DB_PRINT("\n"); + + gem_init_register_masks(s); + memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s, + "enet", sizeof(s->regs)); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->nic = qemu_new_nic(&net_gem_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + + return 0; +} + +static const VMStateDescription vmstate_cadence_gem = { + .name = "cadence_gem", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, CadenceGEMState, CADENCE_GEM_MAXREG), + VMSTATE_UINT16_ARRAY(phy_regs, CadenceGEMState, 32), + VMSTATE_UINT8(phy_loop, CadenceGEMState), + VMSTATE_UINT32(rx_desc_addr, CadenceGEMState), + VMSTATE_UINT32(tx_desc_addr, CadenceGEMState), + VMSTATE_BOOL_ARRAY(sar_active, CadenceGEMState, 4), + VMSTATE_END_OF_LIST(), + } +}; + +static Property gem_properties[] = { + DEFINE_NIC_PROPERTIES(CadenceGEMState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void gem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass); + + sdc->init = gem_init; + dc->props = gem_properties; + dc->vmsd = &vmstate_cadence_gem; + dc->reset = gem_reset; +} + +static const TypeInfo gem_info = { + .name = TYPE_CADENCE_GEM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(CadenceGEMState), + .class_init = gem_class_init, +}; + +static void gem_register_types(void) +{ + type_register_static(&gem_info); +} + +type_init(gem_register_types) diff --git a/qemu/hw/net/dp8393x.c b/qemu/hw/net/dp8393x.c new file mode 100644 index 000000000..ab607e484 --- /dev/null +++ b/qemu/hw/net/dp8393x.c @@ -0,0 +1,910 @@ +/* + * QEMU NS SONIC DP8393x netcard + * + * Copyright (c) 2008-2009 Herve Poussineau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/sysbus.h" +#include "hw/devices.h" +#include "net/net.h" +#include "qemu/timer.h" +#include <zlib.h> + +//#define DEBUG_SONIC + +#define SONIC_PROM_SIZE 0x1000 + +#ifdef DEBUG_SONIC +#define DPRINTF(fmt, ...) \ +do { printf("sonic: " fmt , ## __VA_ARGS__); } while (0) +static const char* reg_names[] = { + "CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA", + "TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA", "CRBA0", + "CRBA1", "RBWC0", "RBWC1", "EOBC", "URRA", "RSA", "REA", "RRP", + "RWP", "TRBA0", "TRBA1", "0x1b", "0x1c", "0x1d", "0x1e", "LLFA", + "TTDA", "CEP", "CAP2", "CAP1", "CAP0", "CE", "CDP", "CDC", + "SR", "WT0", "WT1", "RSC", "CRCT", "FAET", "MPT", "MDT", + "0x30", "0x31", "0x32", "0x33", "0x34", "0x35", "0x36", "0x37", + "0x38", "0x39", "0x3a", "0x3b", "0x3c", "0x3d", "0x3e", "DCR2" }; +#else +#define DPRINTF(fmt, ...) do {} while (0) +#endif + +#define SONIC_ERROR(fmt, ...) \ +do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0) + +#define SONIC_CR 0x00 +#define SONIC_DCR 0x01 +#define SONIC_RCR 0x02 +#define SONIC_TCR 0x03 +#define SONIC_IMR 0x04 +#define SONIC_ISR 0x05 +#define SONIC_UTDA 0x06 +#define SONIC_CTDA 0x07 +#define SONIC_TPS 0x08 +#define SONIC_TFC 0x09 +#define SONIC_TSA0 0x0a +#define SONIC_TSA1 0x0b +#define SONIC_TFS 0x0c +#define SONIC_URDA 0x0d +#define SONIC_CRDA 0x0e +#define SONIC_CRBA0 0x0f +#define SONIC_CRBA1 0x10 +#define SONIC_RBWC0 0x11 +#define SONIC_RBWC1 0x12 +#define SONIC_EOBC 0x13 +#define SONIC_URRA 0x14 +#define SONIC_RSA 0x15 +#define SONIC_REA 0x16 +#define SONIC_RRP 0x17 +#define SONIC_RWP 0x18 +#define SONIC_TRBA0 0x19 +#define SONIC_TRBA1 0x1a +#define SONIC_LLFA 0x1f +#define SONIC_TTDA 0x20 +#define SONIC_CEP 0x21 +#define SONIC_CAP2 0x22 +#define SONIC_CAP1 0x23 +#define SONIC_CAP0 0x24 +#define SONIC_CE 0x25 +#define SONIC_CDP 0x26 +#define SONIC_CDC 0x27 +#define SONIC_SR 0x28 +#define SONIC_WT0 0x29 +#define SONIC_WT1 0x2a +#define SONIC_RSC 0x2b +#define SONIC_CRCT 0x2c +#define SONIC_FAET 0x2d +#define SONIC_MPT 0x2e +#define SONIC_MDT 0x2f +#define SONIC_DCR2 0x3f + +#define SONIC_CR_HTX 0x0001 +#define SONIC_CR_TXP 0x0002 +#define SONIC_CR_RXDIS 0x0004 +#define SONIC_CR_RXEN 0x0008 +#define SONIC_CR_STP 0x0010 +#define SONIC_CR_ST 0x0020 +#define SONIC_CR_RST 0x0080 +#define SONIC_CR_RRRA 0x0100 +#define SONIC_CR_LCAM 0x0200 +#define SONIC_CR_MASK 0x03bf + +#define SONIC_DCR_DW 0x0020 +#define SONIC_DCR_LBR 0x2000 +#define SONIC_DCR_EXBUS 0x8000 + +#define SONIC_RCR_PRX 0x0001 +#define SONIC_RCR_LBK 0x0002 +#define SONIC_RCR_FAER 0x0004 +#define SONIC_RCR_CRCR 0x0008 +#define SONIC_RCR_CRS 0x0020 +#define SONIC_RCR_LPKT 0x0040 +#define SONIC_RCR_BC 0x0080 +#define SONIC_RCR_MC 0x0100 +#define SONIC_RCR_LB0 0x0200 +#define SONIC_RCR_LB1 0x0400 +#define SONIC_RCR_AMC 0x0800 +#define SONIC_RCR_PRO 0x1000 +#define SONIC_RCR_BRD 0x2000 +#define SONIC_RCR_RNT 0x4000 + +#define SONIC_TCR_PTX 0x0001 +#define SONIC_TCR_BCM 0x0002 +#define SONIC_TCR_FU 0x0004 +#define SONIC_TCR_EXC 0x0040 +#define SONIC_TCR_CRSL 0x0080 +#define SONIC_TCR_NCRS 0x0100 +#define SONIC_TCR_EXD 0x0400 +#define SONIC_TCR_CRCI 0x2000 +#define SONIC_TCR_PINT 0x8000 + +#define SONIC_ISR_RBE 0x0020 +#define SONIC_ISR_RDE 0x0040 +#define SONIC_ISR_TC 0x0080 +#define SONIC_ISR_TXDN 0x0200 +#define SONIC_ISR_PKTRX 0x0400 +#define SONIC_ISR_PINT 0x0800 +#define SONIC_ISR_LCD 0x1000 + +#define TYPE_DP8393X "dp8393x" +#define DP8393X(obj) OBJECT_CHECK(dp8393xState, (obj), TYPE_DP8393X) + +typedef struct dp8393xState { + SysBusDevice parent_obj; + + /* Hardware */ + uint8_t it_shift; + qemu_irq irq; +#ifdef DEBUG_SONIC + int irq_level; +#endif + QEMUTimer *watchdog; + int64_t wt_last_update; + NICConf conf; + NICState *nic; + MemoryRegion mmio; + MemoryRegion prom; + + /* Registers */ + uint8_t cam[16][6]; + uint16_t regs[0x40]; + + /* Temporaries */ + uint8_t tx_buffer[0x10000]; + int loopback_packet; + + /* Memory access */ + void *dma_mr; + AddressSpace as; +} dp8393xState; + +static void dp8393x_update_irq(dp8393xState *s) +{ + int level = (s->regs[SONIC_IMR] & s->regs[SONIC_ISR]) ? 1 : 0; + +#ifdef DEBUG_SONIC + if (level != s->irq_level) { + s->irq_level = level; + if (level) { + DPRINTF("raise irq, isr is 0x%04x\n", s->regs[SONIC_ISR]); + } else { + DPRINTF("lower irq\n"); + } + } +#endif + + qemu_set_irq(s->irq, level); +} + +static void dp8393x_do_load_cam(dp8393xState *s) +{ + uint16_t data[8]; + int width, size; + uint16_t index = 0; + + width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; + size = sizeof(uint16_t) * 4 * width; + + while (s->regs[SONIC_CDC] & 0x1f) { + /* Fill current entry */ + address_space_rw(&s->as, + (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_CDP], + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + s->cam[index][0] = data[1 * width] & 0xff; + s->cam[index][1] = data[1 * width] >> 8; + s->cam[index][2] = data[2 * width] & 0xff; + s->cam[index][3] = data[2 * width] >> 8; + s->cam[index][4] = data[3 * width] & 0xff; + s->cam[index][5] = data[3 * width] >> 8; + DPRINTF("load cam[%d] with %02x%02x%02x%02x%02x%02x\n", index, + s->cam[index][0], s->cam[index][1], s->cam[index][2], + s->cam[index][3], s->cam[index][4], s->cam[index][5]); + /* Move to next entry */ + s->regs[SONIC_CDC]--; + s->regs[SONIC_CDP] += size; + index++; + } + + /* Read CAM enable */ + address_space_rw(&s->as, + (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_CDP], + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + s->regs[SONIC_CE] = data[0 * width]; + DPRINTF("load cam done. cam enable mask 0x%04x\n", s->regs[SONIC_CE]); + + /* Done */ + s->regs[SONIC_CR] &= ~SONIC_CR_LCAM; + s->regs[SONIC_ISR] |= SONIC_ISR_LCD; + dp8393x_update_irq(s); +} + +static void dp8393x_do_read_rra(dp8393xState *s) +{ + uint16_t data[8]; + int width, size; + + /* Read memory */ + width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; + size = sizeof(uint16_t) * 4 * width; + address_space_rw(&s->as, + (s->regs[SONIC_URRA] << 16) | s->regs[SONIC_RRP], + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + + /* Update SONIC registers */ + s->regs[SONIC_CRBA0] = data[0 * width]; + s->regs[SONIC_CRBA1] = data[1 * width]; + s->regs[SONIC_RBWC0] = data[2 * width]; + s->regs[SONIC_RBWC1] = data[3 * width]; + DPRINTF("CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x\n", + s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1], + s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]); + + /* Go to next entry */ + s->regs[SONIC_RRP] += size; + + /* Handle wrap */ + if (s->regs[SONIC_RRP] == s->regs[SONIC_REA]) { + s->regs[SONIC_RRP] = s->regs[SONIC_RSA]; + } + + /* Check resource exhaustion */ + if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) + { + s->regs[SONIC_ISR] |= SONIC_ISR_RBE; + dp8393x_update_irq(s); + } + + /* Done */ + s->regs[SONIC_CR] &= ~SONIC_CR_RRRA; +} + +static void dp8393x_do_software_reset(dp8393xState *s) +{ + timer_del(s->watchdog); + + s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | SONIC_CR_HTX); + s->regs[SONIC_CR] |= SONIC_CR_RST | SONIC_CR_RXDIS; +} + +static void dp8393x_set_next_tick(dp8393xState *s) +{ + uint32_t ticks; + int64_t delay; + + if (s->regs[SONIC_CR] & SONIC_CR_STP) { + timer_del(s->watchdog); + return; + } + + ticks = s->regs[SONIC_WT1] << 16 | s->regs[SONIC_WT0]; + s->wt_last_update = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + delay = get_ticks_per_sec() * ticks / 5000000; + timer_mod(s->watchdog, s->wt_last_update + delay); +} + +static void dp8393x_update_wt_regs(dp8393xState *s) +{ + int64_t elapsed; + uint32_t val; + + if (s->regs[SONIC_CR] & SONIC_CR_STP) { + timer_del(s->watchdog); + return; + } + + elapsed = s->wt_last_update - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + val = s->regs[SONIC_WT1] << 16 | s->regs[SONIC_WT0]; + val -= elapsed / 5000000; + s->regs[SONIC_WT1] = (val >> 16) & 0xffff; + s->regs[SONIC_WT0] = (val >> 0) & 0xffff; + dp8393x_set_next_tick(s); + +} + +static void dp8393x_do_start_timer(dp8393xState *s) +{ + s->regs[SONIC_CR] &= ~SONIC_CR_STP; + dp8393x_set_next_tick(s); +} + +static void dp8393x_do_stop_timer(dp8393xState *s) +{ + s->regs[SONIC_CR] &= ~SONIC_CR_ST; + dp8393x_update_wt_regs(s); +} + +static int dp8393x_can_receive(NetClientState *nc); + +static void dp8393x_do_receiver_enable(dp8393xState *s) +{ + s->regs[SONIC_CR] &= ~SONIC_CR_RXDIS; + if (dp8393x_can_receive(s->nic->ncs)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + +static void dp8393x_do_receiver_disable(dp8393xState *s) +{ + s->regs[SONIC_CR] &= ~SONIC_CR_RXEN; +} + +static void dp8393x_do_transmit_packets(dp8393xState *s) +{ + NetClientState *nc = qemu_get_queue(s->nic); + uint16_t data[12]; + int width, size; + int tx_len, len; + uint16_t i; + + width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; + + while (1) { + /* Read memory */ + DPRINTF("Transmit packet at %08x\n", + (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_CTDA]); + size = sizeof(uint16_t) * 6 * width; + s->regs[SONIC_TTDA] = s->regs[SONIC_CTDA]; + address_space_rw(&s->as, + ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * width, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + tx_len = 0; + + /* Update registers */ + s->regs[SONIC_TCR] = data[0 * width] & 0xf000; + s->regs[SONIC_TPS] = data[1 * width]; + s->regs[SONIC_TFC] = data[2 * width]; + s->regs[SONIC_TSA0] = data[3 * width]; + s->regs[SONIC_TSA1] = data[4 * width]; + s->regs[SONIC_TFS] = data[5 * width]; + + /* Handle programmable interrupt */ + if (s->regs[SONIC_TCR] & SONIC_TCR_PINT) { + s->regs[SONIC_ISR] |= SONIC_ISR_PINT; + } else { + s->regs[SONIC_ISR] &= ~SONIC_ISR_PINT; + } + + for (i = 0; i < s->regs[SONIC_TFC]; ) { + /* Append fragment */ + len = s->regs[SONIC_TFS]; + if (tx_len + len > sizeof(s->tx_buffer)) { + len = sizeof(s->tx_buffer) - tx_len; + } + address_space_rw(&s->as, + (s->regs[SONIC_TSA1] << 16) | s->regs[SONIC_TSA0], + MEMTXATTRS_UNSPECIFIED, &s->tx_buffer[tx_len], len, 0); + tx_len += len; + + i++; + if (i != s->regs[SONIC_TFC]) { + /* Read next fragment details */ + size = sizeof(uint16_t) * 3 * width; + address_space_rw(&s->as, + ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * (4 + 3 * i) * width, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + s->regs[SONIC_TSA0] = data[0 * width]; + s->regs[SONIC_TSA1] = data[1 * width]; + s->regs[SONIC_TFS] = data[2 * width]; + } + } + + /* Handle Ethernet checksum */ + if (!(s->regs[SONIC_TCR] & SONIC_TCR_CRCI)) { + /* Don't append FCS there, to look like slirp packets + * which don't have one */ + } else { + /* Remove existing FCS */ + tx_len -= 4; + } + + if (s->regs[SONIC_RCR] & (SONIC_RCR_LB1 | SONIC_RCR_LB0)) { + /* Loopback */ + s->regs[SONIC_TCR] |= SONIC_TCR_CRSL; + if (nc->info->can_receive(nc)) { + s->loopback_packet = 1; + nc->info->receive(nc, s->tx_buffer, tx_len); + } + } else { + /* Transmit packet */ + qemu_send_packet(nc, s->tx_buffer, tx_len); + } + s->regs[SONIC_TCR] |= SONIC_TCR_PTX; + + /* Write status */ + data[0 * width] = s->regs[SONIC_TCR] & 0x0fff; /* status */ + size = sizeof(uint16_t) * width; + address_space_rw(&s->as, + (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA], + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 1); + + if (!(s->regs[SONIC_CR] & SONIC_CR_HTX)) { + /* Read footer of packet */ + size = sizeof(uint16_t) * width; + address_space_rw(&s->as, + ((s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA]) + sizeof(uint16_t) * (4 + 3 * s->regs[SONIC_TFC]) * width, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + s->regs[SONIC_CTDA] = data[0 * width] & ~0x1; + if (data[0 * width] & 0x1) { + /* EOL detected */ + break; + } + } + } + + /* Done */ + s->regs[SONIC_CR] &= ~SONIC_CR_TXP; + s->regs[SONIC_ISR] |= SONIC_ISR_TXDN; + dp8393x_update_irq(s); +} + +static void dp8393x_do_halt_transmission(dp8393xState *s) +{ + /* Nothing to do */ +} + +static void dp8393x_do_command(dp8393xState *s, uint16_t command) +{ + if ((s->regs[SONIC_CR] & SONIC_CR_RST) && !(command & SONIC_CR_RST)) { + s->regs[SONIC_CR] &= ~SONIC_CR_RST; + return; + } + + s->regs[SONIC_CR] |= (command & SONIC_CR_MASK); + + if (command & SONIC_CR_HTX) + dp8393x_do_halt_transmission(s); + if (command & SONIC_CR_TXP) + dp8393x_do_transmit_packets(s); + if (command & SONIC_CR_RXDIS) + dp8393x_do_receiver_disable(s); + if (command & SONIC_CR_RXEN) + dp8393x_do_receiver_enable(s); + if (command & SONIC_CR_STP) + dp8393x_do_stop_timer(s); + if (command & SONIC_CR_ST) + dp8393x_do_start_timer(s); + if (command & SONIC_CR_RST) + dp8393x_do_software_reset(s); + if (command & SONIC_CR_RRRA) + dp8393x_do_read_rra(s); + if (command & SONIC_CR_LCAM) + dp8393x_do_load_cam(s); +} + +static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size) +{ + dp8393xState *s = opaque; + int reg = addr >> s->it_shift; + uint16_t val = 0; + + switch (reg) { + /* Update data before reading it */ + case SONIC_WT0: + case SONIC_WT1: + dp8393x_update_wt_regs(s); + val = s->regs[reg]; + break; + /* Accept read to some registers only when in reset mode */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + val = s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg) + 1] << 8; + val |= s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg)]; + } + break; + /* All other registers have no special contrainst */ + default: + val = s->regs[reg]; + } + + DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]); + + return val; +} + +static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + dp8393xState *s = opaque; + int reg = addr >> s->it_shift; + + DPRINTF("write 0x%04x to reg %s\n", (uint16_t)data, reg_names[reg]); + + switch (reg) { + /* Command register */ + case SONIC_CR: + dp8393x_do_command(s, data); + break; + /* Prevent write to read-only registers */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + case SONIC_SR: + case SONIC_MDT: + DPRINTF("writing to reg %d invalid\n", reg); + break; + /* Accept write to some registers only when in reset mode */ + case SONIC_DCR: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = data & 0xbfff; + } else { + DPRINTF("writing to DCR invalid\n"); + } + break; + case SONIC_DCR2: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = data & 0xf017; + } else { + DPRINTF("writing to DCR2 invalid\n"); + } + break; + /* 12 lower bytes are Read Only */ + case SONIC_TCR: + s->regs[reg] = data & 0xf000; + break; + /* 9 lower bytes are Read Only */ + case SONIC_RCR: + s->regs[reg] = data & 0xffe0; + break; + /* Ignore most significant bit */ + case SONIC_IMR: + s->regs[reg] = data & 0x7fff; + dp8393x_update_irq(s); + break; + /* Clear bits by writing 1 to them */ + case SONIC_ISR: + data &= s->regs[reg]; + s->regs[reg] &= ~data; + if (data & SONIC_ISR_RBE) { + dp8393x_do_read_rra(s); + } + dp8393x_update_irq(s); + if (dp8393x_can_receive(s->nic->ncs)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + /* Ignore least significant bit */ + case SONIC_RSA: + case SONIC_REA: + case SONIC_RRP: + case SONIC_RWP: + s->regs[reg] = data & 0xfffe; + break; + /* Invert written value for some registers */ + case SONIC_CRCT: + case SONIC_FAET: + case SONIC_MPT: + s->regs[reg] = data ^ 0xffff; + break; + /* All other registers have no special contrainst */ + default: + s->regs[reg] = data; + } + + if (reg == SONIC_WT0 || reg == SONIC_WT1) { + dp8393x_set_next_tick(s); + } +} + +static const MemoryRegionOps dp8393x_ops = { + .read = dp8393x_read, + .write = dp8393x_write, + .impl.min_access_size = 2, + .impl.max_access_size = 2, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void dp8393x_watchdog(void *opaque) +{ + dp8393xState *s = opaque; + + if (s->regs[SONIC_CR] & SONIC_CR_STP) { + return; + } + + s->regs[SONIC_WT1] = 0xffff; + s->regs[SONIC_WT0] = 0xffff; + dp8393x_set_next_tick(s); + + /* Signal underflow */ + s->regs[SONIC_ISR] |= SONIC_ISR_TC; + dp8393x_update_irq(s); +} + +static int dp8393x_can_receive(NetClientState *nc) +{ + dp8393xState *s = qemu_get_nic_opaque(nc); + + if (!(s->regs[SONIC_CR] & SONIC_CR_RXEN)) + return 0; + if (s->regs[SONIC_ISR] & SONIC_ISR_RBE) + return 0; + return 1; +} + +static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf, + int size) +{ + static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + int i; + + /* Check promiscuous mode */ + if ((s->regs[SONIC_RCR] & SONIC_RCR_PRO) && (buf[0] & 1) == 0) { + return 0; + } + + /* Check multicast packets */ + if ((s->regs[SONIC_RCR] & SONIC_RCR_AMC) && (buf[0] & 1) == 1) { + return SONIC_RCR_MC; + } + + /* Check broadcast */ + if ((s->regs[SONIC_RCR] & SONIC_RCR_BRD) && !memcmp(buf, bcast, sizeof(bcast))) { + return SONIC_RCR_BC; + } + + /* Check CAM */ + for (i = 0; i < 16; i++) { + if (s->regs[SONIC_CE] & (1 << i)) { + /* Entry enabled */ + if (!memcmp(buf, s->cam[i], sizeof(s->cam[i]))) { + return 0; + } + } + } + + return -1; +} + +static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, + size_t size) +{ + dp8393xState *s = qemu_get_nic_opaque(nc); + uint16_t data[10]; + int packet_type; + uint32_t available, address; + int width, rx_len = size; + uint32_t checksum; + + width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; + + s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER | + SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC); + + packet_type = dp8393x_receive_filter(s, buf, size); + if (packet_type < 0) { + DPRINTF("packet not for netcard\n"); + return -1; + } + + /* XXX: Check byte ordering */ + + /* Check for EOL */ + if (s->regs[SONIC_LLFA] & 0x1) { + /* Are we still in resource exhaustion? */ + size = sizeof(uint16_t) * 1 * width; + address = ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 5 * width; + address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED, + (uint8_t *)data, size, 0); + if (data[0 * width] & 0x1) { + /* Still EOL ; stop reception */ + return -1; + } else { + s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; + } + } + + /* Save current position */ + s->regs[SONIC_TRBA1] = s->regs[SONIC_CRBA1]; + s->regs[SONIC_TRBA0] = s->regs[SONIC_CRBA0]; + + /* Calculate the ethernet checksum */ + checksum = cpu_to_le32(crc32(0, buf, rx_len)); + + /* Put packet into RBA */ + DPRINTF("Receive packet at %08x\n", (s->regs[SONIC_CRBA1] << 16) | s->regs[SONIC_CRBA0]); + address = (s->regs[SONIC_CRBA1] << 16) | s->regs[SONIC_CRBA0]; + address_space_rw(&s->as, address, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)buf, rx_len, 1); + address += rx_len; + address_space_rw(&s->as, address, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)&checksum, 4, 1); + rx_len += 4; + s->regs[SONIC_CRBA1] = address >> 16; + s->regs[SONIC_CRBA0] = address & 0xffff; + available = (s->regs[SONIC_RBWC1] << 16) | s->regs[SONIC_RBWC0]; + available -= rx_len / 2; + s->regs[SONIC_RBWC1] = available >> 16; + s->regs[SONIC_RBWC0] = available & 0xffff; + + /* Update status */ + if (((s->regs[SONIC_RBWC1] << 16) | s->regs[SONIC_RBWC0]) < s->regs[SONIC_EOBC]) { + s->regs[SONIC_RCR] |= SONIC_RCR_LPKT; + } + s->regs[SONIC_RCR] |= packet_type; + s->regs[SONIC_RCR] |= SONIC_RCR_PRX; + if (s->loopback_packet) { + s->regs[SONIC_RCR] |= SONIC_RCR_LBK; + s->loopback_packet = 0; + } + + /* Write status to memory */ + DPRINTF("Write status at %08x\n", (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]); + data[0 * width] = s->regs[SONIC_RCR]; /* status */ + data[1 * width] = rx_len; /* byte count */ + data[2 * width] = s->regs[SONIC_TRBA0]; /* pkt_ptr0 */ + data[3 * width] = s->regs[SONIC_TRBA1]; /* pkt_ptr1 */ + data[4 * width] = s->regs[SONIC_RSC]; /* seq_no */ + size = sizeof(uint16_t) * 5 * width; + address_space_rw(&s->as, (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA], + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 1); + + /* Move to next descriptor */ + size = sizeof(uint16_t) * width; + address_space_rw(&s->as, + ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 5 * width, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, size, 0); + s->regs[SONIC_LLFA] = data[0 * width]; + if (s->regs[SONIC_LLFA] & 0x1) { + /* EOL detected */ + s->regs[SONIC_ISR] |= SONIC_ISR_RDE; + } else { + data[0 * width] = 0; /* in_use */ + address_space_rw(&s->as, + ((s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA]) + sizeof(uint16_t) * 6 * width, + MEMTXATTRS_UNSPECIFIED, (uint8_t *)data, sizeof(uint16_t), 1); + s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; + s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX; + s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff); + + if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) { + /* Read next RRA */ + dp8393x_do_read_rra(s); + } + } + + /* Done */ + dp8393x_update_irq(s); + + return size; +} + +static void dp8393x_reset(DeviceState *dev) +{ + dp8393xState *s = DP8393X(dev); + timer_del(s->watchdog); + + memset(s->regs, 0, sizeof(s->regs)); + s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS; + s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR); + s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT); + s->regs[SONIC_TCR] |= SONIC_TCR_NCRS | SONIC_TCR_PTX; + s->regs[SONIC_TCR] &= ~SONIC_TCR_BCM; + s->regs[SONIC_IMR] = 0; + s->regs[SONIC_ISR] = 0; + s->regs[SONIC_DCR2] = 0; + s->regs[SONIC_EOBC] = 0x02F8; + s->regs[SONIC_RSC] = 0; + s->regs[SONIC_CE] = 0; + s->regs[SONIC_RSC] = 0; + + /* Network cable is connected */ + s->regs[SONIC_RCR] |= SONIC_RCR_CRS; + + dp8393x_update_irq(s); +} + +static NetClientInfo net_dp83932_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = dp8393x_can_receive, + .receive = dp8393x_receive, +}; + +static void dp8393x_instance_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + dp8393xState *s = DP8393X(obj); + + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_mmio(sbd, &s->prom); + sysbus_init_irq(sbd, &s->irq); +} + +static void dp8393x_realize(DeviceState *dev, Error **errp) +{ + dp8393xState *s = DP8393X(dev); + int i, checksum; + uint8_t *prom; + Error *local_err = NULL; + + address_space_init(&s->as, s->dma_mr, "dp8393x"); + memory_region_init_io(&s->mmio, OBJECT(dev), &dp8393x_ops, s, + "dp8393x-regs", 0x40 << s->it_shift); + + s->nic = qemu_new_nic(&net_dp83932_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s); + s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */ + + memory_region_init_ram(&s->prom, OBJECT(dev), + "dp8393x-prom", SONIC_PROM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_set_readonly(&s->prom, true); + prom = memory_region_get_ram_ptr(&s->prom); + checksum = 0; + for (i = 0; i < 6; i++) { + prom[i] = s->conf.macaddr.a[i]; + checksum += prom[i]; + if (checksum > 0xff) { + checksum = (checksum + 1) & 0xff; + } + } + prom[7] = 0xff - checksum; +} + +static const VMStateDescription vmstate_dp8393x = { + .name = "dp8393x", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField []) { + VMSTATE_BUFFER_UNSAFE(cam, dp8393xState, 0, 16 * 6), + VMSTATE_UINT16_ARRAY(regs, dp8393xState, 0x40), + VMSTATE_END_OF_LIST() + } +}; + +static Property dp8393x_properties[] = { + DEFINE_NIC_PROPERTIES(dp8393xState, conf), + DEFINE_PROP_PTR("dma_mr", dp8393xState, dma_mr), + DEFINE_PROP_UINT8("it_shift", dp8393xState, it_shift, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void dp8393x_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->realize = dp8393x_realize; + dc->reset = dp8393x_reset; + dc->vmsd = &vmstate_dp8393x; + dc->props = dp8393x_properties; + /* Reason: dma_mr property can't be set */ + dc->cannot_instantiate_with_device_add_yet = true; +} + +static const TypeInfo dp8393x_info = { + .name = TYPE_DP8393X, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(dp8393xState), + .instance_init = dp8393x_instance_init, + .class_init = dp8393x_class_init, +}; + +static void dp8393x_register_types(void) +{ + type_register_static(&dp8393x_info); +} + +type_init(dp8393x_register_types) diff --git a/qemu/hw/net/e1000.c b/qemu/hw/net/e1000.c new file mode 100644 index 000000000..5c6bcd001 --- /dev/null +++ b/qemu/hw/net/e1000.c @@ -0,0 +1,1693 @@ +/* + * QEMU e1000 emulation + * + * Software developer's manual: + * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf + * + * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. + * Copyright (c) 2008 Qumranet + * Based on work done by: + * Copyright (c) 2007 Dan Aloni + * Copyright (c) 2004 Antony T Curtis + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "net/checksum.h" +#include "hw/loader.h" +#include "sysemu/sysemu.h" +#include "sysemu/dma.h" +#include "qemu/iov.h" +#include "qemu/range.h" + +#include "e1000_regs.h" + +#define E1000_DEBUG + +#ifdef E1000_DEBUG +enum { + DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT, + DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM, + DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR, + DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET, +}; +#define DBGBIT(x) (1<<DEBUG_##x) +static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); + +#define DBGOUT(what, fmt, ...) do { \ + if (debugflags & DBGBIT(what)) \ + fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \ + } while (0) +#else +#define DBGOUT(what, fmt, ...) do {} while (0) +#endif + +#define IOPORT_SIZE 0x40 +#define PNPMMIO_SIZE 0x20000 +#define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */ + +/* this is the size past which hardware will drop packets when setting LPE=0 */ +#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 +/* this is the size past which hardware will drop packets when setting LPE=1 */ +#define MAXIMUM_ETHERNET_LPE_SIZE 16384 + +#define MAXIMUM_ETHERNET_HDR_LEN (14+4) + +/* + * HW models: + * E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8 + * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested + * E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6 + * Others never tested + */ + +typedef struct E1000State_st { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + NICState *nic; + NICConf conf; + MemoryRegion mmio; + MemoryRegion io; + + uint32_t mac_reg[0x8000]; + uint16_t phy_reg[0x20]; + uint16_t eeprom_data[64]; + + uint32_t rxbuf_size; + uint32_t rxbuf_min_shift; + struct e1000_tx { + unsigned char header[256]; + unsigned char vlan_header[4]; + /* Fields vlan and data must not be reordered or separated. */ + unsigned char vlan[4]; + unsigned char data[0x10000]; + uint16_t size; + unsigned char sum_needed; + unsigned char vlan_needed; + uint8_t ipcss; + uint8_t ipcso; + uint16_t ipcse; + uint8_t tucss; + uint8_t tucso; + uint16_t tucse; + uint8_t hdr_len; + uint16_t mss; + uint32_t paylen; + uint16_t tso_frames; + char tse; + int8_t ip; + int8_t tcp; + char cptse; // current packet tse bit + } tx; + + struct { + uint32_t val_in; // shifted in from guest driver + uint16_t bitnum_in; + uint16_t bitnum_out; + uint16_t reading; + uint32_t old_eecd; + } eecd_state; + + QEMUTimer *autoneg_timer; + + QEMUTimer *mit_timer; /* Mitigation timer. */ + bool mit_timer_on; /* Mitigation timer is running. */ + bool mit_irq_level; /* Tracks interrupt pin level. */ + uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ + +/* Compatibility flags for migration to/from qemu 1.3.0 and older */ +#define E1000_FLAG_AUTONEG_BIT 0 +#define E1000_FLAG_MIT_BIT 1 +#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) +#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) + uint32_t compat_flags; +} E1000State; + +typedef struct E1000BaseClass { + PCIDeviceClass parent_class; + uint16_t phy_id2; +} E1000BaseClass; + +#define TYPE_E1000_BASE "e1000-base" + +#define E1000(obj) \ + OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE) + +#define E1000_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE) +#define E1000_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE) + +#define defreg(x) x = (E1000_##x>>2) +enum { + defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), + defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), + defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), + defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), + defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), + defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), + defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), + defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), + defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), + defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), + defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), + defreg(ITR), +}; + +static void +e1000_link_down(E1000State *s) +{ + s->mac_reg[STATUS] &= ~E1000_STATUS_LU; + s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; + s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; + s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; +} + +static void +e1000_link_up(E1000State *s) +{ + s->mac_reg[STATUS] |= E1000_STATUS_LU; + s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + +static bool +have_autoneg(E1000State *s) +{ + return (s->compat_flags & E1000_FLAG_AUTONEG) && + (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN); +} + +static void +set_phy_ctrl(E1000State *s, int index, uint16_t val) +{ + /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */ + s->phy_reg[PHY_CTRL] = val & ~(0x3f | + MII_CR_RESET | + MII_CR_RESTART_AUTO_NEG); + + /* + * QEMU 1.3 does not support link auto-negotiation emulation, so if we + * migrate during auto negotiation, after migration the link will be + * down. + */ + if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) { + e1000_link_down(s); + DBGOUT(PHY, "Start link auto negotiation\n"); + timer_mod(s->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = { + [PHY_CTRL] = set_phy_ctrl, +}; + +enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) }; + +enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W }; +static const char phy_regcap[0x20] = { + [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW, + [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW, + [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW, + [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R, + [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R, + [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R, + [PHY_AUTONEG_EXP] = PHY_R, +}; + +/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */ +static const uint16_t phy_reg_init[] = { + [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | + MII_CR_FULL_DUPLEX | + MII_CR_AUTO_NEG_EN, + + [PHY_STATUS] = MII_SR_EXTENDED_CAPS | + MII_SR_LINK_STATUS | /* link initially up */ + MII_SR_AUTONEG_CAPS | + /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */ + MII_SR_PREAMBLE_SUPPRESS | + MII_SR_EXTENDED_STATUS | + MII_SR_10T_HD_CAPS | + MII_SR_10T_FD_CAPS | + MII_SR_100X_HD_CAPS | + MII_SR_100X_FD_CAPS, + + [PHY_ID1] = 0x141, + /* [PHY_ID2] configured per DevId, from e1000_reset() */ + [PHY_AUTONEG_ADV] = 0xde1, + [PHY_LP_ABILITY] = 0x1e0, + [PHY_1000T_CTRL] = 0x0e00, + [PHY_1000T_STATUS] = 0x3c00, + [M88E1000_PHY_SPEC_CTRL] = 0x360, + [M88E1000_PHY_SPEC_STATUS] = 0xac00, + [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, +}; + +static const uint32_t mac_reg_init[] = { + [PBA] = 0x00100030, + [LEDCTL] = 0x602, + [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | + E1000_CTRL_SPD_1000 | E1000_CTRL_SLU, + [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE | + E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK | + E1000_STATUS_SPEED_1000 | E1000_STATUS_FD | + E1000_STATUS_LU, + [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN | + E1000_MANC_ARP_EN | E1000_MANC_0298_EN | + E1000_MANC_RMCP_EN, +}; + +/* Helper function, *curr == 0 means the value is not set */ +static inline void +mit_update_delay(uint32_t *curr, uint32_t value) +{ + if (value && (*curr == 0 || value < *curr)) { + *curr = value; + } +} + +static void +set_interrupt_cause(E1000State *s, int index, uint32_t val) +{ + PCIDevice *d = PCI_DEVICE(s); + uint32_t pending_ints; + uint32_t mit_delay; + + s->mac_reg[ICR] = val; + + /* + * Make sure ICR and ICS registers have the same value. + * The spec says that the ICS register is write-only. However in practice, + * on real hardware ICS is readable, and for reads it has the same value as + * ICR (except that ICS does not have the clear on read behaviour of ICR). + * + * The VxWorks PRO/1000 driver uses this behaviour. + */ + s->mac_reg[ICS] = val; + + pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]); + if (!s->mit_irq_level && pending_ints) { + /* + * Here we detect a potential raising edge. We postpone raising the + * interrupt line if we are inside the mitigation delay window + * (s->mit_timer_on == 1). + * We provide a partial implementation of interrupt mitigation, + * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for + * RADV and TADV, 256ns units for ITR). RDTR is only used to enable + * RADV; relative timers based on TIDV and RDTR are not implemented. + */ + if (s->mit_timer_on) { + return; + } + if (s->compat_flags & E1000_FLAG_MIT) { + /* Compute the next mitigation delay according to pending + * interrupts and the current values of RADV (provided + * RDTR!=0), TADV and ITR. + * Then rearm the timer. + */ + mit_delay = 0; + if (s->mit_ide && + (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) { + mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); + } + if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) { + mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); + } + mit_update_delay(&mit_delay, s->mac_reg[ITR]); + + if (mit_delay) { + s->mit_timer_on = 1; + timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + mit_delay * 256); + } + s->mit_ide = 0; + } + } + + s->mit_irq_level = (pending_ints != 0); + pci_set_irq(d, s->mit_irq_level); +} + +static void +e1000_mit_timer(void *opaque) +{ + E1000State *s = opaque; + + s->mit_timer_on = 0; + /* Call set_interrupt_cause to update the irq level (if necessary). */ + set_interrupt_cause(s, 0, s->mac_reg[ICR]); +} + +static void +set_ics(E1000State *s, int index, uint32_t val) +{ + DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR], + s->mac_reg[IMS]); + set_interrupt_cause(s, 0, val | s->mac_reg[ICR]); +} + +static void +e1000_autoneg_timer(void *opaque) +{ + E1000State *s = opaque; + if (!qemu_get_queue(s->nic)->link_down) { + e1000_link_up(s); + s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; + s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + DBGOUT(PHY, "Auto negotiation is completed\n"); + set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ + } +} + +static int +rxbufsize(uint32_t v) +{ + v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | + E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | + E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; + switch (v) { + case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: + return 16384; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: + return 8192; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: + return 4096; + case E1000_RCTL_SZ_1024: + return 1024; + case E1000_RCTL_SZ_512: + return 512; + case E1000_RCTL_SZ_256: + return 256; + } + return 2048; +} + +static void e1000_reset(void *opaque) +{ + E1000State *d = opaque; + E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d); + uint8_t *macaddr = d->conf.macaddr.a; + int i; + + timer_del(d->autoneg_timer); + timer_del(d->mit_timer); + d->mit_timer_on = 0; + d->mit_irq_level = 0; + d->mit_ide = 0; + memset(d->phy_reg, 0, sizeof d->phy_reg); + memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init); + d->phy_reg[PHY_ID2] = edc->phy_id2; + memset(d->mac_reg, 0, sizeof d->mac_reg); + memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init); + d->rxbuf_min_shift = 1; + memset(&d->tx, 0, sizeof d->tx); + + if (qemu_get_queue(d->nic)->link_down) { + e1000_link_down(d); + } + + /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */ + d->mac_reg[RA] = 0; + d->mac_reg[RA + 1] = E1000_RAH_AV; + for (i = 0; i < 4; i++) { + d->mac_reg[RA] |= macaddr[i] << (8 * i); + d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0; + } + qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr); +} + +static void +set_ctrl(E1000State *s, int index, uint32_t val) +{ + /* RST is self clearing */ + s->mac_reg[CTRL] = val & ~E1000_CTRL_RST; +} + +static void +set_rx_control(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[RCTL] = val; + s->rxbuf_size = rxbufsize(val); + s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; + DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], + s->mac_reg[RCTL]); + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + +static void +set_mdic(E1000State *s, int index, uint32_t val) +{ + uint32_t data = val & E1000_MDIC_DATA_MASK; + uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + + if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy # + val = s->mac_reg[MDIC] | E1000_MDIC_ERROR; + else if (val & E1000_MDIC_OP_READ) { + DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr); + if (!(phy_regcap[addr] & PHY_R)) { + DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr); + val |= E1000_MDIC_ERROR; + } else + val = (val ^ data) | s->phy_reg[addr]; + } else if (val & E1000_MDIC_OP_WRITE) { + DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data); + if (!(phy_regcap[addr] & PHY_W)) { + DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr); + val |= E1000_MDIC_ERROR; + } else { + if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) { + phyreg_writeops[addr](s, index, data); + } else { + s->phy_reg[addr] = data; + } + } + } + s->mac_reg[MDIC] = val | E1000_MDIC_READY; + + if (val & E1000_MDIC_INT_EN) { + set_ics(s, 0, E1000_ICR_MDAC); + } +} + +static uint32_t +get_eecd(E1000State *s, int index) +{ + uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd; + + DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n", + s->eecd_state.bitnum_out, s->eecd_state.reading); + if (!s->eecd_state.reading || + ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >> + ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1) + ret |= E1000_EECD_DO; + return ret; +} + +static void +set_eecd(E1000State *s, int index, uint32_t val) +{ + uint32_t oldval = s->eecd_state.old_eecd; + + s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS | + E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ); + if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do + return; + if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state + s->eecd_state.val_in = 0; + s->eecd_state.bitnum_in = 0; + s->eecd_state.bitnum_out = 0; + s->eecd_state.reading = 0; + } + if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge + return; + if (!(E1000_EECD_SK & val)) { // falling edge + s->eecd_state.bitnum_out++; + return; + } + s->eecd_state.val_in <<= 1; + if (val & E1000_EECD_DI) + s->eecd_state.val_in |= 1; + if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) { + s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1; + s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) == + EEPROM_READ_OPCODE_MICROWIRE); + } + DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n", + s->eecd_state.bitnum_in, s->eecd_state.bitnum_out, + s->eecd_state.reading); +} + +static uint32_t +flash_eerd_read(E1000State *s, int x) +{ + unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START; + + if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0) + return (s->mac_reg[EERD]); + + if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG) + return (E1000_EEPROM_RW_REG_DONE | r); + + return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) | + E1000_EEPROM_RW_REG_DONE | r); +} + +static void +putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse) +{ + uint32_t sum; + + if (cse && cse < n) + n = cse + 1; + if (sloc < n-1) { + sum = net_checksum_add(n-css, data+css); + stw_be_p(data + sloc, net_checksum_finish(sum)); + } +} + +static inline int +vlan_enabled(E1000State *s) +{ + return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0); +} + +static inline int +vlan_rx_filter_enabled(E1000State *s) +{ + return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0); +} + +static inline int +is_vlan_packet(E1000State *s, const uint8_t *buf) +{ + return (be16_to_cpup((uint16_t *)(buf + 12)) == + le16_to_cpu(s->mac_reg[VET])); +} + +static inline int +is_vlan_txd(uint32_t txd_lower) +{ + return ((txd_lower & E1000_TXD_CMD_VLE) != 0); +} + +/* FCS aka Ethernet CRC-32. We don't get it from backends and can't + * fill it in, just pad descriptor length by 4 bytes unless guest + * told us to strip it off the packet. */ +static inline int +fcs_len(E1000State *s) +{ + return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; +} + +static void +e1000_send_packet(E1000State *s, const uint8_t *buf, int size) +{ + NetClientState *nc = qemu_get_queue(s->nic); + if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) { + nc->info->receive(nc, buf, size); + } else { + qemu_send_packet(nc, buf, size); + } +} + +static void +xmit_seg(E1000State *s) +{ + uint16_t len, *sp; + unsigned int frames = s->tx.tso_frames, css, sofar, n; + struct e1000_tx *tp = &s->tx; + + if (tp->tse && tp->cptse) { + css = tp->ipcss; + DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", + frames, tp->size, css); + if (tp->ip) { // IPv4 + stw_be_p(tp->data+css+2, tp->size - css); + stw_be_p(tp->data+css+4, + be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); + } else // IPv6 + stw_be_p(tp->data+css+4, tp->size - css); + css = tp->tucss; + len = tp->size - css; + DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len); + if (tp->tcp) { + sofar = frames * tp->mss; + stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */ + if (tp->paylen - sofar > tp->mss) + tp->data[css + 13] &= ~9; // PSH, FIN + } else // UDP + stw_be_p(tp->data+css+4, len); + if (tp->sum_needed & E1000_TXD_POPTS_TXSM) { + unsigned int phsum; + // add pseudo-header length before checksum calculation + sp = (uint16_t *)(tp->data + tp->tucso); + phsum = be16_to_cpup(sp) + len; + phsum = (phsum >> 16) + (phsum & 0xffff); + stw_be_p(sp, phsum); + } + tp->tso_frames++; + } + + if (tp->sum_needed & E1000_TXD_POPTS_TXSM) + putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse); + if (tp->sum_needed & E1000_TXD_POPTS_IXSM) + putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse); + if (tp->vlan_needed) { + memmove(tp->vlan, tp->data, 4); + memmove(tp->data, tp->data + 4, 8); + memcpy(tp->data + 8, tp->vlan_header, 4); + e1000_send_packet(s, tp->vlan, tp->size + 4); + } else + e1000_send_packet(s, tp->data, tp->size); + s->mac_reg[TPT]++; + s->mac_reg[GPTC]++; + n = s->mac_reg[TOTL]; + if ((s->mac_reg[TOTL] += s->tx.size) < n) + s->mac_reg[TOTH]++; +} + +static void +process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) +{ + PCIDevice *d = PCI_DEVICE(s); + uint32_t txd_lower = le32_to_cpu(dp->lower.data); + uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); + unsigned int split_size = txd_lower & 0xffff, bytes, sz, op; + unsigned int msh = 0xfffff; + uint64_t addr; + struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; + struct e1000_tx *tp = &s->tx; + + s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); + if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor + op = le32_to_cpu(xp->cmd_and_length); + tp->ipcss = xp->lower_setup.ip_fields.ipcss; + tp->ipcso = xp->lower_setup.ip_fields.ipcso; + tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse); + tp->tucss = xp->upper_setup.tcp_fields.tucss; + tp->tucso = xp->upper_setup.tcp_fields.tucso; + tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse); + tp->paylen = op & 0xfffff; + tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len; + tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss); + tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; + tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; + tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; + tp->tso_frames = 0; + if (tp->tucso == 0) { // this is probably wrong + DBGOUT(TXSUM, "TCP/UDP: cso 0!\n"); + tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); + } + return; + } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { + // data descriptor + if (tp->size == 0) { + tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8; + } + tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; + } else { + // legacy descriptor + tp->cptse = 0; + } + + if (vlan_enabled(s) && is_vlan_txd(txd_lower) && + (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) { + tp->vlan_needed = 1; + stw_be_p(tp->vlan_header, + le16_to_cpu(s->mac_reg[VET])); + stw_be_p(tp->vlan_header + 2, + le16_to_cpu(dp->upper.fields.special)); + } + + addr = le64_to_cpu(dp->buffer_addr); + if (tp->tse && tp->cptse) { + msh = tp->hdr_len + tp->mss; + do { + bytes = split_size; + if (tp->size + bytes > msh) + bytes = msh - tp->size; + + bytes = MIN(sizeof(tp->data) - tp->size, bytes); + pci_dma_read(d, addr, tp->data + tp->size, bytes); + sz = tp->size + bytes; + if (sz >= tp->hdr_len && tp->size < tp->hdr_len) { + memmove(tp->header, tp->data, tp->hdr_len); + } + tp->size = sz; + addr += bytes; + if (sz == msh) { + xmit_seg(s); + memmove(tp->data, tp->header, tp->hdr_len); + tp->size = tp->hdr_len; + } + } while (split_size -= bytes); + } else if (!tp->tse && tp->cptse) { + // context descriptor TSE is not set, while data descriptor TSE is set + DBGOUT(TXERR, "TCP segmentation error\n"); + } else { + split_size = MIN(sizeof(tp->data) - tp->size, split_size); + pci_dma_read(d, addr, tp->data + tp->size, split_size); + tp->size += split_size; + } + + if (!(txd_lower & E1000_TXD_CMD_EOP)) + return; + if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) { + xmit_seg(s); + } + tp->tso_frames = 0; + tp->sum_needed = 0; + tp->vlan_needed = 0; + tp->size = 0; + tp->cptse = 0; +} + +static uint32_t +txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp) +{ + PCIDevice *d = PCI_DEVICE(s); + uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data); + + if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS))) + return 0; + txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) & + ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU); + dp->upper.data = cpu_to_le32(txd_upper); + pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp), + &dp->upper, sizeof(dp->upper)); + return E1000_ICR_TXDW; +} + +static uint64_t tx_desc_base(E1000State *s) +{ + uint64_t bah = s->mac_reg[TDBAH]; + uint64_t bal = s->mac_reg[TDBAL] & ~0xf; + + return (bah << 32) + bal; +} + +static void +start_xmit(E1000State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + dma_addr_t base; + struct e1000_tx_desc desc; + uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE; + + if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) { + DBGOUT(TX, "tx disabled\n"); + return; + } + + while (s->mac_reg[TDH] != s->mac_reg[TDT]) { + base = tx_desc_base(s) + + sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; + pci_dma_read(d, base, &desc, sizeof(desc)); + + DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH], + (void *)(intptr_t)desc.buffer_addr, desc.lower.data, + desc.upper.data); + + process_tx_desc(s, &desc); + cause |= txdesc_writeback(s, base, &desc); + + if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN]) + s->mac_reg[TDH] = 0; + /* + * the following could happen only if guest sw assigns + * bogus values to TDT/TDLEN. + * there's nothing too intelligent we could do about this. + */ + if (s->mac_reg[TDH] == tdh_start) { + DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n", + tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]); + break; + } + } + set_ics(s, 0, cause); +} + +static int +receive_filter(E1000State *s, const uint8_t *buf, int size) +{ + static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const int mta_shift[] = {4, 3, 2, 0}; + uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp; + + if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) { + uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); + uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) + + ((vid >> 5) & 0x7f)); + if ((vfta & (1 << (vid & 0x1f))) == 0) + return 0; + } + + if (rctl & E1000_RCTL_UPE) // promiscuous + return 1; + + if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast + return 1; + + if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast)) + return 1; + + for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) { + if (!(rp[1] & E1000_RAH_AV)) + continue; + ra[0] = cpu_to_le32(rp[0]); + ra[1] = cpu_to_le32(rp[1]); + if (!memcmp(buf, (uint8_t *)ra, 6)) { + DBGOUT(RXFILTER, + "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n", + (int)(rp - s->mac_reg - RA)/2, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + return 1; + } + } + DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + + f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; + f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; + if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) + return 1; + DBGOUT(RXFILTER, + "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], + (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5, + s->mac_reg[MTA + (f >> 5)]); + + return 0; +} + +static void +e1000_set_link_status(NetClientState *nc) +{ + E1000State *s = qemu_get_nic_opaque(nc); + uint32_t old_status = s->mac_reg[STATUS]; + + if (nc->link_down) { + e1000_link_down(s); + } else { + if (have_autoneg(s) && + !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + /* emulate auto-negotiation if supported */ + timer_mod(s->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } else { + e1000_link_up(s); + } + } + + if (s->mac_reg[STATUS] != old_status) + set_ics(s, 0, E1000_ICR_LSC); +} + +static bool e1000_has_rxbufs(E1000State *s, size_t total_size) +{ + int bufs; + /* Fast-path short packets */ + if (total_size <= s->rxbuf_size) { + return s->mac_reg[RDH] != s->mac_reg[RDT]; + } + if (s->mac_reg[RDH] < s->mac_reg[RDT]) { + bufs = s->mac_reg[RDT] - s->mac_reg[RDH]; + } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) { + bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) + + s->mac_reg[RDT] - s->mac_reg[RDH]; + } else { + return false; + } + return total_size <= bufs * s->rxbuf_size; +} + +static int +e1000_can_receive(NetClientState *nc) +{ + E1000State *s = qemu_get_nic_opaque(nc); + + return (s->mac_reg[STATUS] & E1000_STATUS_LU) && + (s->mac_reg[RCTL] & E1000_RCTL_EN) && + (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) && + e1000_has_rxbufs(s, 1); +} + +static uint64_t rx_desc_base(E1000State *s) +{ + uint64_t bah = s->mac_reg[RDBAH]; + uint64_t bal = s->mac_reg[RDBAL] & ~0xf; + + return (bah << 32) + bal; +} + +static ssize_t +e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + E1000State *s = qemu_get_nic_opaque(nc); + PCIDevice *d = PCI_DEVICE(s); + struct e1000_rx_desc desc; + dma_addr_t base; + unsigned int n, rdt; + uint32_t rdh_start; + uint16_t vlan_special = 0; + uint8_t vlan_status = 0; + uint8_t min_buf[MIN_BUF_SIZE]; + struct iovec min_iov; + uint8_t *filter_buf = iov->iov_base; + size_t size = iov_size(iov, iovcnt); + size_t iov_ofs = 0; + size_t desc_offset; + size_t desc_size; + size_t total_size; + + if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) { + return -1; + } + + if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) { + return -1; + } + + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + iov_to_buf(iov, iovcnt, 0, min_buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + min_iov.iov_base = filter_buf = min_buf; + min_iov.iov_len = size = sizeof(min_buf); + iovcnt = 1; + iov = &min_iov; + } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) { + /* This is very unlikely, but may happen. */ + iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN); + filter_buf = min_buf; + } + + /* Discard oversized packets if !LPE and !SBP. */ + if ((size > MAXIMUM_ETHERNET_LPE_SIZE || + (size > MAXIMUM_ETHERNET_VLAN_SIZE + && !(s->mac_reg[RCTL] & E1000_RCTL_LPE))) + && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) { + return size; + } + + if (!receive_filter(s, filter_buf, size)) { + return size; + } + + if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) { + vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf + + 14))); + iov_ofs = 4; + if (filter_buf == iov->iov_base) { + memmove(filter_buf + 4, filter_buf, 12); + } else { + iov_from_buf(iov, iovcnt, 4, filter_buf, 12); + while (iov->iov_len <= iov_ofs) { + iov_ofs -= iov->iov_len; + iov++; + } + } + vlan_status = E1000_RXD_STAT_VP; + size -= 4; + } + + rdh_start = s->mac_reg[RDH]; + desc_offset = 0; + total_size = size + fcs_len(s); + if (!e1000_has_rxbufs(s, total_size)) { + set_ics(s, 0, E1000_ICS_RXO); + return -1; + } + do { + desc_size = total_size - desc_offset; + if (desc_size > s->rxbuf_size) { + desc_size = s->rxbuf_size; + } + base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH]; + pci_dma_read(d, base, &desc, sizeof(desc)); + desc.special = vlan_special; + desc.status |= (vlan_status | E1000_RXD_STAT_DD); + if (desc.buffer_addr) { + if (desc_offset < size) { + size_t iov_copy; + hwaddr ba = le64_to_cpu(desc.buffer_addr); + size_t copy_size = size - desc_offset; + if (copy_size > s->rxbuf_size) { + copy_size = s->rxbuf_size; + } + do { + iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); + pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy); + copy_size -= iov_copy; + ba += iov_copy; + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } while (copy_size); + } + desc_offset += desc_size; + desc.length = cpu_to_le16(desc_size); + if (desc_offset >= total_size) { + desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM; + } else { + /* Guest zeroing out status is not a hardware requirement. + Clear EOP in case guest didn't do it. */ + desc.status &= ~E1000_RXD_STAT_EOP; + } + } else { // as per intel docs; skip descriptors with null buf addr + DBGOUT(RX, "Null RX descriptor!!\n"); + } + pci_dma_write(d, base, &desc, sizeof(desc)); + + if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN]) + s->mac_reg[RDH] = 0; + /* see comment in start_xmit; same here */ + if (s->mac_reg[RDH] == rdh_start) { + DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n", + rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]); + set_ics(s, 0, E1000_ICS_RXO); + return -1; + } + } while (desc_offset < total_size); + + s->mac_reg[GPRC]++; + s->mac_reg[TPR]++; + /* TOR - Total Octets Received: + * This register includes bytes received in a packet from the <Destination + * Address> field through the <CRC> field, inclusively. + */ + n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4; + if (n < s->mac_reg[TORL]) + s->mac_reg[TORH]++; + s->mac_reg[TORL] = n; + + n = E1000_ICS_RXT0; + if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) + rdt += s->mac_reg[RDLEN] / sizeof(desc); + if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >> + s->rxbuf_min_shift) + n |= E1000_ICS_RXDMT0; + + set_ics(s, 0, n); + + return size; +} + +static ssize_t +e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return e1000_receive_iov(nc, &iov, 1); +} + +static uint32_t +mac_readreg(E1000State *s, int index) +{ + return s->mac_reg[index]; +} + +static uint32_t +mac_icr_read(E1000State *s, int index) +{ + uint32_t ret = s->mac_reg[ICR]; + + DBGOUT(INTERRUPT, "ICR read: %x\n", ret); + set_interrupt_cause(s, 0, 0); + return ret; +} + +static uint32_t +mac_read_clr4(E1000State *s, int index) +{ + uint32_t ret = s->mac_reg[index]; + + s->mac_reg[index] = 0; + return ret; +} + +static uint32_t +mac_read_clr8(E1000State *s, int index) +{ + uint32_t ret = s->mac_reg[index]; + + s->mac_reg[index] = 0; + s->mac_reg[index-1] = 0; + return ret; +} + +static void +mac_writereg(E1000State *s, int index, uint32_t val) +{ + uint32_t macaddr[2]; + + s->mac_reg[index] = val; + + if (index == RA + 1) { + macaddr[0] = cpu_to_le32(s->mac_reg[RA]); + macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]); + qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr); + } +} + +static void +set_rdt(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[index] = val & 0xffff; + if (e1000_has_rxbufs(s, 1)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + +static void +set_16bit(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[index] = val & 0xffff; +} + +static void +set_dlen(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[index] = val & 0xfff80; +} + +static void +set_tctl(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[index] = val; + s->mac_reg[TDT] &= 0xffff; + start_xmit(s); +} + +static void +set_icr(E1000State *s, int index, uint32_t val) +{ + DBGOUT(INTERRUPT, "set_icr %x\n", val); + set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val); +} + +static void +set_imc(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[IMS] &= ~val; + set_ics(s, 0, 0); +} + +static void +set_ims(E1000State *s, int index, uint32_t val) +{ + s->mac_reg[IMS] |= val; + set_ics(s, 0, 0); +} + +#define getreg(x) [x] = mac_readreg +static uint32_t (*macreg_readops[])(E1000State *, int) = { + getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL), + getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL), + getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS), + getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL), + getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), + getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), + getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV), + getreg(TADV), getreg(ITR), + + [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, + [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, + [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read, + [CRCERRS ... MPC] = &mac_readreg, + [RA ... RA+31] = &mac_readreg, + [MTA ... MTA+127] = &mac_readreg, + [VFTA ... VFTA+127] = &mac_readreg, +}; +enum { NREADOPS = ARRAY_SIZE(macreg_readops) }; + +#define putreg(x) [x] = mac_writereg +static void (*macreg_writeops[])(E1000State *, int, uint32_t) = { + putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), + putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), + putreg(RDBAL), putreg(LEDCTL), putreg(VET), + [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, + [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, + [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, + [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr, + [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl, + [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, + [ITR] = set_16bit, + [RA ... RA+31] = &mac_writereg, + [MTA ... MTA+127] = &mac_writereg, + [VFTA ... VFTA+127] = &mac_writereg, +}; + +enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; + +static void +e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + E1000State *s = opaque; + unsigned int index = (addr & 0x1ffff) >> 2; + + if (index < NWRITEOPS && macreg_writeops[index]) { + macreg_writeops[index](s, index, val); + } else if (index < NREADOPS && macreg_readops[index]) { + DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val); + } else { + DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n", + index<<2, val); + } +} + +static uint64_t +e1000_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000State *s = opaque; + unsigned int index = (addr & 0x1ffff) >> 2; + + if (index < NREADOPS && macreg_readops[index]) + { + return macreg_readops[index](s, index); + } + DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2); + return 0; +} + +static const MemoryRegionOps e1000_mmio_ops = { + .read = e1000_mmio_read, + .write = e1000_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t e1000_io_read(void *opaque, hwaddr addr, + unsigned size) +{ + E1000State *s = opaque; + + (void)s; + return 0; +} + +static void e1000_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000State *s = opaque; + + (void)s; +} + +static const MemoryRegionOps e1000_io_ops = { + .read = e1000_io_read, + .write = e1000_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static bool is_version_1(void *opaque, int version_id) +{ + return version_id == 1; +} + +static void e1000_pre_save(void *opaque) +{ + E1000State *s = opaque; + NetClientState *nc = qemu_get_queue(s->nic); + + /* If the mitigation timer is active, emulate a timeout now. */ + if (s->mit_timer_on) { + e1000_mit_timer(s); + } + + /* + * If link is down and auto-negotiation is supported and ongoing, + * complete auto-negotiation immediately. This allows us to look + * at MII_SR_AUTONEG_COMPLETE to infer link status on load. + */ + if (nc->link_down && have_autoneg(s)) { + s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + } +} + +static int e1000_post_load(void *opaque, int version_id) +{ + E1000State *s = opaque; + NetClientState *nc = qemu_get_queue(s->nic); + + if (!(s->compat_flags & E1000_FLAG_MIT)) { + s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] = + s->mac_reg[TADV] = 0; + s->mit_irq_level = false; + } + s->mit_ide = 0; + s->mit_timer_on = false; + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in mac_reg[STATUS]. + * Alternatively, restart link negotiation if it was in progress. */ + nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0; + + if (have_autoneg(s) && + !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + nc->link_down = false; + timer_mod(s->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } + + return 0; +} + +static bool e1000_mit_state_needed(void *opaque) +{ + E1000State *s = opaque; + + return s->compat_flags & E1000_FLAG_MIT; +} + +static const VMStateDescription vmstate_e1000_mit_state = { + .name = "e1000/mit_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = e1000_mit_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(mac_reg[RDTR], E1000State), + VMSTATE_UINT32(mac_reg[RADV], E1000State), + VMSTATE_UINT32(mac_reg[TADV], E1000State), + VMSTATE_UINT32(mac_reg[ITR], E1000State), + VMSTATE_BOOL(mit_irq_level, E1000State), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_e1000 = { + .name = "e1000", + .version_id = 2, + .minimum_version_id = 1, + .pre_save = e1000_pre_save, + .post_load = e1000_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, E1000State), + VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */ + VMSTATE_UNUSED(4), /* Was mmio_base. */ + VMSTATE_UINT32(rxbuf_size, E1000State), + VMSTATE_UINT32(rxbuf_min_shift, E1000State), + VMSTATE_UINT32(eecd_state.val_in, E1000State), + VMSTATE_UINT16(eecd_state.bitnum_in, E1000State), + VMSTATE_UINT16(eecd_state.bitnum_out, E1000State), + VMSTATE_UINT16(eecd_state.reading, E1000State), + VMSTATE_UINT32(eecd_state.old_eecd, E1000State), + VMSTATE_UINT8(tx.ipcss, E1000State), + VMSTATE_UINT8(tx.ipcso, E1000State), + VMSTATE_UINT16(tx.ipcse, E1000State), + VMSTATE_UINT8(tx.tucss, E1000State), + VMSTATE_UINT8(tx.tucso, E1000State), + VMSTATE_UINT16(tx.tucse, E1000State), + VMSTATE_UINT32(tx.paylen, E1000State), + VMSTATE_UINT8(tx.hdr_len, E1000State), + VMSTATE_UINT16(tx.mss, E1000State), + VMSTATE_UINT16(tx.size, E1000State), + VMSTATE_UINT16(tx.tso_frames, E1000State), + VMSTATE_UINT8(tx.sum_needed, E1000State), + VMSTATE_INT8(tx.ip, E1000State), + VMSTATE_INT8(tx.tcp, E1000State), + VMSTATE_BUFFER(tx.header, E1000State), + VMSTATE_BUFFER(tx.data, E1000State), + VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64), + VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20), + VMSTATE_UINT32(mac_reg[CTRL], E1000State), + VMSTATE_UINT32(mac_reg[EECD], E1000State), + VMSTATE_UINT32(mac_reg[EERD], E1000State), + VMSTATE_UINT32(mac_reg[GPRC], E1000State), + VMSTATE_UINT32(mac_reg[GPTC], E1000State), + VMSTATE_UINT32(mac_reg[ICR], E1000State), + VMSTATE_UINT32(mac_reg[ICS], E1000State), + VMSTATE_UINT32(mac_reg[IMC], E1000State), + VMSTATE_UINT32(mac_reg[IMS], E1000State), + VMSTATE_UINT32(mac_reg[LEDCTL], E1000State), + VMSTATE_UINT32(mac_reg[MANC], E1000State), + VMSTATE_UINT32(mac_reg[MDIC], E1000State), + VMSTATE_UINT32(mac_reg[MPC], E1000State), + VMSTATE_UINT32(mac_reg[PBA], E1000State), + VMSTATE_UINT32(mac_reg[RCTL], E1000State), + VMSTATE_UINT32(mac_reg[RDBAH], E1000State), + VMSTATE_UINT32(mac_reg[RDBAL], E1000State), + VMSTATE_UINT32(mac_reg[RDH], E1000State), + VMSTATE_UINT32(mac_reg[RDLEN], E1000State), + VMSTATE_UINT32(mac_reg[RDT], E1000State), + VMSTATE_UINT32(mac_reg[STATUS], E1000State), + VMSTATE_UINT32(mac_reg[SWSM], E1000State), + VMSTATE_UINT32(mac_reg[TCTL], E1000State), + VMSTATE_UINT32(mac_reg[TDBAH], E1000State), + VMSTATE_UINT32(mac_reg[TDBAL], E1000State), + VMSTATE_UINT32(mac_reg[TDH], E1000State), + VMSTATE_UINT32(mac_reg[TDLEN], E1000State), + VMSTATE_UINT32(mac_reg[TDT], E1000State), + VMSTATE_UINT32(mac_reg[TORH], E1000State), + VMSTATE_UINT32(mac_reg[TORL], E1000State), + VMSTATE_UINT32(mac_reg[TOTH], E1000State), + VMSTATE_UINT32(mac_reg[TOTL], E1000State), + VMSTATE_UINT32(mac_reg[TPR], E1000State), + VMSTATE_UINT32(mac_reg[TPT], E1000State), + VMSTATE_UINT32(mac_reg[TXDCTL], E1000State), + VMSTATE_UINT32(mac_reg[WUFC], E1000State), + VMSTATE_UINT32(mac_reg[VET], E1000State), + VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32), + VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128), + VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_e1000_mit_state, + NULL + } +}; + +/* + * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102. + * Note: A valid DevId will be inserted during pci_e1000_init(). + */ +static const uint16_t e1000_eeprom_template[64] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, + 0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040, + 0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700, + 0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706, + 0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, +}; + +/* PCI interface */ + +static void +e1000_mmio_setup(E1000State *d) +{ + int i; + const uint32_t excluded_regs[] = { + E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS, + E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE + }; + + memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d, + "e1000-mmio", PNPMMIO_SIZE); + memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]); + for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++) + memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4, + excluded_regs[i+1] - excluded_regs[i] - 4); + memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE); +} + +static void +pci_e1000_uninit(PCIDevice *dev) +{ + E1000State *d = E1000(dev); + + timer_del(d->autoneg_timer); + timer_free(d->autoneg_timer); + timer_del(d->mit_timer); + timer_free(d->mit_timer); + qemu_del_nic(d->nic); +} + +static NetClientInfo net_e1000_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = e1000_can_receive, + .receive = e1000_receive, + .receive_iov = e1000_receive_iov, + .link_status_changed = e1000_set_link_status, +}; + +static void e1000_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + E1000State *s = E1000(pci_dev); + + pci_default_write_config(pci_dev, address, val, len); + + if (range_covers_byte(address, len, PCI_COMMAND) && + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + + +static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) +{ + DeviceState *dev = DEVICE(pci_dev); + E1000State *d = E1000(pci_dev); + PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev); + uint8_t *pci_conf; + uint16_t checksum = 0; + int i; + uint8_t *macaddr; + + pci_dev->config_write = e1000_write_config; + + pci_conf = pci_dev->config; + + /* TODO: RST# value should be 0, PCI spec 6.2.4 */ + pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; + + pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + + e1000_mmio_setup(d); + + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); + + pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io); + + memmove(d->eeprom_data, e1000_eeprom_template, + sizeof e1000_eeprom_template); + qemu_macaddr_default_if_unset(&d->conf.macaddr); + macaddr = d->conf.macaddr.a; + for (i = 0; i < 3; i++) + d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i]; + d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id; + for (i = 0; i < EEPROM_CHECKSUM_REG; i++) + checksum += d->eeprom_data[i]; + checksum = (uint16_t) EEPROM_SUM - checksum; + d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum; + + d->nic = qemu_new_nic(&net_e1000_info, &d->conf, + object_get_typename(OBJECT(d)), dev->id, d); + + qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr); + + d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d); + d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); +} + +static void qdev_e1000_reset(DeviceState *dev) +{ + E1000State *d = E1000(dev); + e1000_reset(d); +} + +static Property e1000_properties[] = { + DEFINE_NIC_PROPERTIES(E1000State, conf), + DEFINE_PROP_BIT("autonegotiation", E1000State, + compat_flags, E1000_FLAG_AUTONEG_BIT, true), + DEFINE_PROP_BIT("mitigation", E1000State, + compat_flags, E1000_FLAG_MIT_BIT, true), + DEFINE_PROP_END_OF_LIST(), +}; + +typedef struct E1000Info { + const char *name; + uint16_t device_id; + uint8_t revision; + uint16_t phy_id2; +} E1000Info; + +static void e1000_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + E1000BaseClass *e = E1000_DEVICE_CLASS(klass); + const E1000Info *info = data; + + k->realize = pci_e1000_realize; + k->exit = pci_e1000_uninit; + k->romfile = "efi-e1000.rom"; + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->device_id = info->device_id; + k->revision = info->revision; + e->phy_id2 = info->phy_id2; + k->class_id = PCI_CLASS_NETWORK_ETHERNET; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "Intel Gigabit Ethernet"; + dc->reset = qdev_e1000_reset; + dc->vmsd = &vmstate_e1000; + dc->props = e1000_properties; +} + +static void e1000_instance_init(Object *obj) +{ + E1000State *n = E1000(obj); + device_add_bootindex_property(obj, &n->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(n), NULL); +} + +static const TypeInfo e1000_base_info = { + .name = TYPE_E1000_BASE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(E1000State), + .instance_init = e1000_instance_init, + .class_size = sizeof(E1000BaseClass), + .abstract = true, +}; + +static const E1000Info e1000_devices[] = { + { + .name = "e1000-82540em", + .device_id = E1000_DEV_ID_82540EM, + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_82544x, + }, + { + .name = "e1000-82545em", + .device_id = E1000_DEV_ID_82545EM_COPPER, + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, +}; + +static const TypeInfo e1000_default_info = { + .name = "e1000", + .parent = "e1000-82540em", +}; + +static void e1000_register_types(void) +{ + int i; + + type_register_static(&e1000_base_info); + for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) { + const E1000Info *info = &e1000_devices[i]; + TypeInfo type_info = {}; + + type_info.name = info->name; + type_info.parent = TYPE_E1000_BASE; + type_info.class_data = (void *)info; + type_info.class_init = e1000_class_init; + type_info.instance_init = e1000_instance_init; + + type_register(&type_info); + } + type_register_static(&e1000_default_info); +} + +type_init(e1000_register_types) diff --git a/qemu/hw/net/e1000_regs.h b/qemu/hw/net/e1000_regs.h new file mode 100644 index 000000000..60b96aaf1 --- /dev/null +++ b/qemu/hw/net/e1000_regs.h @@ -0,0 +1,902 @@ +/******************************************************************************* + + Intel PRO/1000 Linux driver + Copyright(c) 1999 - 2006 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, see <http://www.gnu.org/licenses/>. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS <linux.nics@intel.com> + e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* e1000_hw.h + * Structures, enums, and macros for the MAC + */ + +#ifndef _E1000_HW_H_ +#define _E1000_HW_H_ + + +/* PCI Device IDs */ +#define E1000_DEV_ID_82542 0x1000 +#define E1000_DEV_ID_82543GC_FIBER 0x1001 +#define E1000_DEV_ID_82543GC_COPPER 0x1004 +#define E1000_DEV_ID_82544EI_COPPER 0x1008 +#define E1000_DEV_ID_82544EI_FIBER 0x1009 +#define E1000_DEV_ID_82544GC_COPPER 0x100C +#define E1000_DEV_ID_82544GC_LOM 0x100D +#define E1000_DEV_ID_82540EM 0x100E +#define E1000_DEV_ID_82540EM_LOM 0x1015 +#define E1000_DEV_ID_82540EP_LOM 0x1016 +#define E1000_DEV_ID_82540EP 0x1017 +#define E1000_DEV_ID_82540EP_LP 0x101E +#define E1000_DEV_ID_82545EM_COPPER 0x100F +#define E1000_DEV_ID_82545EM_FIBER 0x1011 +#define E1000_DEV_ID_82545GM_COPPER 0x1026 +#define E1000_DEV_ID_82545GM_FIBER 0x1027 +#define E1000_DEV_ID_82545GM_SERDES 0x1028 +#define E1000_DEV_ID_82546EB_COPPER 0x1010 +#define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D +#define E1000_DEV_ID_82541EI 0x1013 +#define E1000_DEV_ID_82541EI_MOBILE 0x1018 +#define E1000_DEV_ID_82541ER_LOM 0x1014 +#define E1000_DEV_ID_82541ER 0x1078 +#define E1000_DEV_ID_82547GI 0x1075 +#define E1000_DEV_ID_82541GI 0x1076 +#define E1000_DEV_ID_82541GI_MOBILE 0x1077 +#define E1000_DEV_ID_82541GI_LF 0x107C +#define E1000_DEV_ID_82546GB_COPPER 0x1079 +#define E1000_DEV_ID_82546GB_FIBER 0x107A +#define E1000_DEV_ID_82546GB_SERDES 0x107B +#define E1000_DEV_ID_82546GB_PCIE 0x108A +#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 +#define E1000_DEV_ID_82547EI 0x1019 +#define E1000_DEV_ID_82547EI_MOBILE 0x101A +#define E1000_DEV_ID_82571EB_COPPER 0x105E +#define E1000_DEV_ID_82571EB_FIBER 0x105F +#define E1000_DEV_ID_82571EB_SERDES 0x1060 +#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 +#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 +#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 +#define E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE 0x10BC +#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 +#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA +#define E1000_DEV_ID_82572EI_COPPER 0x107D +#define E1000_DEV_ID_82572EI_FIBER 0x107E +#define E1000_DEV_ID_82572EI_SERDES 0x107F +#define E1000_DEV_ID_82572EI 0x10B9 +#define E1000_DEV_ID_82573E 0x108B +#define E1000_DEV_ID_82573E_IAMT 0x108C +#define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 +#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 +#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 +#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA +#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB + +#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 +#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A +#define E1000_DEV_ID_ICH8_IGP_C 0x104B +#define E1000_DEV_ID_ICH8_IFE 0x104C +#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 +#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 +#define E1000_DEV_ID_ICH8_IGP_M 0x104D + +/* Device Specific Register Defaults */ +#define E1000_PHY_ID2_82541x 0x380 +#define E1000_PHY_ID2_82544x 0xC30 +#define E1000_PHY_ID2_8254xx_DEFAULT 0xC20 /* 82540x, 82545x, and 82546x */ +#define E1000_PHY_ID2_82573x 0xCC0 + +/* Register Set. (82543, 82544) + * + * Registers are defined to be 32 bits and should be accessed as 32 bit values. + * These registers are physically located on the NIC, but are mapped into the + * host memory address space. + * + * RW - register is both readable and writable + * RO - register is read only + * WO - register is write only + * R/clr - register is read only and is cleared when read + * A - register array + */ +#define E1000_CTRL 0x00000 /* Device Control - RW */ +#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ +#define E1000_STATUS 0x00008 /* Device Status - RO */ +#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define E1000_EERD 0x00014 /* EEPROM Read - RW */ +#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define E1000_FLA 0x0001C /* Flash Access - RW */ +#define E1000_MDIC 0x00020 /* MDI Control - RW */ +#define E1000_SCTL 0x00024 /* SerDes Control - RW */ +#define E1000_FEXTNVM 0x00028 /* Future Extended NVM register */ +#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ +#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ +#define E1000_FCT 0x00030 /* Flow Control Type - RW */ +#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ +#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ +#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ +#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ +#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ +#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ +#define E1000_RCTL 0x00100 /* RX Control - RW */ +#define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ +#define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ +#define E1000_RDBAH1 0x02904 /* RX Descriptor Base Address High (1) - RW */ +#define E1000_RDLEN1 0x02908 /* RX Descriptor Length (1) - RW */ +#define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ +#define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ +#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ +#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ +#define E1000_TCTL 0x00400 /* TX Control - RW */ +#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ +#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ +#define E1000_TBT 0x00448 /* TX Burst Timer - RW */ +#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ +#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ +#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ +#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ +#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ +#define FEXTNVM_SW_CONFIG 0x0001 +#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ +#define E1000_PBS 0x01008 /* Packet Buffer Size */ +#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_FLASH_UPDATES 1000 +#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ +#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ +#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ +#define E1000_FLSWCTL 0x01030 /* FLASH control register */ +#define E1000_FLSWDATA 0x01034 /* FLASH data register */ +#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ +#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ +#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ +#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ +#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ +#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ +#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */ +#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ +#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ +#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ +#define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ +#define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ +#define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ +#define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ +#define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ +#define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ +#define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ +#define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ +#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */ +#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */ +#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ +#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ +#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ +#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ +#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ +#define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ +#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ +#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ +#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ +#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ +#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ +#define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ +#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ +#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ +#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ +#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ +#define E1000_TARC0 0x03840 /* TX Arbitration Count (0) */ +#define E1000_TDBAL1 0x03900 /* TX Desc Base Address Low (1) - RW */ +#define E1000_TDBAH1 0x03904 /* TX Desc Base Address High (1) - RW */ +#define E1000_TDLEN1 0x03908 /* TX Desc Length (1) - RW */ +#define E1000_TDH1 0x03910 /* TX Desc Head (1) - RW */ +#define E1000_TDT1 0x03918 /* TX Desc Tail (1) - RW */ +#define E1000_TXDCTL1 0x03928 /* TX Descriptor Control (1) - RW */ +#define E1000_TARC1 0x03940 /* TX Arbitration Count (1) */ +#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ +#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define E1000_COLC 0x04028 /* Collision Count - R/clr */ +#define E1000_DC 0x04030 /* Defer Count - R/clr */ +#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ +#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ +#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ +#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ +#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ +#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ +#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ +#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ +#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ +#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ +#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ +#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ +#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ +#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ +#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ +#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ +#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ +#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ +#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ +#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ +#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ +#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ +#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ +#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ +#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ +#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ +#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ +#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ +#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ +#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ +#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ +#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ +#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ +#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ +#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ +#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ +#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ +#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ +#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ +#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ +#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ +#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ +#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ +#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ +#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ +#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ +#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ +#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ +#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ +#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Minimum Threshold Count */ +#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ +#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ +#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ +#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ +#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_WUC 0x05800 /* Wakeup Control - RW */ +#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define E1000_WUS 0x05810 /* Wakeup Status - RO */ +#define E1000_MANC 0x05820 /* Management Control - RW */ +#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ +#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ +#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ +#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ +#define E1000_HOST_IF 0x08800 /* Host Interface */ +#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ + +#define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ +#define E1000_MDPHYA 0x0003C /* PHY address - RW */ +#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ + +#define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ +#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ +#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ +#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ +#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ +#define E1000_SWSM 0x05B50 /* SW Semaphore */ +#define E1000_FWSM 0x05B54 /* FW Semaphore */ +#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ +#define E1000_HICR 0x08F00 /* Host Inteface Control */ + +/* RSS registers */ +#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ +#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ +#define E1000_RETA 0x05C00 /* Redirection Table - RW Array */ +#define E1000_RSSRK 0x05C80 /* RSS Random Key - RW Array */ +#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ +#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CTRL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Regiser */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ +#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */ +#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ +#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ + +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ +#define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ + +/* M88E1000 Specific Registers */ +#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ +#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ +#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ +#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ +#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ +#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ + +#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ +#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ +#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ +#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ +#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ + +/* PHY Control Register */ +#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ + +/* PHY Status Register */ +#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ +#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ +#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ +#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ +#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ +#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ +#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ +#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ +#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ +#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ +#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ + +/* PHY Link Partner Ability Register */ +#define MII_LPAR_LPACK 0x4000 /* Acked by link partner */ + +/* Interrupt Cause Read */ +#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ +#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ +#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ +#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ +#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* rx overrun */ +#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ +#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ +#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ +#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ +#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ +#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ +#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ +#define E1000_ICR_TXD_LOW 0x00008000 +#define E1000_ICR_SRPD 0x00010000 +#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ +#define E1000_ICR_MNG 0x00040000 /* Manageability event */ +#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ +#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ +#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ +#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ +#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ +#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ +#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ +#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ +#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ + +/* Interrupt Cause Set */ +#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ +#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_ICS_SRPD E1000_ICR_SRPD +#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ +#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ +#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ +#define E1000_ICS_DSW E1000_ICR_DSW +#define E1000_ICS_PHYINT E1000_ICR_PHYINT +#define E1000_ICS_EPRST E1000_ICR_EPRST + +/* Interrupt Mask Set */ +#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMS_SRPD E1000_ICR_SRPD +#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ +#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ +#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ +#define E1000_IMS_DSW E1000_ICR_DSW +#define E1000_IMS_PHYINT E1000_ICR_PHYINT +#define E1000_IMS_EPRST E1000_ICR_EPRST + +/* Interrupt Mask Clear */ +#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ +#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ +#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ +#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ +#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ +#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ +#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ +#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ +#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ +#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ +#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ +#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMC_SRPD E1000_ICR_SRPD +#define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */ +#define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */ +#define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_IMC_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_IMC_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ +#define E1000_IMC_DSW E1000_ICR_DSW +#define E1000_IMC_PHYINT E1000_ICR_PHYINT +#define E1000_IMC_EPRST E1000_ICR_EPRST + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ +#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ +#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ +#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ +#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ +#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ +#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ +#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ + + +#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */ +#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */ +#define E1000_EEPROM_RW_REG_DATA 16 /* Offset to data in EEPROM read/write registers */ +#define E1000_EEPROM_RW_REG_DONE 0x10 /* Offset to READ/WRITE done bit */ +#define E1000_EEPROM_RW_REG_START 1 /* First bit for telling part to start operation */ +#define E1000_EEPROM_RW_ADDR_SHIFT 8 /* Shift to the address bits */ +#define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ +#define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ +/* Register Bit Masks */ +/* Device Control */ +#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ +#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ +#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ +#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ +#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ +#define E1000_CTRL_TME 0x00000010 /* Test mode. 0=normal,1=test */ +#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ +#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ +#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ +#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ +#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ +#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ +#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ +#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ +#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ +#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ +#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ +#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ +#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ +#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ +#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ +#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ +#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ +#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ +#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ +#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ +#define E1000_CTRL_RST 0x04000000 /* Global reset */ +#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ +#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ +#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ + +/* Device Status */ +#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ +#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define E1000_STATUS_FUNC_SHIFT 2 +#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ +#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ +#define E1000_STATUS_SPEED_MASK 0x000000C0 +#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ +#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion + by EEPROM/Flash */ +#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ +#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ +#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ +#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ +#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ +#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ +#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ +#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ +#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ +#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ +#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ +#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ +#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ +#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ +#define E1000_STATUS_FUSE_8 0x04000000 +#define E1000_STATUS_FUSE_9 0x08000000 +#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ +#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ + +/* EEPROM/Flash Control */ +#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ +#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */ +#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */ +#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */ +#define E1000_EECD_FWE_MASK 0x00000030 +#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ +#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ +#define E1000_EECD_FWE_SHIFT 4 +#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */ +#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */ +#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */ +#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */ +#define E1000_EECD_ADDR_BITS 0x00000400 /* EEPROM Addressing bits based on type + * (0-small, 1-large) */ +#define E1000_EECD_TYPE 0x00002000 /* EEPROM Type (1-SPI, 0-Microwire) */ +#ifndef E1000_EEPROM_GRANT_ATTEMPTS +#define E1000_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */ +#endif +#define E1000_EECD_AUTO_RD 0x00000200 /* EEPROM Auto Read done */ +#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* EEprom Size */ +#define E1000_EECD_SIZE_EX_SHIFT 11 +#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ +#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ +#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ +#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ +#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ +#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ +#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ +#define E1000_EECD_SECVAL_SHIFT 22 +#define E1000_STM_OPCODE 0xDB00 +#define E1000_HICR_FW_RESET 0xC0 + +#define E1000_SHADOW_RAM_WORDS 2048 +#define E1000_ICH_NVM_SIG_WORD 0x13 +#define E1000_ICH_NVM_SIG_MASK 0xC0 + +/* MDI Control */ +#define E1000_MDIC_DATA_MASK 0x0000FFFF +#define E1000_MDIC_REG_MASK 0x001F0000 +#define E1000_MDIC_REG_SHIFT 16 +#define E1000_MDIC_PHY_MASK 0x03E00000 +#define E1000_MDIC_PHY_SHIFT 21 +#define E1000_MDIC_OP_WRITE 0x04000000 +#define E1000_MDIC_OP_READ 0x08000000 +#define E1000_MDIC_READY 0x10000000 +#define E1000_MDIC_INT_EN 0x20000000 +#define E1000_MDIC_ERROR 0x40000000 + +/* EEPROM Commands - Microwire */ +#define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ +#define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ +#define EEPROM_ERASE_OPCODE_MICROWIRE 0x7 /* EEPROM erase opcode */ +#define EEPROM_EWEN_OPCODE_MICROWIRE 0x13 /* EEPROM erase/write enable */ +#define EEPROM_EWDS_OPCODE_MICROWIRE 0x10 /* EEPROM erast/write disable */ + +/* EEPROM Word Offsets */ +#define EEPROM_COMPAT 0x0003 +#define EEPROM_ID_LED_SETTINGS 0x0004 +#define EEPROM_VERSION 0x0005 +#define EEPROM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ +#define EEPROM_PHY_CLASS_WORD 0x0007 +#define EEPROM_INIT_CONTROL1_REG 0x000A +#define EEPROM_INIT_CONTROL2_REG 0x000F +#define EEPROM_SWDEF_PINS_CTRL_PORT_1 0x0010 +#define EEPROM_INIT_CONTROL3_PORT_B 0x0014 +#define EEPROM_INIT_3GIO_3 0x001A +#define EEPROM_SWDEF_PINS_CTRL_PORT_0 0x0020 +#define EEPROM_INIT_CONTROL3_PORT_A 0x0024 +#define EEPROM_CFG 0x0012 +#define EEPROM_FLASH_VERSION 0x0032 +#define EEPROM_CHECKSUM_REG 0x003F + +#define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ +#define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ + +/* Transmit Descriptor */ +struct e1000_tx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t cso; /* Checksum offset */ + uint8_t cmd; /* Descriptor control */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t css; /* Checksum start */ + uint16_t special; + } fields; + } upper; +}; + +/* Transmit Descriptor bit definitions */ +#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ +#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ +#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ +#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ +#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ +#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ +#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ +#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ +#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ +#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ +#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ +#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ + +/* Transmit Control */ +#define E1000_TCTL_RST 0x00000001 /* software reset */ +#define E1000_TCTL_EN 0x00000002 /* enable tx */ +#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ +#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ +#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ + +/* Receive Descriptor */ +struct e1000_rx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + uint16_t length; /* Length of data DMAed into data buffer */ + uint16_t csum; /* Packet checksum */ + uint8_t status; /* Descriptor status */ + uint8_t errors; /* Descriptor Errors */ + uint16_t special; +}; + +/* Receive Descriptor bit definitions */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ +#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ +#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ +#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ +#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ +#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ +#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ +#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ +#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ +#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ +#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ +#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ +#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ +#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ +#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ +#define E1000_RXD_SPC_PRI_SHIFT 13 +#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ +#define E1000_RXD_SPC_CFI_SHIFT 12 + +#define E1000_RXDEXT_STATERR_CE 0x01000000 +#define E1000_RXDEXT_STATERR_SE 0x02000000 +#define E1000_RXDEXT_STATERR_SEQ 0x04000000 +#define E1000_RXDEXT_STATERR_CXE 0x10000000 +#define E1000_RXDEXT_STATERR_TCPE 0x20000000 +#define E1000_RXDEXT_STATERR_IPE 0x40000000 +#define E1000_RXDEXT_STATERR_RXE 0x80000000 + +#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 +#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF + +/* Receive Address */ +#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ + +/* Offload Context Descriptor */ +struct e1000_context_desc { + union { + uint32_t ip_config; + struct { + uint8_t ipcss; /* IP checksum start */ + uint8_t ipcso; /* IP checksum offset */ + uint16_t ipcse; /* IP checksum end */ + } ip_fields; + } lower_setup; + union { + uint32_t tcp_config; + struct { + uint8_t tucss; /* TCP checksum start */ + uint8_t tucso; /* TCP checksum offset */ + uint16_t tucse; /* TCP checksum end */ + } tcp_fields; + } upper_setup; + uint32_t cmd_and_length; /* */ + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t hdr_len; /* Header length */ + uint16_t mss; /* Maximum segment size */ + } fields; + } tcp_seg_setup; +}; + +/* Offload data descriptor */ +struct e1000_data_desc { + uint64_t buffer_addr; /* Address of the descriptor's buffer address */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t typ_len_ext; /* */ + uint8_t cmd; /* */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t popts; /* Packet Options */ + uint16_t special; /* */ + } fields; + } upper; +}; + +/* Management Control */ +#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ +#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ +#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ +#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ +#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ +#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ +#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ +#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ +#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery + * Filtering */ +#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ +#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ +#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ +#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ +#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ +#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address + * filtering */ +#define E1000_MANC_EN_MNG2HOST 0x00200000 /* Enable MNG packets to host + * memory */ +#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 /* Enable IP address + * filtering */ +#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ +#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ +#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ +#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ +#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ +#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ +#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ +#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ + +#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ +#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ + +/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ +#define EEPROM_SUM 0xBABA + +#endif /* _E1000_HW_H_ */ diff --git a/qemu/hw/net/eepro100.c b/qemu/hw/net/eepro100.c new file mode 100644 index 000000000..60333b7fc --- /dev/null +++ b/qemu/hw/net/eepro100.c @@ -0,0 +1,2098 @@ +/* + * QEMU i8255x (PRO100) emulation + * + * Copyright (C) 2006-2011 Stefan Weil + * + * Portions of the code are copies from grub / etherboot eepro100.c + * and linux e100.c. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Tested features (i82559): + * PXE boot (i386 guest, i386 / mips / mipsel / ppc host) ok + * Linux networking (i386) ok + * + * Untested: + * Windows networking + * + * References: + * + * Intel 8255x 10/100 Mbps Ethernet Controller Family + * Open Source Software Developer Manual + * + * TODO: + * * PHY emulation should be separated from nic emulation. + * Most nic emulations could share the same phy code. + * * i82550 is untested. It is programmed like the i82559. + * * i82562 is untested. It is programmed like the i82559. + * * Power management (i82558 and later) is not implemented. + * * Wake-on-LAN is not implemented. + */ + +#include <stddef.h> /* offsetof */ +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "hw/nvram/eeprom93xx.h" +#include "sysemu/sysemu.h" +#include "sysemu/dma.h" +#include "qemu/bitops.h" + +/* QEMU sends frames smaller than 60 bytes to ethernet nics. + * Such frames are rejected by real nics and their emulations. + * To avoid this behaviour, other nic emulations pad received + * frames. The following definition enables this padding for + * eepro100, too. We keep the define around in case it might + * become useful the future if the core networking is ever + * changed to pad short packets itself. */ +#define CONFIG_PAD_RECEIVED_FRAMES + +#define KiB 1024 + +/* Debug EEPRO100 card. */ +#if 0 +# define DEBUG_EEPRO100 +#endif + +#ifdef DEBUG_EEPRO100 +#define logout(fmt, ...) fprintf(stderr, "EE100\t%-24s" fmt, __func__, ## __VA_ARGS__) +#else +#define logout(fmt, ...) ((void)0) +#endif + +/* Set flags to 0 to disable debug output. */ +#define INT 1 /* interrupt related actions */ +#define MDI 1 /* mdi related actions */ +#define OTHER 1 +#define RXTX 1 +#define EEPROM 1 /* eeprom related actions */ + +#define TRACE(flag, command) ((flag) ? (command) : (void)0) + +#define missing(text) fprintf(stderr, "eepro100: feature is missing in this emulation: " text "\n") + +#define MAX_ETH_FRAME_SIZE 1514 + +/* This driver supports several different devices which are declared here. */ +#define i82550 0x82550 +#define i82551 0x82551 +#define i82557A 0x82557a +#define i82557B 0x82557b +#define i82557C 0x82557c +#define i82558A 0x82558a +#define i82558B 0x82558b +#define i82559A 0x82559a +#define i82559B 0x82559b +#define i82559C 0x82559c +#define i82559ER 0x82559e +#define i82562 0x82562 +#define i82801 0x82801 + +/* Use 64 word EEPROM. TODO: could be a runtime option. */ +#define EEPROM_SIZE 64 + +#define PCI_MEM_SIZE (4 * KiB) +#define PCI_IO_SIZE 64 +#define PCI_FLASH_SIZE (128 * KiB) + +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* The SCB accepts the following controls for the Tx and Rx units: */ +#define CU_NOP 0x0000 /* No operation. */ +#define CU_START 0x0010 /* CU start. */ +#define CU_RESUME 0x0020 /* CU resume. */ +#define CU_STATSADDR 0x0040 /* Load dump counters address. */ +#define CU_SHOWSTATS 0x0050 /* Dump statistical counters. */ +#define CU_CMD_BASE 0x0060 /* Load CU base address. */ +#define CU_DUMPSTATS 0x0070 /* Dump and reset statistical counters. */ +#define CU_SRESUME 0x00a0 /* CU static resume. */ + +#define RU_NOP 0x0000 +#define RX_START 0x0001 +#define RX_RESUME 0x0002 +#define RU_ABORT 0x0004 +#define RX_ADDR_LOAD 0x0006 +#define RX_RESUMENR 0x0007 +#define INT_MASK 0x0100 +#define DRVR_INT 0x0200 /* Driver generated interrupt. */ + +typedef struct { + const char *name; + const char *desc; + uint16_t device_id; + uint8_t revision; + uint16_t subsystem_vendor_id; + uint16_t subsystem_id; + + uint32_t device; + uint8_t stats_size; + bool has_extended_tcb_support; + bool power_management; +} E100PCIDeviceInfo; + +/* Offsets to the various registers. + All accesses need not be longword aligned. */ +typedef enum { + SCBStatus = 0, /* Status Word. */ + SCBAck = 1, + SCBCmd = 2, /* Rx/Command Unit command and status. */ + SCBIntmask = 3, + SCBPointer = 4, /* General purpose pointer. */ + SCBPort = 8, /* Misc. commands and operands. */ + SCBflash = 12, /* Flash memory control. */ + SCBeeprom = 14, /* EEPROM control. */ + SCBCtrlMDI = 16, /* MDI interface control. */ + SCBEarlyRx = 20, /* Early receive byte count. */ + SCBFlow = 24, /* Flow Control. */ + SCBpmdr = 27, /* Power Management Driver. */ + SCBgctrl = 28, /* General Control. */ + SCBgstat = 29, /* General Status. */ +} E100RegisterOffset; + +/* A speedo3 transmit buffer descriptor with two buffers... */ +typedef struct { + uint16_t status; + uint16_t command; + uint32_t link; /* void * */ + uint32_t tbd_array_addr; /* transmit buffer descriptor array address. */ + uint16_t tcb_bytes; /* transmit command block byte count (in lower 14 bits */ + uint8_t tx_threshold; /* transmit threshold */ + uint8_t tbd_count; /* TBD number */ +#if 0 + /* This constitutes two "TBD" entries: hdr and data */ + uint32_t tx_buf_addr0; /* void *, header of frame to be transmitted. */ + int32_t tx_buf_size0; /* Length of Tx hdr. */ + uint32_t tx_buf_addr1; /* void *, data to be transmitted. */ + int32_t tx_buf_size1; /* Length of Tx data. */ +#endif +} eepro100_tx_t; + +/* Receive frame descriptor. */ +typedef struct { + int16_t status; + uint16_t command; + uint32_t link; /* struct RxFD * */ + uint32_t rx_buf_addr; /* void * */ + uint16_t count; + uint16_t size; + /* Ethernet frame data follows. */ +} eepro100_rx_t; + +typedef enum { + COMMAND_EL = BIT(15), + COMMAND_S = BIT(14), + COMMAND_I = BIT(13), + COMMAND_NC = BIT(4), + COMMAND_SF = BIT(3), + COMMAND_CMD = BITS(2, 0), +} scb_command_bit; + +typedef enum { + STATUS_C = BIT(15), + STATUS_OK = BIT(13), +} scb_status_bit; + +typedef struct { + uint32_t tx_good_frames, tx_max_collisions, tx_late_collisions, + tx_underruns, tx_lost_crs, tx_deferred, tx_single_collisions, + tx_multiple_collisions, tx_total_collisions; + uint32_t rx_good_frames, rx_crc_errors, rx_alignment_errors, + rx_resource_errors, rx_overrun_errors, rx_cdt_errors, + rx_short_frame_errors; + uint32_t fc_xmt_pause, fc_rcv_pause, fc_rcv_unsupported; + uint16_t xmt_tco_frames, rcv_tco_frames; + /* TODO: i82559 has six reserved statistics but a total of 24 dwords. */ + uint32_t reserved[4]; +} eepro100_stats_t; + +typedef enum { + cu_idle = 0, + cu_suspended = 1, + cu_active = 2, + cu_lpq_active = 2, + cu_hqp_active = 3 +} cu_state_t; + +typedef enum { + ru_idle = 0, + ru_suspended = 1, + ru_no_resources = 2, + ru_ready = 4 +} ru_state_t; + +typedef struct { + PCIDevice dev; + /* Hash register (multicast mask array, multiple individual addresses). */ + uint8_t mult[8]; + MemoryRegion mmio_bar; + MemoryRegion io_bar; + MemoryRegion flash_bar; + NICState *nic; + NICConf conf; + uint8_t scb_stat; /* SCB stat/ack byte */ + uint8_t int_stat; /* PCI interrupt status */ + /* region must not be saved by nic_save. */ + uint16_t mdimem[32]; + eeprom_t *eeprom; + uint32_t device; /* device variant */ + /* (cu_base + cu_offset) address the next command block in the command block list. */ + uint32_t cu_base; /* CU base address */ + uint32_t cu_offset; /* CU address offset */ + /* (ru_base + ru_offset) address the RFD in the Receive Frame Area. */ + uint32_t ru_base; /* RU base address */ + uint32_t ru_offset; /* RU address offset */ + uint32_t statsaddr; /* pointer to eepro100_stats_t */ + + /* Temporary status information (no need to save these values), + * used while processing CU commands. */ + eepro100_tx_t tx; /* transmit buffer descriptor */ + uint32_t cb_address; /* = cu_base + cu_offset */ + + /* Statistical counters. Also used for wake-up packet (i82559). */ + eepro100_stats_t statistics; + + /* Data in mem is always in the byte order of the controller (le). + * It must be dword aligned to allow direct access to 32 bit values. */ + uint8_t mem[PCI_MEM_SIZE] __attribute__((aligned(8))); + + /* Configuration bytes. */ + uint8_t configuration[22]; + + /* vmstate for each particular nic */ + VMStateDescription *vmstate; + + /* Quasi static device properties (no need to save them). */ + uint16_t stats_size; + bool has_extended_tcb_support; +} EEPRO100State; + +/* Word indices in EEPROM. */ +typedef enum { + EEPROM_CNFG_MDIX = 0x03, + EEPROM_ID = 0x05, + EEPROM_PHY_ID = 0x06, + EEPROM_VENDOR_ID = 0x0c, + EEPROM_CONFIG_ASF = 0x0d, + EEPROM_DEVICE_ID = 0x23, + EEPROM_SMBUS_ADDR = 0x90, +} EEPROMOffset; + +/* Bit values for EEPROM ID word. */ +typedef enum { + EEPROM_ID_MDM = BIT(0), /* Modem */ + EEPROM_ID_STB = BIT(1), /* Standby Enable */ + EEPROM_ID_WMR = BIT(2), /* ??? */ + EEPROM_ID_WOL = BIT(5), /* Wake on LAN */ + EEPROM_ID_DPD = BIT(6), /* Deep Power Down */ + EEPROM_ID_ALT = BIT(7), /* */ + /* BITS(10, 8) device revision */ + EEPROM_ID_BD = BIT(11), /* boot disable */ + EEPROM_ID_ID = BIT(13), /* id bit */ + /* BITS(15, 14) signature */ + EEPROM_ID_VALID = BIT(14), /* signature for valid eeprom */ +} eeprom_id_bit; + +/* Default values for MDI (PHY) registers */ +static const uint16_t eepro100_mdi_default[] = { + /* MDI Registers 0 - 6, 7 */ + 0x3000, 0x780d, 0x02a8, 0x0154, 0x05e1, 0x0000, 0x0000, 0x0000, + /* MDI Registers 8 - 15 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* MDI Registers 16 - 31 */ + 0x0003, 0x0000, 0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +/* Readonly mask for MDI (PHY) registers */ +static const uint16_t eepro100_mdi_mask[] = { + 0x0000, 0xffff, 0xffff, 0xffff, 0xc01f, 0xffff, 0xffff, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0fff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +#define POLYNOMIAL 0x04c11db6 + +static E100PCIDeviceInfo *eepro100_get_class(EEPRO100State *s); + +/* From FreeBSD (locally modified). */ +static unsigned e100_compute_mcast_idx(const uint8_t *ep) +{ + uint32_t crc; + int carry, i, j; + uint8_t b; + + crc = 0xffffffff; + for (i = 0; i < 6; i++) { + b = *ep++; + for (j = 0; j < 8; j++) { + carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01); + crc <<= 1; + b >>= 1; + if (carry) { + crc = ((crc ^ POLYNOMIAL) | carry); + } + } + } + return (crc & BITS(7, 2)) >> 2; +} + +/* Read a 16 bit control/status (CSR) register. */ +static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset addr) +{ + assert(!((uintptr_t)&s->mem[addr] & 1)); + return le16_to_cpup((uint16_t *)&s->mem[addr]); +} + +/* Read a 32 bit control/status (CSR) register. */ +static uint32_t e100_read_reg4(EEPRO100State *s, E100RegisterOffset addr) +{ + assert(!((uintptr_t)&s->mem[addr] & 3)); + return le32_to_cpup((uint32_t *)&s->mem[addr]); +} + +/* Write a 16 bit control/status (CSR) register. */ +static void e100_write_reg2(EEPRO100State *s, E100RegisterOffset addr, + uint16_t val) +{ + assert(!((uintptr_t)&s->mem[addr] & 1)); + cpu_to_le16w((uint16_t *)&s->mem[addr], val); +} + +/* Read a 32 bit control/status (CSR) register. */ +static void e100_write_reg4(EEPRO100State *s, E100RegisterOffset addr, + uint32_t val) +{ + assert(!((uintptr_t)&s->mem[addr] & 3)); + cpu_to_le32w((uint32_t *)&s->mem[addr], val); +} + +#if defined(DEBUG_EEPRO100) +static const char *nic_dump(const uint8_t * buf, unsigned size) +{ + static char dump[3 * 16 + 1]; + char *p = &dump[0]; + if (size > 16) { + size = 16; + } + while (size-- > 0) { + p += sprintf(p, " %02x", *buf++); + } + return dump; +} +#endif /* DEBUG_EEPRO100 */ + +enum scb_stat_ack { + stat_ack_not_ours = 0x00, + stat_ack_sw_gen = 0x04, + stat_ack_rnr = 0x10, + stat_ack_cu_idle = 0x20, + stat_ack_frame_rx = 0x40, + stat_ack_cu_cmd_done = 0x80, + stat_ack_not_present = 0xFF, + stat_ack_rx = (stat_ack_sw_gen | stat_ack_rnr | stat_ack_frame_rx), + stat_ack_tx = (stat_ack_cu_idle | stat_ack_cu_cmd_done), +}; + +static void disable_interrupt(EEPRO100State * s) +{ + if (s->int_stat) { + TRACE(INT, logout("interrupt disabled\n")); + pci_irq_deassert(&s->dev); + s->int_stat = 0; + } +} + +static void enable_interrupt(EEPRO100State * s) +{ + if (!s->int_stat) { + TRACE(INT, logout("interrupt enabled\n")); + pci_irq_assert(&s->dev); + s->int_stat = 1; + } +} + +static void eepro100_acknowledge(EEPRO100State * s) +{ + s->scb_stat &= ~s->mem[SCBAck]; + s->mem[SCBAck] = s->scb_stat; + if (s->scb_stat == 0) { + disable_interrupt(s); + } +} + +static void eepro100_interrupt(EEPRO100State * s, uint8_t status) +{ + uint8_t mask = ~s->mem[SCBIntmask]; + s->mem[SCBAck] |= status; + status = s->scb_stat = s->mem[SCBAck]; + status &= (mask | 0x0f); +#if 0 + status &= (~s->mem[SCBIntmask] | 0x0xf); +#endif + if (status && (mask & 0x01)) { + /* SCB mask and SCB Bit M do not disable interrupt. */ + enable_interrupt(s); + } else if (s->int_stat) { + disable_interrupt(s); + } +} + +static void eepro100_cx_interrupt(EEPRO100State * s) +{ + /* CU completed action command. */ + /* Transmit not ok (82557 only, not in emulation). */ + eepro100_interrupt(s, 0x80); +} + +static void eepro100_cna_interrupt(EEPRO100State * s) +{ + /* CU left the active state. */ + eepro100_interrupt(s, 0x20); +} + +static void eepro100_fr_interrupt(EEPRO100State * s) +{ + /* RU received a complete frame. */ + eepro100_interrupt(s, 0x40); +} + +static void eepro100_rnr_interrupt(EEPRO100State * s) +{ + /* RU is not ready. */ + eepro100_interrupt(s, 0x10); +} + +static void eepro100_mdi_interrupt(EEPRO100State * s) +{ + /* MDI completed read or write cycle. */ + eepro100_interrupt(s, 0x08); +} + +static void eepro100_swi_interrupt(EEPRO100State * s) +{ + /* Software has requested an interrupt. */ + eepro100_interrupt(s, 0x04); +} + +#if 0 +static void eepro100_fcp_interrupt(EEPRO100State * s) +{ + /* Flow control pause interrupt (82558 and later). */ + eepro100_interrupt(s, 0x01); +} +#endif + +static void e100_pci_reset(EEPRO100State * s) +{ + E100PCIDeviceInfo *info = eepro100_get_class(s); + uint32_t device = s->device; + uint8_t *pci_conf = s->dev.config; + + TRACE(OTHER, logout("%p\n", s)); + + /* PCI Status */ + pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_DEVSEL_MEDIUM | + PCI_STATUS_FAST_BACK); + /* PCI Latency Timer */ + pci_set_byte(pci_conf + PCI_LATENCY_TIMER, 0x20); /* latency timer = 32 clocks */ + /* Capability Pointer is set by PCI framework. */ + /* Interrupt Line */ + /* Interrupt Pin */ + pci_set_byte(pci_conf + PCI_INTERRUPT_PIN, 1); /* interrupt pin A */ + /* Minimum Grant */ + pci_set_byte(pci_conf + PCI_MIN_GNT, 0x08); + /* Maximum Latency */ + pci_set_byte(pci_conf + PCI_MAX_LAT, 0x18); + + s->stats_size = info->stats_size; + s->has_extended_tcb_support = info->has_extended_tcb_support; + + switch (device) { + case i82550: + case i82551: + case i82557A: + case i82557B: + case i82557C: + case i82558A: + case i82558B: + case i82559A: + case i82559B: + case i82559ER: + case i82562: + case i82801: + case i82559C: + break; + default: + logout("Device %X is undefined!\n", device); + } + + /* Standard TxCB. */ + s->configuration[6] |= BIT(4); + + /* Standard statistical counters. */ + s->configuration[6] |= BIT(5); + + if (s->stats_size == 80) { + /* TODO: check TCO Statistical Counters bit. Documentation not clear. */ + if (s->configuration[6] & BIT(2)) { + /* TCO statistical counters. */ + assert(s->configuration[6] & BIT(5)); + } else { + if (s->configuration[6] & BIT(5)) { + /* No extended statistical counters, i82557 compatible. */ + s->stats_size = 64; + } else { + /* i82558 compatible. */ + s->stats_size = 76; + } + } + } else { + if (s->configuration[6] & BIT(5)) { + /* No extended statistical counters. */ + s->stats_size = 64; + } + } + assert(s->stats_size > 0 && s->stats_size <= sizeof(s->statistics)); + + if (info->power_management) { + /* Power Management Capabilities */ + int cfg_offset = 0xdc; + int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM, + cfg_offset, PCI_PM_SIZEOF); + assert(r >= 0); + pci_set_word(pci_conf + cfg_offset + PCI_PM_PMC, 0x7e21); +#if 0 /* TODO: replace dummy code for power management emulation. */ + /* TODO: Power Management Control / Status. */ + pci_set_word(pci_conf + cfg_offset + PCI_PM_CTRL, 0x0000); + /* TODO: Ethernet Power Consumption Registers (i82559 and later). */ + pci_set_byte(pci_conf + cfg_offset + PCI_PM_PPB_EXTENSIONS, 0x0000); +#endif + } + +#if EEPROM_SIZE > 0 + if (device == i82557C || device == i82558B || device == i82559C) { + /* + TODO: get vendor id from EEPROM for i82557C or later. + TODO: get device id from EEPROM for i82557C or later. + TODO: status bit 4 can be disabled by EEPROM for i82558, i82559. + TODO: header type is determined by EEPROM for i82559. + TODO: get subsystem id from EEPROM for i82557C or later. + TODO: get subsystem vendor id from EEPROM for i82557C or later. + TODO: exp. rom baddr depends on a bit in EEPROM for i82558 or later. + TODO: capability pointer depends on EEPROM for i82558. + */ + logout("Get device id and revision from EEPROM!!!\n"); + } +#endif /* EEPROM_SIZE > 0 */ +} + +static void nic_selective_reset(EEPRO100State * s) +{ + size_t i; + uint16_t *eeprom_contents = eeprom93xx_data(s->eeprom); +#if 0 + eeprom93xx_reset(s->eeprom); +#endif + memcpy(eeprom_contents, s->conf.macaddr.a, 6); + eeprom_contents[EEPROM_ID] = EEPROM_ID_VALID; + if (s->device == i82557B || s->device == i82557C) + eeprom_contents[5] = 0x0100; + eeprom_contents[EEPROM_PHY_ID] = 1; + uint16_t sum = 0; + for (i = 0; i < EEPROM_SIZE - 1; i++) { + sum += eeprom_contents[i]; + } + eeprom_contents[EEPROM_SIZE - 1] = 0xbaba - sum; + TRACE(EEPROM, logout("checksum=0x%04x\n", eeprom_contents[EEPROM_SIZE - 1])); + + memset(s->mem, 0, sizeof(s->mem)); + e100_write_reg4(s, SCBCtrlMDI, BIT(21)); + + assert(sizeof(s->mdimem) == sizeof(eepro100_mdi_default)); + memcpy(&s->mdimem[0], &eepro100_mdi_default[0], sizeof(s->mdimem)); +} + +static void nic_reset(void *opaque) +{ + EEPRO100State *s = opaque; + TRACE(OTHER, logout("%p\n", s)); + /* TODO: Clearing of hash register for selective reset, too? */ + memset(&s->mult[0], 0, sizeof(s->mult)); + nic_selective_reset(s); +} + +#if defined(DEBUG_EEPRO100) +static const char * const e100_reg[PCI_IO_SIZE / 4] = { + "Command/Status", + "General Pointer", + "Port", + "EEPROM/Flash Control", + "MDI Control", + "Receive DMA Byte Count", + "Flow Control", + "General Status/Control" +}; + +static char *regname(uint32_t addr) +{ + static char buf[32]; + if (addr < PCI_IO_SIZE) { + const char *r = e100_reg[addr / 4]; + if (r != 0) { + snprintf(buf, sizeof(buf), "%s+%u", r, addr % 4); + } else { + snprintf(buf, sizeof(buf), "0x%02x", addr); + } + } else { + snprintf(buf, sizeof(buf), "??? 0x%08x", addr); + } + return buf; +} +#endif /* DEBUG_EEPRO100 */ + +/***************************************************************************** + * + * Command emulation. + * + ****************************************************************************/ + +#if 0 +static uint16_t eepro100_read_command(EEPRO100State * s) +{ + uint16_t val = 0xffff; + TRACE(OTHER, logout("val=0x%04x\n", val)); + return val; +} +#endif + +/* Commands that can be put in a command list entry. */ +enum commands { + CmdNOp = 0, + CmdIASetup = 1, + CmdConfigure = 2, + CmdMulticastList = 3, + CmdTx = 4, + CmdTDR = 5, /* load microcode */ + CmdDump = 6, + CmdDiagnose = 7, + + /* And some extra flags: */ + CmdSuspend = 0x4000, /* Suspend after completion. */ + CmdIntr = 0x2000, /* Interrupt after completion. */ + CmdTxFlex = 0x0008, /* Use "Flexible mode" for CmdTx command. */ +}; + +static cu_state_t get_cu_state(EEPRO100State * s) +{ + return ((s->mem[SCBStatus] & BITS(7, 6)) >> 6); +} + +static void set_cu_state(EEPRO100State * s, cu_state_t state) +{ + s->mem[SCBStatus] = (s->mem[SCBStatus] & ~BITS(7, 6)) + (state << 6); +} + +static ru_state_t get_ru_state(EEPRO100State * s) +{ + return ((s->mem[SCBStatus] & BITS(5, 2)) >> 2); +} + +static void set_ru_state(EEPRO100State * s, ru_state_t state) +{ + s->mem[SCBStatus] = (s->mem[SCBStatus] & ~BITS(5, 2)) + (state << 2); +} + +static void dump_statistics(EEPRO100State * s) +{ + /* Dump statistical data. Most data is never changed by the emulation + * and always 0, so we first just copy the whole block and then those + * values which really matter. + * Number of data should check configuration!!! + */ + pci_dma_write(&s->dev, s->statsaddr, &s->statistics, s->stats_size); + stl_le_pci_dma(&s->dev, s->statsaddr + 0, + s->statistics.tx_good_frames); + stl_le_pci_dma(&s->dev, s->statsaddr + 36, + s->statistics.rx_good_frames); + stl_le_pci_dma(&s->dev, s->statsaddr + 48, + s->statistics.rx_resource_errors); + stl_le_pci_dma(&s->dev, s->statsaddr + 60, + s->statistics.rx_short_frame_errors); +#if 0 + stw_le_pci_dma(&s->dev, s->statsaddr + 76, s->statistics.xmt_tco_frames); + stw_le_pci_dma(&s->dev, s->statsaddr + 78, s->statistics.rcv_tco_frames); + missing("CU dump statistical counters"); +#endif +} + +static void read_cb(EEPRO100State *s) +{ + pci_dma_read(&s->dev, s->cb_address, &s->tx, sizeof(s->tx)); + s->tx.status = le16_to_cpu(s->tx.status); + s->tx.command = le16_to_cpu(s->tx.command); + s->tx.link = le32_to_cpu(s->tx.link); + s->tx.tbd_array_addr = le32_to_cpu(s->tx.tbd_array_addr); + s->tx.tcb_bytes = le16_to_cpu(s->tx.tcb_bytes); +} + +static void tx_command(EEPRO100State *s) +{ + uint32_t tbd_array = le32_to_cpu(s->tx.tbd_array_addr); + uint16_t tcb_bytes = (le16_to_cpu(s->tx.tcb_bytes) & 0x3fff); + /* Sends larger than MAX_ETH_FRAME_SIZE are allowed, up to 2600 bytes. */ + uint8_t buf[2600]; + uint16_t size = 0; + uint32_t tbd_address = s->cb_address + 0x10; + TRACE(RXTX, logout + ("transmit, TBD array address 0x%08x, TCB byte count 0x%04x, TBD count %u\n", + tbd_array, tcb_bytes, s->tx.tbd_count)); + + if (tcb_bytes > 2600) { + logout("TCB byte count too large, using 2600\n"); + tcb_bytes = 2600; + } + if (!((tcb_bytes > 0) || (tbd_array != 0xffffffff))) { + logout + ("illegal values of TBD array address and TCB byte count!\n"); + } + assert(tcb_bytes <= sizeof(buf)); + while (size < tcb_bytes) { + uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev, tbd_address); + uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev, tbd_address + 4); +#if 0 + uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, tbd_address + 6); +#endif + tbd_address += 8; + TRACE(RXTX, logout + ("TBD (simplified mode): buffer address 0x%08x, size 0x%04x\n", + tx_buffer_address, tx_buffer_size)); + tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size); + pci_dma_read(&s->dev, tx_buffer_address, &buf[size], tx_buffer_size); + size += tx_buffer_size; + } + if (tbd_array == 0xffffffff) { + /* Simplified mode. Was already handled by code above. */ + } else { + /* Flexible mode. */ + uint8_t tbd_count = 0; + if (s->has_extended_tcb_support && !(s->configuration[6] & BIT(4))) { + /* Extended Flexible TCB. */ + for (; tbd_count < 2; tbd_count++) { + uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev, + tbd_address); + uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev, + tbd_address + 4); + uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, + tbd_address + 6); + tbd_address += 8; + TRACE(RXTX, logout + ("TBD (extended flexible mode): buffer address 0x%08x, size 0x%04x\n", + tx_buffer_address, tx_buffer_size)); + tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size); + pci_dma_read(&s->dev, tx_buffer_address, + &buf[size], tx_buffer_size); + size += tx_buffer_size; + if (tx_buffer_el & 1) { + break; + } + } + } + tbd_address = tbd_array; + for (; tbd_count < s->tx.tbd_count; tbd_count++) { + uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev, tbd_address); + uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev, tbd_address + 4); + uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, tbd_address + 6); + tbd_address += 8; + TRACE(RXTX, logout + ("TBD (flexible mode): buffer address 0x%08x, size 0x%04x\n", + tx_buffer_address, tx_buffer_size)); + tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size); + pci_dma_read(&s->dev, tx_buffer_address, + &buf[size], tx_buffer_size); + size += tx_buffer_size; + if (tx_buffer_el & 1) { + break; + } + } + } + TRACE(RXTX, logout("%p sending frame, len=%d,%s\n", s, size, nic_dump(buf, size))); + qemu_send_packet(qemu_get_queue(s->nic), buf, size); + s->statistics.tx_good_frames++; + /* Transmit with bad status would raise an CX/TNO interrupt. + * (82557 only). Emulation never has bad status. */ +#if 0 + eepro100_cx_interrupt(s); +#endif +} + +static void set_multicast_list(EEPRO100State *s) +{ + uint16_t multicast_count = s->tx.tbd_array_addr & BITS(13, 0); + uint16_t i; + memset(&s->mult[0], 0, sizeof(s->mult)); + TRACE(OTHER, logout("multicast list, multicast count = %u\n", multicast_count)); + for (i = 0; i < multicast_count; i += 6) { + uint8_t multicast_addr[6]; + pci_dma_read(&s->dev, s->cb_address + 10 + i, multicast_addr, 6); + TRACE(OTHER, logout("multicast entry %s\n", nic_dump(multicast_addr, 6))); + unsigned mcast_idx = e100_compute_mcast_idx(multicast_addr); + assert(mcast_idx < 64); + s->mult[mcast_idx >> 3] |= (1 << (mcast_idx & 7)); + } +} + +static void action_command(EEPRO100State *s) +{ + for (;;) { + bool bit_el; + bool bit_s; + bool bit_i; + bool bit_nc; + uint16_t ok_status = STATUS_OK; + s->cb_address = s->cu_base + s->cu_offset; + read_cb(s); + bit_el = ((s->tx.command & COMMAND_EL) != 0); + bit_s = ((s->tx.command & COMMAND_S) != 0); + bit_i = ((s->tx.command & COMMAND_I) != 0); + bit_nc = ((s->tx.command & COMMAND_NC) != 0); +#if 0 + bool bit_sf = ((s->tx.command & COMMAND_SF) != 0); +#endif + s->cu_offset = s->tx.link; + TRACE(OTHER, + logout("val=(cu start), status=0x%04x, command=0x%04x, link=0x%08x\n", + s->tx.status, s->tx.command, s->tx.link)); + switch (s->tx.command & COMMAND_CMD) { + case CmdNOp: + /* Do nothing. */ + break; + case CmdIASetup: + pci_dma_read(&s->dev, s->cb_address + 8, &s->conf.macaddr.a[0], 6); + TRACE(OTHER, logout("macaddr: %s\n", nic_dump(&s->conf.macaddr.a[0], 6))); + break; + case CmdConfigure: + pci_dma_read(&s->dev, s->cb_address + 8, + &s->configuration[0], sizeof(s->configuration)); + TRACE(OTHER, logout("configuration: %s\n", + nic_dump(&s->configuration[0], 16))); + TRACE(OTHER, logout("configuration: %s\n", + nic_dump(&s->configuration[16], + ARRAY_SIZE(s->configuration) - 16))); + if (s->configuration[20] & BIT(6)) { + TRACE(OTHER, logout("Multiple IA bit\n")); + } + break; + case CmdMulticastList: + set_multicast_list(s); + break; + case CmdTx: + if (bit_nc) { + missing("CmdTx: NC = 0"); + ok_status = 0; + break; + } + tx_command(s); + break; + case CmdTDR: + TRACE(OTHER, logout("load microcode\n")); + /* Starting with offset 8, the command contains + * 64 dwords microcode which we just ignore here. */ + break; + case CmdDiagnose: + TRACE(OTHER, logout("diagnose\n")); + /* Make sure error flag is not set. */ + s->tx.status = 0; + break; + default: + missing("undefined command"); + ok_status = 0; + break; + } + /* Write new status. */ + stw_le_pci_dma(&s->dev, s->cb_address, + s->tx.status | ok_status | STATUS_C); + if (bit_i) { + /* CU completed action. */ + eepro100_cx_interrupt(s); + } + if (bit_el) { + /* CU becomes idle. Terminate command loop. */ + set_cu_state(s, cu_idle); + eepro100_cna_interrupt(s); + break; + } else if (bit_s) { + /* CU becomes suspended. Terminate command loop. */ + set_cu_state(s, cu_suspended); + eepro100_cna_interrupt(s); + break; + } else { + /* More entries in list. */ + TRACE(OTHER, logout("CU list with at least one more entry\n")); + } + } + TRACE(OTHER, logout("CU list empty\n")); + /* List is empty. Now CU is idle or suspended. */ +} + +static void eepro100_cu_command(EEPRO100State * s, uint8_t val) +{ + cu_state_t cu_state; + switch (val) { + case CU_NOP: + /* No operation. */ + break; + case CU_START: + cu_state = get_cu_state(s); + if (cu_state != cu_idle && cu_state != cu_suspended) { + /* Intel documentation says that CU must be idle or suspended + * for the CU start command. */ + logout("unexpected CU state is %u\n", cu_state); + } + set_cu_state(s, cu_active); + s->cu_offset = e100_read_reg4(s, SCBPointer); + action_command(s); + break; + case CU_RESUME: + if (get_cu_state(s) != cu_suspended) { + logout("bad CU resume from CU state %u\n", get_cu_state(s)); + /* Workaround for bad Linux eepro100 driver which resumes + * from idle state. */ +#if 0 + missing("cu resume"); +#endif + set_cu_state(s, cu_suspended); + } + if (get_cu_state(s) == cu_suspended) { + TRACE(OTHER, logout("CU resuming\n")); + set_cu_state(s, cu_active); + action_command(s); + } + break; + case CU_STATSADDR: + /* Load dump counters address. */ + s->statsaddr = e100_read_reg4(s, SCBPointer); + TRACE(OTHER, logout("val=0x%02x (dump counters address)\n", val)); + if (s->statsaddr & 3) { + /* Memory must be Dword aligned. */ + logout("unaligned dump counters address\n"); + /* Handling of misaligned addresses is undefined. + * Here we align the address by ignoring the lower bits. */ + /* TODO: Test unaligned dump counter address on real hardware. */ + s->statsaddr &= ~3; + } + break; + case CU_SHOWSTATS: + /* Dump statistical counters. */ + TRACE(OTHER, logout("val=0x%02x (dump stats)\n", val)); + dump_statistics(s); + stl_le_pci_dma(&s->dev, s->statsaddr + s->stats_size, 0xa005); + break; + case CU_CMD_BASE: + /* Load CU base. */ + TRACE(OTHER, logout("val=0x%02x (CU base address)\n", val)); + s->cu_base = e100_read_reg4(s, SCBPointer); + break; + case CU_DUMPSTATS: + /* Dump and reset statistical counters. */ + TRACE(OTHER, logout("val=0x%02x (dump stats and reset)\n", val)); + dump_statistics(s); + stl_le_pci_dma(&s->dev, s->statsaddr + s->stats_size, 0xa007); + memset(&s->statistics, 0, sizeof(s->statistics)); + break; + case CU_SRESUME: + /* CU static resume. */ + missing("CU static resume"); + break; + default: + missing("Undefined CU command"); + } +} + +static void eepro100_ru_command(EEPRO100State * s, uint8_t val) +{ + switch (val) { + case RU_NOP: + /* No operation. */ + break; + case RX_START: + /* RU start. */ + if (get_ru_state(s) != ru_idle) { + logout("RU state is %u, should be %u\n", get_ru_state(s), ru_idle); +#if 0 + assert(!"wrong RU state"); +#endif + } + set_ru_state(s, ru_ready); + s->ru_offset = e100_read_reg4(s, SCBPointer); + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + TRACE(OTHER, logout("val=0x%02x (rx start)\n", val)); + break; + case RX_RESUME: + /* Restart RU. */ + if (get_ru_state(s) != ru_suspended) { + logout("RU state is %u, should be %u\n", get_ru_state(s), + ru_suspended); +#if 0 + assert(!"wrong RU state"); +#endif + } + set_ru_state(s, ru_ready); + break; + case RU_ABORT: + /* RU abort. */ + if (get_ru_state(s) == ru_ready) { + eepro100_rnr_interrupt(s); + } + set_ru_state(s, ru_idle); + break; + case RX_ADDR_LOAD: + /* Load RU base. */ + TRACE(OTHER, logout("val=0x%02x (RU base address)\n", val)); + s->ru_base = e100_read_reg4(s, SCBPointer); + break; + default: + logout("val=0x%02x (undefined RU command)\n", val); + missing("Undefined SU command"); + } +} + +static void eepro100_write_command(EEPRO100State * s, uint8_t val) +{ + eepro100_ru_command(s, val & 0x0f); + eepro100_cu_command(s, val & 0xf0); + if ((val) == 0) { + TRACE(OTHER, logout("val=0x%02x\n", val)); + } + /* Clear command byte after command was accepted. */ + s->mem[SCBCmd] = 0; +} + +/***************************************************************************** + * + * EEPROM emulation. + * + ****************************************************************************/ + +#define EEPROM_CS 0x02 +#define EEPROM_SK 0x01 +#define EEPROM_DI 0x04 +#define EEPROM_DO 0x08 + +static uint16_t eepro100_read_eeprom(EEPRO100State * s) +{ + uint16_t val = e100_read_reg2(s, SCBeeprom); + if (eeprom93xx_read(s->eeprom)) { + val |= EEPROM_DO; + } else { + val &= ~EEPROM_DO; + } + TRACE(EEPROM, logout("val=0x%04x\n", val)); + return val; +} + +static void eepro100_write_eeprom(eeprom_t * eeprom, uint8_t val) +{ + TRACE(EEPROM, logout("val=0x%02x\n", val)); + + /* mask unwritable bits */ +#if 0 + val = SET_MASKED(val, 0x31, eeprom->value); +#endif + + int eecs = ((val & EEPROM_CS) != 0); + int eesk = ((val & EEPROM_SK) != 0); + int eedi = ((val & EEPROM_DI) != 0); + eeprom93xx_write(eeprom, eecs, eesk, eedi); +} + +/***************************************************************************** + * + * MDI emulation. + * + ****************************************************************************/ + +#if defined(DEBUG_EEPRO100) +static const char * const mdi_op_name[] = { + "opcode 0", + "write", + "read", + "opcode 3" +}; + +static const char * const mdi_reg_name[] = { + "Control", + "Status", + "PHY Identification (Word 1)", + "PHY Identification (Word 2)", + "Auto-Negotiation Advertisement", + "Auto-Negotiation Link Partner Ability", + "Auto-Negotiation Expansion" +}; + +static const char *reg2name(uint8_t reg) +{ + static char buffer[10]; + const char *p = buffer; + if (reg < ARRAY_SIZE(mdi_reg_name)) { + p = mdi_reg_name[reg]; + } else { + snprintf(buffer, sizeof(buffer), "reg=0x%02x", reg); + } + return p; +} +#endif /* DEBUG_EEPRO100 */ + +static uint32_t eepro100_read_mdi(EEPRO100State * s) +{ + uint32_t val = e100_read_reg4(s, SCBCtrlMDI); + +#ifdef DEBUG_EEPRO100 + uint8_t raiseint = (val & BIT(29)) >> 29; + uint8_t opcode = (val & BITS(27, 26)) >> 26; + uint8_t phy = (val & BITS(25, 21)) >> 21; + uint8_t reg = (val & BITS(20, 16)) >> 16; + uint16_t data = (val & BITS(15, 0)); +#endif + /* Emulation takes no time to finish MDI transaction. */ + val |= BIT(28); + TRACE(MDI, logout("val=0x%08x (int=%u, %s, phy=%u, %s, data=0x%04x\n", + val, raiseint, mdi_op_name[opcode], phy, + reg2name(reg), data)); + return val; +} + +static void eepro100_write_mdi(EEPRO100State *s) +{ + uint32_t val = e100_read_reg4(s, SCBCtrlMDI); + uint8_t raiseint = (val & BIT(29)) >> 29; + uint8_t opcode = (val & BITS(27, 26)) >> 26; + uint8_t phy = (val & BITS(25, 21)) >> 21; + uint8_t reg = (val & BITS(20, 16)) >> 16; + uint16_t data = (val & BITS(15, 0)); + TRACE(MDI, logout("val=0x%08x (int=%u, %s, phy=%u, %s, data=0x%04x\n", + val, raiseint, mdi_op_name[opcode], phy, reg2name(reg), data)); + if (phy != 1) { + /* Unsupported PHY address. */ +#if 0 + logout("phy must be 1 but is %u\n", phy); +#endif + data = 0; + } else if (opcode != 1 && opcode != 2) { + /* Unsupported opcode. */ + logout("opcode must be 1 or 2 but is %u\n", opcode); + data = 0; + } else if (reg > 6) { + /* Unsupported register. */ + logout("register must be 0...6 but is %u\n", reg); + data = 0; + } else { + TRACE(MDI, logout("val=0x%08x (int=%u, %s, phy=%u, %s, data=0x%04x\n", + val, raiseint, mdi_op_name[opcode], phy, + reg2name(reg), data)); + if (opcode == 1) { + /* MDI write */ + switch (reg) { + case 0: /* Control Register */ + if (data & 0x8000) { + /* Reset status and control registers to default. */ + s->mdimem[0] = eepro100_mdi_default[0]; + s->mdimem[1] = eepro100_mdi_default[1]; + data = s->mdimem[reg]; + } else { + /* Restart Auto Configuration = Normal Operation */ + data &= ~0x0200; + } + break; + case 1: /* Status Register */ + missing("not writable"); + break; + case 2: /* PHY Identification Register (Word 1) */ + case 3: /* PHY Identification Register (Word 2) */ + missing("not implemented"); + break; + case 4: /* Auto-Negotiation Advertisement Register */ + case 5: /* Auto-Negotiation Link Partner Ability Register */ + break; + case 6: /* Auto-Negotiation Expansion Register */ + default: + missing("not implemented"); + } + s->mdimem[reg] &= eepro100_mdi_mask[reg]; + s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg]; + } else if (opcode == 2) { + /* MDI read */ + switch (reg) { + case 0: /* Control Register */ + if (data & 0x8000) { + /* Reset status and control registers to default. */ + s->mdimem[0] = eepro100_mdi_default[0]; + s->mdimem[1] = eepro100_mdi_default[1]; + } + break; + case 1: /* Status Register */ + s->mdimem[reg] |= 0x0020; + break; + case 2: /* PHY Identification Register (Word 1) */ + case 3: /* PHY Identification Register (Word 2) */ + case 4: /* Auto-Negotiation Advertisement Register */ + break; + case 5: /* Auto-Negotiation Link Partner Ability Register */ + s->mdimem[reg] = 0x41fe; + break; + case 6: /* Auto-Negotiation Expansion Register */ + s->mdimem[reg] = 0x0001; + break; + } + data = s->mdimem[reg]; + } + /* Emulation takes no time to finish MDI transaction. + * Set MDI bit in SCB status register. */ + s->mem[SCBAck] |= 0x08; + val |= BIT(28); + if (raiseint) { + eepro100_mdi_interrupt(s); + } + } + val = (val & 0xffff0000) + data; + e100_write_reg4(s, SCBCtrlMDI, val); +} + +/***************************************************************************** + * + * Port emulation. + * + ****************************************************************************/ + +#define PORT_SOFTWARE_RESET 0 +#define PORT_SELFTEST 1 +#define PORT_SELECTIVE_RESET 2 +#define PORT_DUMP 3 +#define PORT_SELECTION_MASK 3 + +typedef struct { + uint32_t st_sign; /* Self Test Signature */ + uint32_t st_result; /* Self Test Results */ +} eepro100_selftest_t; + +static uint32_t eepro100_read_port(EEPRO100State * s) +{ + return 0; +} + +static void eepro100_write_port(EEPRO100State *s) +{ + uint32_t val = e100_read_reg4(s, SCBPort); + uint32_t address = (val & ~PORT_SELECTION_MASK); + uint8_t selection = (val & PORT_SELECTION_MASK); + switch (selection) { + case PORT_SOFTWARE_RESET: + nic_reset(s); + break; + case PORT_SELFTEST: + TRACE(OTHER, logout("selftest address=0x%08x\n", address)); + eepro100_selftest_t data; + pci_dma_read(&s->dev, address, (uint8_t *) &data, sizeof(data)); + data.st_sign = 0xffffffff; + data.st_result = 0; + pci_dma_write(&s->dev, address, (uint8_t *) &data, sizeof(data)); + break; + case PORT_SELECTIVE_RESET: + TRACE(OTHER, logout("selective reset, selftest address=0x%08x\n", address)); + nic_selective_reset(s); + break; + default: + logout("val=0x%08x\n", val); + missing("unknown port selection"); + } +} + +/***************************************************************************** + * + * General hardware emulation. + * + ****************************************************************************/ + +static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr) +{ + uint8_t val = 0; + if (addr <= sizeof(s->mem) - sizeof(val)) { + val = s->mem[addr]; + } + + switch (addr) { + case SCBStatus: + case SCBAck: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); +#if 0 + val = eepro100_read_command(s); +#endif + break; + case SCBIntmask: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBPort + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBeeprom: + val = eepro100_read_eeprom(s); + break; + case SCBCtrlMDI: + case SCBCtrlMDI + 1: + case SCBCtrlMDI + 2: + case SCBCtrlMDI + 3: + val = (uint8_t)(eepro100_read_mdi(s) >> (8 * (addr & 3))); + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBpmdr: /* Power Management Driver Register */ + val = 0; + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBgctrl: /* General Control Register */ + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBgstat: /* General Status Register */ + /* 100 Mbps full duplex, valid link */ + val = 0x07; + TRACE(OTHER, logout("addr=General Status val=%02x\n", val)); + break; + default: + logout("addr=%s val=0x%02x\n", regname(addr), val); + missing("unknown byte read"); + } + return val; +} + +static uint16_t eepro100_read2(EEPRO100State * s, uint32_t addr) +{ + uint16_t val = 0; + if (addr <= sizeof(s->mem) - sizeof(val)) { + val = e100_read_reg2(s, addr); + } + + switch (addr) { + case SCBStatus: + case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBeeprom: + val = eepro100_read_eeprom(s); + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBCtrlMDI: + case SCBCtrlMDI + 2: + val = (uint16_t)(eepro100_read_mdi(s) >> (8 * (addr & 3))); + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + default: + logout("addr=%s val=0x%04x\n", regname(addr), val); + missing("unknown word read"); + } + return val; +} + +static uint32_t eepro100_read4(EEPRO100State * s, uint32_t addr) +{ + uint32_t val = 0; + if (addr <= sizeof(s->mem) - sizeof(val)) { + val = e100_read_reg4(s, addr); + } + + switch (addr) { + case SCBStatus: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; + case SCBPointer: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; + case SCBPort: + val = eepro100_read_port(s); + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; + case SCBflash: + val = eepro100_read_eeprom(s); + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; + case SCBCtrlMDI: + val = eepro100_read_mdi(s); + break; + default: + logout("addr=%s val=0x%08x\n", regname(addr), val); + missing("unknown longword read"); + } + return val; +} + +static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val) +{ + /* SCBStatus is readonly. */ + if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) { + s->mem[addr] = val; + } + + switch (addr) { + case SCBStatus: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBAck: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_acknowledge(s); + break; + case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_command(s, val); + break; + case SCBIntmask: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + if (val & BIT(1)) { + eepro100_swi_interrupt(s); + } + eepro100_interrupt(s, 0); + break; + case SCBPointer: + case SCBPointer + 1: + case SCBPointer + 2: + case SCBPointer + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBPort: + case SCBPort + 1: + case SCBPort + 2: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBPort + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_port(s); + break; + case SCBFlow: /* does not exist on 82557 */ + case SCBFlow + 1: + case SCBFlow + 2: + case SCBpmdr: /* does not exist on 82557 */ + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBeeprom: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_eeprom(s->eeprom, val); + break; + case SCBCtrlMDI: + case SCBCtrlMDI + 1: + case SCBCtrlMDI + 2: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + break; + case SCBCtrlMDI + 3: + TRACE(OTHER, logout("addr=%s val=0x%02x\n", regname(addr), val)); + eepro100_write_mdi(s); + break; + default: + logout("addr=%s val=0x%02x\n", regname(addr), val); + missing("unknown byte write"); + } +} + +static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val) +{ + /* SCBStatus is readonly. */ + if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) { + e100_write_reg2(s, addr, val); + } + + switch (addr) { + case SCBStatus: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + s->mem[SCBAck] = (val >> 8); + eepro100_acknowledge(s); + break; + case SCBCmd: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_command(s, val); + eepro100_write1(s, SCBIntmask, val >> 8); + break; + case SCBPointer: + case SCBPointer + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBPort: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBPort + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_port(s); + break; + case SCBeeprom: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_eeprom(s->eeprom, val); + break; + case SCBCtrlMDI: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + break; + case SCBCtrlMDI + 2: + TRACE(OTHER, logout("addr=%s val=0x%04x\n", regname(addr), val)); + eepro100_write_mdi(s); + break; + default: + logout("addr=%s val=0x%04x\n", regname(addr), val); + missing("unknown word write"); + } +} + +static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val) +{ + if (addr <= sizeof(s->mem) - sizeof(val)) { + e100_write_reg4(s, addr, val); + } + + switch (addr) { + case SCBPointer: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + break; + case SCBPort: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + eepro100_write_port(s); + break; + case SCBflash: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + val = val >> 16; + eepro100_write_eeprom(s->eeprom, val); + break; + case SCBCtrlMDI: + TRACE(OTHER, logout("addr=%s val=0x%08x\n", regname(addr), val)); + eepro100_write_mdi(s); + break; + default: + logout("addr=%s val=0x%08x\n", regname(addr), val); + missing("unknown longword write"); + } +} + +static uint64_t eepro100_read(void *opaque, hwaddr addr, + unsigned size) +{ + EEPRO100State *s = opaque; + + switch (size) { + case 1: return eepro100_read1(s, addr); + case 2: return eepro100_read2(s, addr); + case 4: return eepro100_read4(s, addr); + default: abort(); + } +} + +static void eepro100_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + EEPRO100State *s = opaque; + + switch (size) { + case 1: + eepro100_write1(s, addr, data); + break; + case 2: + eepro100_write2(s, addr, data); + break; + case 4: + eepro100_write4(s, addr, data); + break; + default: + abort(); + } +} + +static const MemoryRegionOps eepro100_ops = { + .read = eepro100_read, + .write = eepro100_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size) +{ + /* TODO: + * - Magic packets should set bit 30 in power management driver register. + * - Interesting packets should set bit 29 in power management driver register. + */ + EEPRO100State *s = qemu_get_nic_opaque(nc); + uint16_t rfd_status = 0xa000; +#if defined(CONFIG_PAD_RECEIVED_FRAMES) + uint8_t min_buf[60]; +#endif + static const uint8_t broadcast_macaddr[6] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +#if defined(CONFIG_PAD_RECEIVED_FRAMES) + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + memcpy(min_buf, buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + buf = min_buf; + size = sizeof(min_buf); + } +#endif + + if (s->configuration[8] & 0x80) { + /* CSMA is disabled. */ + logout("%p received while CSMA is disabled\n", s); + return -1; +#if !defined(CONFIG_PAD_RECEIVED_FRAMES) + } else if (size < 64 && (s->configuration[7] & BIT(0))) { + /* Short frame and configuration byte 7/0 (discard short receive) set: + * Short frame is discarded */ + logout("%p received short frame (%zu byte)\n", s, size); + s->statistics.rx_short_frame_errors++; + return -1; +#endif + } else if ((size > MAX_ETH_FRAME_SIZE + 4) && !(s->configuration[18] & BIT(3))) { + /* Long frame and configuration byte 18/3 (long receive ok) not set: + * Long frames are discarded. */ + logout("%p received long frame (%zu byte), ignored\n", s, size); + return -1; + } else if (memcmp(buf, s->conf.macaddr.a, 6) == 0) { /* !!! */ + /* Frame matches individual address. */ + /* TODO: check configuration byte 15/4 (ignore U/L). */ + TRACE(RXTX, logout("%p received frame for me, len=%zu\n", s, size)); + } else if (memcmp(buf, broadcast_macaddr, 6) == 0) { + /* Broadcast frame. */ + TRACE(RXTX, logout("%p received broadcast, len=%zu\n", s, size)); + rfd_status |= 0x0002; + } else if (buf[0] & 0x01) { + /* Multicast frame. */ + TRACE(RXTX, logout("%p received multicast, len=%zu,%s\n", s, size, nic_dump(buf, size))); + if (s->configuration[21] & BIT(3)) { + /* Multicast all bit is set, receive all multicast frames. */ + } else { + unsigned mcast_idx = e100_compute_mcast_idx(buf); + assert(mcast_idx < 64); + if (s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))) { + /* Multicast frame is allowed in hash table. */ + } else if (s->configuration[15] & BIT(0)) { + /* Promiscuous: receive all. */ + rfd_status |= 0x0004; + } else { + TRACE(RXTX, logout("%p multicast ignored\n", s)); + return -1; + } + } + /* TODO: Next not for promiscuous mode? */ + rfd_status |= 0x0002; + } else if (s->configuration[15] & BIT(0)) { + /* Promiscuous: receive all. */ + TRACE(RXTX, logout("%p received frame in promiscuous mode, len=%zu\n", s, size)); + rfd_status |= 0x0004; + } else if (s->configuration[20] & BIT(6)) { + /* Multiple IA bit set. */ + unsigned mcast_idx = compute_mcast_idx(buf); + assert(mcast_idx < 64); + if (s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))) { + TRACE(RXTX, logout("%p accepted, multiple IA bit set\n", s)); + } else { + TRACE(RXTX, logout("%p frame ignored, multiple IA bit set\n", s)); + return -1; + } + } else { + TRACE(RXTX, logout("%p received frame, ignored, len=%zu,%s\n", s, size, + nic_dump(buf, size))); + return size; + } + + if (get_ru_state(s) != ru_ready) { + /* No resources available. */ + logout("no resources, state=%u\n", get_ru_state(s)); + /* TODO: RNR interrupt only at first failed frame? */ + eepro100_rnr_interrupt(s); + s->statistics.rx_resource_errors++; +#if 0 + assert(!"no resources"); +#endif + return -1; + } + /* !!! */ + eepro100_rx_t rx; + pci_dma_read(&s->dev, s->ru_base + s->ru_offset, + &rx, sizeof(eepro100_rx_t)); + uint16_t rfd_command = le16_to_cpu(rx.command); + uint16_t rfd_size = le16_to_cpu(rx.size); + + if (size > rfd_size) { + logout("Receive buffer (%" PRId16 " bytes) too small for data " + "(%zu bytes); data truncated\n", rfd_size, size); + size = rfd_size; + } +#if !defined(CONFIG_PAD_RECEIVED_FRAMES) + if (size < 64) { + rfd_status |= 0x0080; + } +#endif + TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n", + rfd_command, rx.link, rx.rx_buf_addr, rfd_size)); + stw_le_pci_dma(&s->dev, s->ru_base + s->ru_offset + + offsetof(eepro100_rx_t, status), rfd_status); + stw_le_pci_dma(&s->dev, s->ru_base + s->ru_offset + + offsetof(eepro100_rx_t, count), size); + /* Early receive interrupt not supported. */ +#if 0 + eepro100_er_interrupt(s); +#endif + /* Receive CRC Transfer not supported. */ + if (s->configuration[18] & BIT(2)) { + missing("Receive CRC Transfer"); + return -1; + } + /* TODO: check stripping enable bit. */ +#if 0 + assert(!(s->configuration[17] & BIT(0))); +#endif + pci_dma_write(&s->dev, s->ru_base + s->ru_offset + + sizeof(eepro100_rx_t), buf, size); + s->statistics.rx_good_frames++; + eepro100_fr_interrupt(s); + s->ru_offset = le32_to_cpu(rx.link); + if (rfd_command & COMMAND_EL) { + /* EL bit is set, so this was the last frame. */ + logout("receive: Running out of frames\n"); + set_ru_state(s, ru_no_resources); + eepro100_rnr_interrupt(s); + } + if (rfd_command & COMMAND_S) { + /* S bit is set. */ + set_ru_state(s, ru_suspended); + } + return size; +} + +static const VMStateDescription vmstate_eepro100 = { + .version_id = 3, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, EEPRO100State), + VMSTATE_UNUSED(32), + VMSTATE_BUFFER(mult, EEPRO100State), + VMSTATE_BUFFER(mem, EEPRO100State), + /* Save all members of struct between scb_stat and mem. */ + VMSTATE_UINT8(scb_stat, EEPRO100State), + VMSTATE_UINT8(int_stat, EEPRO100State), + VMSTATE_UNUSED(3*4), + VMSTATE_MACADDR(conf.macaddr, EEPRO100State), + VMSTATE_UNUSED(19*4), + VMSTATE_UINT16_ARRAY(mdimem, EEPRO100State, 32), + /* The eeprom should be saved and restored by its own routines. */ + VMSTATE_UINT32(device, EEPRO100State), + /* TODO check device. */ + VMSTATE_UINT32(cu_base, EEPRO100State), + VMSTATE_UINT32(cu_offset, EEPRO100State), + VMSTATE_UINT32(ru_base, EEPRO100State), + VMSTATE_UINT32(ru_offset, EEPRO100State), + VMSTATE_UINT32(statsaddr, EEPRO100State), + /* Save eepro100_stats_t statistics. */ + VMSTATE_UINT32(statistics.tx_good_frames, EEPRO100State), + VMSTATE_UINT32(statistics.tx_max_collisions, EEPRO100State), + VMSTATE_UINT32(statistics.tx_late_collisions, EEPRO100State), + VMSTATE_UINT32(statistics.tx_underruns, EEPRO100State), + VMSTATE_UINT32(statistics.tx_lost_crs, EEPRO100State), + VMSTATE_UINT32(statistics.tx_deferred, EEPRO100State), + VMSTATE_UINT32(statistics.tx_single_collisions, EEPRO100State), + VMSTATE_UINT32(statistics.tx_multiple_collisions, EEPRO100State), + VMSTATE_UINT32(statistics.tx_total_collisions, EEPRO100State), + VMSTATE_UINT32(statistics.rx_good_frames, EEPRO100State), + VMSTATE_UINT32(statistics.rx_crc_errors, EEPRO100State), + VMSTATE_UINT32(statistics.rx_alignment_errors, EEPRO100State), + VMSTATE_UINT32(statistics.rx_resource_errors, EEPRO100State), + VMSTATE_UINT32(statistics.rx_overrun_errors, EEPRO100State), + VMSTATE_UINT32(statistics.rx_cdt_errors, EEPRO100State), + VMSTATE_UINT32(statistics.rx_short_frame_errors, EEPRO100State), + VMSTATE_UINT32(statistics.fc_xmt_pause, EEPRO100State), + VMSTATE_UINT32(statistics.fc_rcv_pause, EEPRO100State), + VMSTATE_UINT32(statistics.fc_rcv_unsupported, EEPRO100State), + VMSTATE_UINT16(statistics.xmt_tco_frames, EEPRO100State), + VMSTATE_UINT16(statistics.rcv_tco_frames, EEPRO100State), + /* Configuration bytes. */ + VMSTATE_BUFFER(configuration, EEPRO100State), + VMSTATE_END_OF_LIST() + } +}; + +static void pci_nic_uninit(PCIDevice *pci_dev) +{ + EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); + + vmstate_unregister(&pci_dev->qdev, s->vmstate, s); + eeprom93xx_free(&pci_dev->qdev, s->eeprom); + qemu_del_nic(s->nic); +} + +static NetClientInfo net_eepro100_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = nic_receive, +}; + +static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) +{ + EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); + E100PCIDeviceInfo *info = eepro100_get_class(s); + + TRACE(OTHER, logout("\n")); + + s->device = info->device; + + e100_pci_reset(s); + + /* Add 64 * 2 EEPROM. i82557 and i82558 support a 64 word EEPROM, + * i82559 and later support 64 or 256 word EEPROM. */ + s->eeprom = eeprom93xx_new(&pci_dev->qdev, EEPROM_SIZE); + + /* Handler for memory-mapped I/O */ + memory_region_init_io(&s->mmio_bar, OBJECT(s), &eepro100_ops, s, + "eepro100-mmio", PCI_MEM_SIZE); + pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->mmio_bar); + memory_region_init_io(&s->io_bar, OBJECT(s), &eepro100_ops, s, + "eepro100-io", PCI_IO_SIZE); + pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->io_bar); + /* FIXME: flash aliases to mmio?! */ + memory_region_init_io(&s->flash_bar, OBJECT(s), &eepro100_ops, s, + "eepro100-flash", PCI_FLASH_SIZE); + pci_register_bar(&s->dev, 2, 0, &s->flash_bar); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + logout("macaddr: %s\n", nic_dump(&s->conf.macaddr.a[0], 6)); + + nic_reset(s); + + s->nic = qemu_new_nic(&net_eepro100_info, &s->conf, + object_get_typename(OBJECT(pci_dev)), pci_dev->qdev.id, s); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + TRACE(OTHER, logout("%s\n", qemu_get_queue(s->nic)->info_str)); + + qemu_register_reset(nic_reset, s); + + s->vmstate = g_malloc(sizeof(vmstate_eepro100)); + memcpy(s->vmstate, &vmstate_eepro100, sizeof(vmstate_eepro100)); + s->vmstate->name = qemu_get_queue(s->nic)->model; + vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); +} + +static void eepro100_instance_init(Object *obj) +{ + EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, PCI_DEVICE(obj)); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(s), NULL); +} + +static E100PCIDeviceInfo e100_devices[] = { + { + .name = "i82550", + .desc = "Intel i82550 Ethernet", + .device = i82550, + /* TODO: check device id. */ + .device_id = PCI_DEVICE_ID_INTEL_82551IT, + /* Revision ID: 0x0c, 0x0d, 0x0e. */ + .revision = 0x0e, + /* TODO: check size of statistical counters. */ + .stats_size = 80, + /* TODO: check extended tcb support. */ + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82551", + .desc = "Intel i82551 Ethernet", + .device = i82551, + .device_id = PCI_DEVICE_ID_INTEL_82551IT, + /* Revision ID: 0x0f, 0x10. */ + .revision = 0x0f, + /* TODO: check size of statistical counters. */ + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82557a", + .desc = "Intel i82557A Ethernet", + .device = i82557A, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x01, + .power_management = false, + },{ + .name = "i82557b", + .desc = "Intel i82557B Ethernet", + .device = i82557B, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x02, + .power_management = false, + },{ + .name = "i82557c", + .desc = "Intel i82557C Ethernet", + .device = i82557C, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x03, + .power_management = false, + },{ + .name = "i82558a", + .desc = "Intel i82558A Ethernet", + .device = i82558A, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x04, + .stats_size = 76, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82558b", + .desc = "Intel i82558B Ethernet", + .device = i82558B, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x05, + .stats_size = 76, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82559a", + .desc = "Intel i82559A Ethernet", + .device = i82559A, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x06, + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82559b", + .desc = "Intel i82559B Ethernet", + .device = i82559B, + .device_id = PCI_DEVICE_ID_INTEL_82557, + .revision = 0x07, + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82559c", + .desc = "Intel i82559C Ethernet", + .device = i82559C, + .device_id = PCI_DEVICE_ID_INTEL_82557, +#if 0 + .revision = 0x08, +#endif + /* TODO: Windows wants revision id 0x0c. */ + .revision = 0x0c, +#if EEPROM_SIZE > 0 + .subsystem_vendor_id = PCI_VENDOR_ID_INTEL, + .subsystem_id = 0x0040, +#endif + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82559er", + .desc = "Intel i82559ER Ethernet", + .device = i82559ER, + .device_id = PCI_DEVICE_ID_INTEL_82551IT, + .revision = 0x09, + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + .name = "i82562", + .desc = "Intel i82562 Ethernet", + .device = i82562, + /* TODO: check device id. */ + .device_id = PCI_DEVICE_ID_INTEL_82551IT, + /* TODO: wrong revision id. */ + .revision = 0x0e, + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + },{ + /* Toshiba Tecra 8200. */ + .name = "i82801", + .desc = "Intel i82801 Ethernet", + .device = i82801, + .device_id = 0x2449, + .revision = 0x03, + .stats_size = 80, + .has_extended_tcb_support = true, + .power_management = true, + } +}; + +static E100PCIDeviceInfo *eepro100_get_class_by_name(const char *typename) +{ + E100PCIDeviceInfo *info = NULL; + int i; + + /* This is admittedly awkward but also temporary. QOM allows for + * parameterized typing and for subclassing both of which would suitable + * handle what's going on here. But class_data is already being used as + * a stop-gap hack to allow incremental qdev conversion so we cannot use it + * right now. Once we merge the final QOM series, we can come back here and + * do this in a much more elegant fashion. + */ + for (i = 0; i < ARRAY_SIZE(e100_devices); i++) { + if (strcmp(e100_devices[i].name, typename) == 0) { + info = &e100_devices[i]; + break; + } + } + assert(info != NULL); + + return info; +} + +static E100PCIDeviceInfo *eepro100_get_class(EEPRO100State *s) +{ + return eepro100_get_class_by_name(object_get_typename(OBJECT(s))); +} + +static Property e100_properties[] = { + DEFINE_NIC_PROPERTIES(EEPRO100State, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void eepro100_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + E100PCIDeviceInfo *info; + + info = eepro100_get_class_by_name(object_class_get_name(klass)); + + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->props = e100_properties; + dc->desc = info->desc; + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->romfile = "pxe-eepro100.rom"; + k->realize = e100_nic_realize; + k->exit = pci_nic_uninit; + k->device_id = info->device_id; + k->revision = info->revision; + k->subsystem_vendor_id = info->subsystem_vendor_id; + k->subsystem_id = info->subsystem_id; +} + +static void eepro100_register_types(void) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(e100_devices); i++) { + TypeInfo type_info = {}; + E100PCIDeviceInfo *info = &e100_devices[i]; + + type_info.name = info->name; + type_info.parent = TYPE_PCI_DEVICE; + type_info.class_init = eepro100_class_init; + type_info.instance_size = sizeof(EEPRO100State); + type_info.instance_init = eepro100_instance_init; + + type_register(&type_info); + } +} + +type_init(eepro100_register_types) diff --git a/qemu/hw/net/etraxfs_eth.c b/qemu/hw/net/etraxfs_eth.c new file mode 100644 index 000000000..d6002750f --- /dev/null +++ b/qemu/hw/net/etraxfs_eth.c @@ -0,0 +1,646 @@ +/* + * QEMU ETRAX Ethernet Controller. + * + * Copyright (c) 2008 Edgar E. Iglesias, Axis Communications AB. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <stdio.h> +#include "hw/sysbus.h" +#include "net/net.h" +#include "hw/cris/etraxfs.h" + +#define D(x) + +/* Advertisement control register. */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ + +/* + * The MDIO extensions in the TDK PHY model were reversed engineered from the + * linux driver (PHYID and Diagnostics reg). + * TODO: Add friendly names for the register nums. + */ +struct qemu_phy +{ + uint32_t regs[32]; + + int link; + + unsigned int (*read)(struct qemu_phy *phy, unsigned int req); + void (*write)(struct qemu_phy *phy, unsigned int req, unsigned int data); +}; + +static unsigned int tdk_read(struct qemu_phy *phy, unsigned int req) +{ + int regnum; + unsigned r = 0; + + regnum = req & 0x1f; + + switch (regnum) { + case 1: + if (!phy->link) { + break; + } + /* MR1. */ + /* Speeds and modes. */ + r |= (1 << 13) | (1 << 14); + r |= (1 << 11) | (1 << 12); + r |= (1 << 5); /* Autoneg complete. */ + r |= (1 << 3); /* Autoneg able. */ + r |= (1 << 2); /* link. */ + break; + case 5: + /* Link partner ability. + We are kind; always agree with whatever best mode + the guest advertises. */ + r = 1 << 14; /* Success. */ + /* Copy advertised modes. */ + r |= phy->regs[4] & (15 << 5); + /* Autoneg support. */ + r |= 1; + break; + case 18: + { + /* Diagnostics reg. */ + int duplex = 0; + int speed_100 = 0; + + if (!phy->link) { + break; + } + + /* Are we advertising 100 half or 100 duplex ? */ + speed_100 = !!(phy->regs[4] & ADVERTISE_100HALF); + speed_100 |= !!(phy->regs[4] & ADVERTISE_100FULL); + + /* Are we advertising 10 duplex or 100 duplex ? */ + duplex = !!(phy->regs[4] & ADVERTISE_100FULL); + duplex |= !!(phy->regs[4] & ADVERTISE_10FULL); + r = (speed_100 << 10) | (duplex << 11); + } + break; + + default: + r = phy->regs[regnum]; + break; + } + D(printf("\n%s %x = reg[%d]\n", __func__, r, regnum)); + return r; +} + +static void +tdk_write(struct qemu_phy *phy, unsigned int req, unsigned int data) +{ + int regnum; + + regnum = req & 0x1f; + D(printf("%s reg[%d] = %x\n", __func__, regnum, data)); + switch (regnum) { + default: + phy->regs[regnum] = data; + break; + } +} + +static void +tdk_init(struct qemu_phy *phy) +{ + phy->regs[0] = 0x3100; + /* PHY Id. */ + phy->regs[2] = 0x0300; + phy->regs[3] = 0xe400; + /* Autonegotiation advertisement reg. */ + phy->regs[4] = 0x01E1; + phy->link = 1; + + phy->read = tdk_read; + phy->write = tdk_write; +} + +struct qemu_mdio +{ + /* bus. */ + int mdc; + int mdio; + + /* decoder. */ + enum { + PREAMBLE, + SOF, + OPC, + ADDR, + REQ, + TURNAROUND, + DATA + } state; + unsigned int drive; + + unsigned int cnt; + unsigned int addr; + unsigned int opc; + unsigned int req; + unsigned int data; + + struct qemu_phy *devs[32]; +}; + +static void +mdio_attach(struct qemu_mdio *bus, struct qemu_phy *phy, unsigned int addr) +{ + bus->devs[addr & 0x1f] = phy; +} + +#ifdef USE_THIS_DEAD_CODE +static void +mdio_detach(struct qemu_mdio *bus, struct qemu_phy *phy, unsigned int addr) +{ + bus->devs[addr & 0x1f] = NULL; +} +#endif + +static void mdio_read_req(struct qemu_mdio *bus) +{ + struct qemu_phy *phy; + + phy = bus->devs[bus->addr]; + if (phy && phy->read) { + bus->data = phy->read(phy, bus->req); + } else { + bus->data = 0xffff; + } +} + +static void mdio_write_req(struct qemu_mdio *bus) +{ + struct qemu_phy *phy; + + phy = bus->devs[bus->addr]; + if (phy && phy->write) { + phy->write(phy, bus->req, bus->data); + } +} + +static void mdio_cycle(struct qemu_mdio *bus) +{ + bus->cnt++; + + D(printf("mdc=%d mdio=%d state=%d cnt=%d drv=%d\n", + bus->mdc, bus->mdio, bus->state, bus->cnt, bus->drive)); +#if 0 + if (bus->mdc) { + printf("%d", bus->mdio); + } +#endif + switch (bus->state) { + case PREAMBLE: + if (bus->mdc) { + if (bus->cnt >= (32 * 2) && !bus->mdio) { + bus->cnt = 0; + bus->state = SOF; + bus->data = 0; + } + } + break; + case SOF: + if (bus->mdc) { + if (bus->mdio != 1) { + printf("WARNING: no SOF\n"); + } + if (bus->cnt == 1*2) { + bus->cnt = 0; + bus->opc = 0; + bus->state = OPC; + } + } + break; + case OPC: + if (bus->mdc) { + bus->opc <<= 1; + bus->opc |= bus->mdio & 1; + if (bus->cnt == 2*2) { + bus->cnt = 0; + bus->addr = 0; + bus->state = ADDR; + } + } + break; + case ADDR: + if (bus->mdc) { + bus->addr <<= 1; + bus->addr |= bus->mdio & 1; + + if (bus->cnt == 5*2) { + bus->cnt = 0; + bus->req = 0; + bus->state = REQ; + } + } + break; + case REQ: + if (bus->mdc) { + bus->req <<= 1; + bus->req |= bus->mdio & 1; + if (bus->cnt == 5*2) { + bus->cnt = 0; + bus->state = TURNAROUND; + } + } + break; + case TURNAROUND: + if (bus->mdc && bus->cnt == 2*2) { + bus->mdio = 0; + bus->cnt = 0; + + if (bus->opc == 2) { + bus->drive = 1; + mdio_read_req(bus); + bus->mdio = bus->data & 1; + } + bus->state = DATA; + } + break; + case DATA: + if (!bus->mdc) { + if (bus->drive) { + bus->mdio = !!(bus->data & (1 << 15)); + bus->data <<= 1; + } + } else { + if (!bus->drive) { + bus->data <<= 1; + bus->data |= bus->mdio; + } + if (bus->cnt == 16 * 2) { + bus->cnt = 0; + bus->state = PREAMBLE; + if (!bus->drive) { + mdio_write_req(bus); + } + bus->drive = 0; + } + } + break; + default: + break; + } +} + +/* ETRAX-FS Ethernet MAC block starts here. */ + +#define RW_MA0_LO 0x00 +#define RW_MA0_HI 0x01 +#define RW_MA1_LO 0x02 +#define RW_MA1_HI 0x03 +#define RW_GA_LO 0x04 +#define RW_GA_HI 0x05 +#define RW_GEN_CTRL 0x06 +#define RW_REC_CTRL 0x07 +#define RW_TR_CTRL 0x08 +#define RW_CLR_ERR 0x09 +#define RW_MGM_CTRL 0x0a +#define R_STAT 0x0b +#define FS_ETH_MAX_REGS 0x17 + +#define TYPE_ETRAX_FS_ETH "etraxfs-eth" +#define ETRAX_FS_ETH(obj) \ + OBJECT_CHECK(ETRAXFSEthState, (obj), TYPE_ETRAX_FS_ETH) + +typedef struct ETRAXFSEthState +{ + SysBusDevice parent_obj; + + MemoryRegion mmio; + NICState *nic; + NICConf conf; + + /* Two addrs in the filter. */ + uint8_t macaddr[2][6]; + uint32_t regs[FS_ETH_MAX_REGS]; + + union { + void *vdma_out; + struct etraxfs_dma_client *dma_out; + }; + union { + void *vdma_in; + struct etraxfs_dma_client *dma_in; + }; + + /* MDIO bus. */ + struct qemu_mdio mdio_bus; + unsigned int phyaddr; + int duplex_mismatch; + + /* PHY. */ + struct qemu_phy phy; +} ETRAXFSEthState; + +static void eth_validate_duplex(ETRAXFSEthState *eth) +{ + struct qemu_phy *phy; + unsigned int phy_duplex; + unsigned int mac_duplex; + int new_mm = 0; + + phy = eth->mdio_bus.devs[eth->phyaddr]; + phy_duplex = !!(phy->read(phy, 18) & (1 << 11)); + mac_duplex = !!(eth->regs[RW_REC_CTRL] & 128); + + if (mac_duplex != phy_duplex) { + new_mm = 1; + } + + if (eth->regs[RW_GEN_CTRL] & 1) { + if (new_mm != eth->duplex_mismatch) { + if (new_mm) { + printf("HW: WARNING ETH duplex mismatch MAC=%d PHY=%d\n", + mac_duplex, phy_duplex); + } else { + printf("HW: ETH duplex ok.\n"); + } + } + eth->duplex_mismatch = new_mm; + } +} + +static uint64_t +eth_read(void *opaque, hwaddr addr, unsigned int size) +{ + ETRAXFSEthState *eth = opaque; + uint32_t r = 0; + + addr >>= 2; + + switch (addr) { + case R_STAT: + r = eth->mdio_bus.mdio & 1; + break; + default: + r = eth->regs[addr]; + D(printf("%s %x\n", __func__, addr * 4)); + break; + } + return r; +} + +static void eth_update_ma(ETRAXFSEthState *eth, int ma) +{ + int reg; + int i = 0; + + ma &= 1; + + reg = RW_MA0_LO; + if (ma) { + reg = RW_MA1_LO; + } + + eth->macaddr[ma][i++] = eth->regs[reg]; + eth->macaddr[ma][i++] = eth->regs[reg] >> 8; + eth->macaddr[ma][i++] = eth->regs[reg] >> 16; + eth->macaddr[ma][i++] = eth->regs[reg] >> 24; + eth->macaddr[ma][i++] = eth->regs[reg + 1]; + eth->macaddr[ma][i] = eth->regs[reg + 1] >> 8; + + D(printf("set mac%d=%x.%x.%x.%x.%x.%x\n", ma, + eth->macaddr[ma][0], eth->macaddr[ma][1], + eth->macaddr[ma][2], eth->macaddr[ma][3], + eth->macaddr[ma][4], eth->macaddr[ma][5])); +} + +static void +eth_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + ETRAXFSEthState *eth = opaque; + uint32_t value = val64; + + addr >>= 2; + switch (addr) { + case RW_MA0_LO: + case RW_MA0_HI: + eth->regs[addr] = value; + eth_update_ma(eth, 0); + break; + case RW_MA1_LO: + case RW_MA1_HI: + eth->regs[addr] = value; + eth_update_ma(eth, 1); + break; + + case RW_MGM_CTRL: + /* Attach an MDIO/PHY abstraction. */ + if (value & 2) { + eth->mdio_bus.mdio = value & 1; + } + if (eth->mdio_bus.mdc != (value & 4)) { + mdio_cycle(ð->mdio_bus); + eth_validate_duplex(eth); + } + eth->mdio_bus.mdc = !!(value & 4); + eth->regs[addr] = value; + break; + + case RW_REC_CTRL: + eth->regs[addr] = value; + eth_validate_duplex(eth); + break; + + default: + eth->regs[addr] = value; + D(printf("%s %x %x\n", __func__, addr, value)); + break; + } +} + +/* The ETRAX FS has a groupt address table (GAT) which works like a k=1 bloom + filter dropping group addresses we have not joined. The filter has 64 + bits (m). The has function is a simple nible xor of the group addr. */ +static int eth_match_groupaddr(ETRAXFSEthState *eth, const unsigned char *sa) +{ + unsigned int hsh; + int m_individual = eth->regs[RW_REC_CTRL] & 4; + int match; + + /* First bit on the wire of a MAC address signals multicast or + physical address. */ + if (!m_individual && !(sa[0] & 1)) { + return 0; + } + + /* Calculate the hash index for the GA registers. */ + hsh = 0; + hsh ^= (*sa) & 0x3f; + hsh ^= ((*sa) >> 6) & 0x03; + ++sa; + hsh ^= ((*sa) << 2) & 0x03c; + hsh ^= ((*sa) >> 4) & 0xf; + ++sa; + hsh ^= ((*sa) << 4) & 0x30; + hsh ^= ((*sa) >> 2) & 0x3f; + ++sa; + hsh ^= (*sa) & 0x3f; + hsh ^= ((*sa) >> 6) & 0x03; + ++sa; + hsh ^= ((*sa) << 2) & 0x03c; + hsh ^= ((*sa) >> 4) & 0xf; + ++sa; + hsh ^= ((*sa) << 4) & 0x30; + hsh ^= ((*sa) >> 2) & 0x3f; + + hsh &= 63; + if (hsh > 31) { + match = eth->regs[RW_GA_HI] & (1 << (hsh - 32)); + } else { + match = eth->regs[RW_GA_LO] & (1 << hsh); + } + D(printf("hsh=%x ga=%x.%x mtch=%d\n", hsh, + eth->regs[RW_GA_HI], eth->regs[RW_GA_LO], match)); + return match; +} + +static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + unsigned char sa_bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + ETRAXFSEthState *eth = qemu_get_nic_opaque(nc); + int use_ma0 = eth->regs[RW_REC_CTRL] & 1; + int use_ma1 = eth->regs[RW_REC_CTRL] & 2; + int r_bcast = eth->regs[RW_REC_CTRL] & 8; + + if (size < 12) { + return -1; + } + + D(printf("%x.%x.%x.%x.%x.%x ma=%d %d bc=%d\n", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], + use_ma0, use_ma1, r_bcast)); + + /* Does the frame get through the address filters? */ + if ((!use_ma0 || memcmp(buf, eth->macaddr[0], 6)) + && (!use_ma1 || memcmp(buf, eth->macaddr[1], 6)) + && (!r_bcast || memcmp(buf, sa_bcast, 6)) + && !eth_match_groupaddr(eth, buf)) { + return size; + } + + /* FIXME: Find another way to pass on the fake csum. */ + etraxfs_dmac_input(eth->dma_in, (void *)buf, size + 4, 1); + + return size; +} + +static int eth_tx_push(void *opaque, unsigned char *buf, int len, bool eop) +{ + ETRAXFSEthState *eth = opaque; + + D(printf("%s buf=%p len=%d\n", __func__, buf, len)); + qemu_send_packet(qemu_get_queue(eth->nic), buf, len); + return len; +} + +static void eth_set_link(NetClientState *nc) +{ + ETRAXFSEthState *eth = qemu_get_nic_opaque(nc); + D(printf("%s %d\n", __func__, nc->link_down)); + eth->phy.link = !nc->link_down; +} + +static const MemoryRegionOps eth_ops = { + .read = eth_read, + .write = eth_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static NetClientInfo net_etraxfs_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = eth_receive, + .link_status_changed = eth_set_link, +}; + +static int fs_eth_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + ETRAXFSEthState *s = ETRAX_FS_ETH(dev); + + if (!s->dma_out || !s->dma_in) { + hw_error("Unconnected ETRAX-FS Ethernet MAC.\n"); + } + + s->dma_out->client.push = eth_tx_push; + s->dma_out->client.opaque = s; + s->dma_in->client.opaque = s; + s->dma_in->client.pull = NULL; + + memory_region_init_io(&s->mmio, OBJECT(dev), ð_ops, s, + "etraxfs-eth", 0x5c); + sysbus_init_mmio(sbd, &s->mmio); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_etraxfs_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + + tdk_init(&s->phy); + mdio_attach(&s->mdio_bus, &s->phy, s->phyaddr); + return 0; +} + +static Property etraxfs_eth_properties[] = { + DEFINE_PROP_UINT32("phyaddr", ETRAXFSEthState, phyaddr, 1), + DEFINE_PROP_PTR("dma_out", ETRAXFSEthState, vdma_out), + DEFINE_PROP_PTR("dma_in", ETRAXFSEthState, vdma_in), + DEFINE_NIC_PROPERTIES(ETRAXFSEthState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void etraxfs_eth_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = fs_eth_init; + dc->props = etraxfs_eth_properties; + /* Reason: pointer properties "dma_out", "dma_in" */ + dc->cannot_instantiate_with_device_add_yet = true; +} + +static const TypeInfo etraxfs_eth_info = { + .name = TYPE_ETRAX_FS_ETH, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ETRAXFSEthState), + .class_init = etraxfs_eth_class_init, +}; + +static void etraxfs_eth_register_types(void) +{ + type_register_static(&etraxfs_eth_info); +} + +type_init(etraxfs_eth_register_types) diff --git a/qemu/hw/net/fsl_etsec/etsec.c b/qemu/hw/net/fsl_etsec/etsec.c new file mode 100644 index 000000000..0f5cf4477 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/etsec.c @@ -0,0 +1,456 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * This implementation doesn't include ring priority, TCP/IP Off-Load, QoS. + */ + +#include "sysemu/sysemu.h" +#include "hw/sysbus.h" +#include "trace.h" +#include "hw/ptimer.h" +#include "etsec.h" +#include "registers.h" + +/* #define HEX_DUMP */ +/* #define DEBUG_REGISTER */ + +#ifdef DEBUG_REGISTER +static const int debug_etsec = 1; +#else +static const int debug_etsec; +#endif + +#define DPRINTF(fmt, ...) do { \ + if (debug_etsec) { \ + qemu_log(fmt , ## __VA_ARGS__); \ + } \ + } while (0) + +static uint64_t etsec_read(void *opaque, hwaddr addr, unsigned size) +{ + eTSEC *etsec = opaque; + uint32_t reg_index = addr / 4; + eTSEC_Register *reg = NULL; + uint32_t ret = 0x0; + + assert(reg_index < ETSEC_REG_NUMBER); + + reg = &etsec->regs[reg_index]; + + + switch (reg->access) { + case ACC_WO: + ret = 0x00000000; + break; + + case ACC_RW: + case ACC_W1C: + case ACC_RO: + default: + ret = reg->value; + break; + } + + DPRINTF("Read 0x%08x @ 0x" TARGET_FMT_plx + " : %s (%s)\n", + ret, addr, reg->name, reg->desc); + + return ret; +} + +static void write_tstat(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + int i = 0; + + for (i = 0; i < 8; i++) { + /* Check THLTi flag in TSTAT */ + if (value & (1 << (31 - i))) { + etsec_walk_tx_ring(etsec, i); + } + } + + /* Write 1 to clear */ + reg->value &= ~value; +} + +static void write_rstat(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + int i = 0; + + for (i = 0; i < 8; i++) { + /* Check QHLTi flag in RSTAT */ + if (value & (1 << (23 - i)) && !(reg->value & (1 << (23 - i)))) { + etsec_walk_rx_ring(etsec, i); + } + } + + /* Write 1 to clear */ + reg->value &= ~value; +} + +static void write_tbasex(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + reg->value = value & ~0x7; + + /* Copy this value in the ring's TxBD pointer */ + etsec->regs[TBPTR0 + (reg_index - TBASE0)].value = value & ~0x7; +} + +static void write_rbasex(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + reg->value = value & ~0x7; + + /* Copy this value in the ring's RxBD pointer */ + etsec->regs[RBPTR0 + (reg_index - RBASE0)].value = value & ~0x7; +} + +static void write_ievent(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + /* Write 1 to clear */ + reg->value &= ~value; + + if (!(reg->value & (IEVENT_TXF | IEVENT_TXF))) { + qemu_irq_lower(etsec->tx_irq); + } + if (!(reg->value & (IEVENT_RXF | IEVENT_RXF))) { + qemu_irq_lower(etsec->rx_irq); + } + + if (!(reg->value & (IEVENT_MAG | IEVENT_GTSC | IEVENT_GRSC | IEVENT_TXC | + IEVENT_RXC | IEVENT_BABR | IEVENT_BABT | IEVENT_LC | + IEVENT_CRL | IEVENT_FGPI | IEVENT_FIR | IEVENT_FIQ | + IEVENT_DPE | IEVENT_PERR | IEVENT_EBERR | IEVENT_TXE | + IEVENT_XFUN | IEVENT_BSY | IEVENT_MSRO | IEVENT_MMRD | + IEVENT_MMRW))) { + qemu_irq_lower(etsec->err_irq); + } +} + +static void write_dmactrl(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + reg->value = value; + + if (value & DMACTRL_GRS) { + + if (etsec->rx_buffer_len != 0) { + /* Graceful receive stop delayed until end of frame */ + } else { + /* Graceful receive stop now */ + etsec->regs[IEVENT].value |= IEVENT_GRSC; + if (etsec->regs[IMASK].value & IMASK_GRSCEN) { + qemu_irq_raise(etsec->err_irq); + } + } + } + + if (value & DMACTRL_GTS) { + + if (etsec->tx_buffer_len != 0) { + /* Graceful transmit stop delayed until end of frame */ + } else { + /* Graceful transmit stop now */ + etsec->regs[IEVENT].value |= IEVENT_GTSC; + if (etsec->regs[IMASK].value & IMASK_GTSCEN) { + qemu_irq_raise(etsec->err_irq); + } + } + } + + if (!(value & DMACTRL_WOP)) { + /* Start polling */ + ptimer_stop(etsec->ptimer); + ptimer_set_count(etsec->ptimer, 1); + ptimer_run(etsec->ptimer, 1); + } +} + +static void etsec_write(void *opaque, + hwaddr addr, + uint64_t value, + unsigned size) +{ + eTSEC *etsec = opaque; + uint32_t reg_index = addr / 4; + eTSEC_Register *reg = NULL; + uint32_t before = 0x0; + + assert(reg_index < ETSEC_REG_NUMBER); + + reg = &etsec->regs[reg_index]; + before = reg->value; + + switch (reg_index) { + case IEVENT: + write_ievent(etsec, reg, reg_index, value); + break; + + case DMACTRL: + write_dmactrl(etsec, reg, reg_index, value); + break; + + case TSTAT: + write_tstat(etsec, reg, reg_index, value); + break; + + case RSTAT: + write_rstat(etsec, reg, reg_index, value); + break; + + case TBASE0 ... TBASE7: + write_tbasex(etsec, reg, reg_index, value); + break; + + case RBASE0 ... RBASE7: + write_rbasex(etsec, reg, reg_index, value); + break; + + case MIIMCFG ... MIIMIND: + etsec_write_miim(etsec, reg, reg_index, value); + break; + + default: + /* Default handling */ + switch (reg->access) { + + case ACC_RW: + case ACC_WO: + reg->value = value; + break; + + case ACC_W1C: + reg->value &= ~value; + break; + + case ACC_RO: + default: + /* Read Only or Unknown register */ + break; + } + } + + DPRINTF("Write 0x%08x @ 0x" TARGET_FMT_plx + " val:0x%08x->0x%08x : %s (%s)\n", + (unsigned int)value, addr, before, reg->value, + reg->name, reg->desc); +} + +static const MemoryRegionOps etsec_ops = { + .read = etsec_read, + .write = etsec_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void etsec_timer_hit(void *opaque) +{ + eTSEC *etsec = opaque; + + ptimer_stop(etsec->ptimer); + + if (!(etsec->regs[DMACTRL].value & DMACTRL_WOP)) { + + if (!(etsec->regs[DMACTRL].value & DMACTRL_GTS)) { + etsec_walk_tx_ring(etsec, 0); + } + ptimer_set_count(etsec->ptimer, 1); + ptimer_run(etsec->ptimer, 1); + } +} + +static void etsec_reset(DeviceState *d) +{ + eTSEC *etsec = ETSEC_COMMON(d); + int i = 0; + int reg_index = 0; + + /* Default value for all registers */ + for (i = 0; i < ETSEC_REG_NUMBER; i++) { + etsec->regs[i].name = "Reserved"; + etsec->regs[i].desc = ""; + etsec->regs[i].access = ACC_UNKNOWN; + etsec->regs[i].value = 0x00000000; + } + + /* Set-up known registers */ + for (i = 0; eTSEC_registers_def[i].name != NULL; i++) { + + reg_index = eTSEC_registers_def[i].offset / 4; + + etsec->regs[reg_index].name = eTSEC_registers_def[i].name; + etsec->regs[reg_index].desc = eTSEC_registers_def[i].desc; + etsec->regs[reg_index].access = eTSEC_registers_def[i].access; + etsec->regs[reg_index].value = eTSEC_registers_def[i].reset; + } + + etsec->tx_buffer = NULL; + etsec->tx_buffer_len = 0; + etsec->rx_buffer = NULL; + etsec->rx_buffer_len = 0; + + etsec->phy_status = + MII_SR_EXTENDED_CAPS | MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | + MII_SR_AUTONEG_COMPLETE | MII_SR_PREAMBLE_SUPPRESS | + MII_SR_EXTENDED_STATUS | MII_SR_100T2_HD_CAPS | MII_SR_100T2_FD_CAPS | + MII_SR_10T_HD_CAPS | MII_SR_10T_FD_CAPS | MII_SR_100X_HD_CAPS | + MII_SR_100X_FD_CAPS | MII_SR_100T4_CAPS; +} + +static ssize_t etsec_receive(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + ssize_t ret; + eTSEC *etsec = qemu_get_nic_opaque(nc); + +#if defined(HEX_DUMP) + fprintf(stderr, "%s receive size:%d\n", etsec->nic->nc.name, size); + qemu_hexdump(buf, stderr, "", size); +#endif + /* Flush is unnecessary as are already in receiving path */ + etsec->need_flush = false; + ret = etsec_rx_ring_write(etsec, buf, size); + if (ret == 0) { + /* The packet will be queued, let's flush it when buffer is avilable + * again. */ + etsec->need_flush = true; + } + return ret; +} + + +static void etsec_set_link_status(NetClientState *nc) +{ + eTSEC *etsec = qemu_get_nic_opaque(nc); + + etsec_miim_link_status(etsec, nc); +} + +static NetClientInfo net_etsec_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = etsec_receive, + .link_status_changed = etsec_set_link_status, +}; + +static void etsec_realize(DeviceState *dev, Error **errp) +{ + eTSEC *etsec = ETSEC_COMMON(dev); + + etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf, + object_get_typename(OBJECT(dev)), dev->id, etsec); + qemu_format_nic_info_str(qemu_get_queue(etsec->nic), etsec->conf.macaddr.a); + + + etsec->bh = qemu_bh_new(etsec_timer_hit, etsec); + etsec->ptimer = ptimer_init(etsec->bh); + ptimer_set_freq(etsec->ptimer, 100); +} + +static void etsec_instance_init(Object *obj) +{ + eTSEC *etsec = ETSEC_COMMON(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&etsec->io_area, OBJECT(etsec), &etsec_ops, etsec, + "eTSEC", 0x1000); + sysbus_init_mmio(sbd, &etsec->io_area); + + sysbus_init_irq(sbd, &etsec->tx_irq); + sysbus_init_irq(sbd, &etsec->rx_irq); + sysbus_init_irq(sbd, &etsec->err_irq); +} + +static Property etsec_properties[] = { + DEFINE_NIC_PROPERTIES(eTSEC, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void etsec_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = etsec_realize; + dc->reset = etsec_reset; + dc->props = etsec_properties; +} + +static TypeInfo etsec_info = { + .name = "eTSEC", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(eTSEC), + .class_init = etsec_class_init, + .instance_init = etsec_instance_init, +}; + +static void etsec_register_types(void) +{ + type_register_static(&etsec_info); +} + +type_init(etsec_register_types) + +DeviceState *etsec_create(hwaddr base, + MemoryRegion * mr, + NICInfo * nd, + qemu_irq tx_irq, + qemu_irq rx_irq, + qemu_irq err_irq) +{ + DeviceState *dev; + + dev = qdev_create(NULL, "eTSEC"); + qdev_set_nic_properties(dev, nd); + qdev_init_nofail(dev); + + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, tx_irq); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, rx_irq); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, err_irq); + + memory_region_add_subregion(mr, base, + SYS_BUS_DEVICE(dev)->mmio[0].memory); + + return dev; +} diff --git a/qemu/hw/net/fsl_etsec/etsec.h b/qemu/hw/net/fsl_etsec/etsec.h new file mode 100644 index 000000000..e7dc0a4b9 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/etsec.h @@ -0,0 +1,176 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef _ETSEC_H_ +#define _ETSEC_H_ + +#include "hw/qdev.h" +#include "hw/sysbus.h" +#include "net/net.h" +#include "hw/ptimer.h" + +/* Buffer Descriptors */ + +typedef struct eTSEC_rxtx_bd { + uint16_t flags; + uint16_t length; + uint32_t bufptr; +} eTSEC_rxtx_bd; + +#define BD_WRAP (1 << 13) +#define BD_INTERRUPT (1 << 12) +#define BD_LAST (1 << 11) + +#define BD_TX_READY (1 << 15) +#define BD_TX_PADCRC (1 << 14) +#define BD_TX_TC (1 << 10) +#define BD_TX_PREDEF (1 << 9) +#define BD_TX_HFELC (1 << 7) +#define BD_TX_CFRL (1 << 6) +#define BD_TX_RC_MASK 0xF +#define BD_TX_RC_OFFSET 0x2 +#define BD_TX_TOEUN (1 << 1) +#define BD_TX_TR (1 << 0) + +#define BD_RX_EMPTY (1 << 15) +#define BD_RX_RO1 (1 << 14) +#define BD_RX_FIRST (1 << 10) +#define BD_RX_MISS (1 << 8) +#define BD_RX_BROADCAST (1 << 7) +#define BD_RX_MULTICAST (1 << 6) +#define BD_RX_LG (1 << 5) +#define BD_RX_NO (1 << 4) +#define BD_RX_SH (1 << 3) +#define BD_RX_CR (1 << 2) +#define BD_RX_OV (1 << 1) +#define BD_RX_TR (1 << 0) + +/* Tx FCB flags */ +#define FCB_TX_VLN (1 << 7) +#define FCB_TX_IP (1 << 6) +#define FCB_TX_IP6 (1 << 5) +#define FCB_TX_TUP (1 << 4) +#define FCB_TX_UDP (1 << 3) +#define FCB_TX_CIP (1 << 2) +#define FCB_TX_CTU (1 << 1) +#define FCB_TX_NPH (1 << 0) + +/* PHY Status Register */ +#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ +#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ +#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ +#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ +#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ +#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ +#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ +#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ +#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ +#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ +#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ + +/* eTSEC */ + +/* Number of register in the device */ +#define ETSEC_REG_NUMBER 1024 + +typedef struct eTSEC_Register { + const char *name; + const char *desc; + uint32_t access; + uint32_t value; +} eTSEC_Register; + +typedef struct eTSEC { + SysBusDevice busdev; + + MemoryRegion io_area; + + eTSEC_Register regs[ETSEC_REG_NUMBER]; + + NICState *nic; + NICConf conf; + + /* Tx */ + + uint8_t *tx_buffer; + uint32_t tx_buffer_len; + eTSEC_rxtx_bd first_bd; + + /* Rx */ + + uint8_t *rx_buffer; + uint32_t rx_buffer_len; + uint32_t rx_remaining_data; + uint8_t rx_first_in_frame; + uint8_t rx_fcb_size; + eTSEC_rxtx_bd rx_first_bd; + uint8_t rx_fcb[10]; + uint32_t rx_padding; + + /* IRQs */ + qemu_irq tx_irq; + qemu_irq rx_irq; + qemu_irq err_irq; + + + uint16_t phy_status; + uint16_t phy_control; + + /* Polling */ + QEMUBH *bh; + struct ptimer_state *ptimer; + + /* Whether we should flush the rx queue when buffer becomes available. */ + bool need_flush; +} eTSEC; + +#define TYPE_ETSEC_COMMON "eTSEC" +#define ETSEC_COMMON(obj) \ + OBJECT_CHECK(eTSEC, (obj), TYPE_ETSEC_COMMON) + +#define eTSEC_TRANSMIT 1 +#define eTSEC_RECEIVE 2 + +DeviceState *etsec_create(hwaddr base, + MemoryRegion *mr, + NICInfo *nd, + qemu_irq tx_irq, + qemu_irq rx_irq, + qemu_irq err_irq); + +void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr); +void etsec_walk_rx_ring(eTSEC *etsec, int ring_nbr); +ssize_t etsec_rx_ring_write(eTSEC *etsec, const uint8_t *buf, size_t size); + +void etsec_write_miim(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value); + +void etsec_miim_link_status(eTSEC *etsec, NetClientState *nc); + +#endif /* ! _ETSEC_H_ */ diff --git a/qemu/hw/net/fsl_etsec/miim.c b/qemu/hw/net/fsl_etsec/miim.c new file mode 100644 index 000000000..1931b74e6 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/miim.c @@ -0,0 +1,146 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "etsec.h" +#include "registers.h" + +/* #define DEBUG_MIIM */ + +#define MIIM_CONTROL 0 +#define MIIM_STATUS 1 +#define MIIM_PHY_ID_1 2 +#define MIIM_PHY_ID_2 3 +#define MIIM_T2_STATUS 10 +#define MIIM_EXT_STATUS 15 + +static void miim_read_cycle(eTSEC *etsec) +{ + uint8_t phy; + uint8_t addr; + uint16_t value; + + phy = (etsec->regs[MIIMADD].value >> 8) & 0x1F; + (void)phy; /* Unreferenced */ + addr = etsec->regs[MIIMADD].value & 0x1F; + + switch (addr) { + case MIIM_CONTROL: + value = etsec->phy_control; + break; + case MIIM_STATUS: + value = etsec->phy_status; + break; + case MIIM_T2_STATUS: + value = 0x1800; /* Local and remote receivers OK */ + break; + default: + value = 0x0; + break; + }; + +#ifdef DEBUG_MIIM + qemu_log("%s phy:%d addr:0x%x value:0x%x\n", __func__, phy, addr, value); +#endif + + etsec->regs[MIIMSTAT].value = value; +} + +static void miim_write_cycle(eTSEC *etsec) +{ + uint8_t phy; + uint8_t addr; + uint16_t value; + + phy = (etsec->regs[MIIMADD].value >> 8) & 0x1F; + (void)phy; /* Unreferenced */ + addr = etsec->regs[MIIMADD].value & 0x1F; + value = etsec->regs[MIIMCON].value & 0xffff; + +#ifdef DEBUG_MIIM + qemu_log("%s phy:%d addr:0x%x value:0x%x\n", __func__, phy, addr, value); +#endif + + switch (addr) { + case MIIM_CONTROL: + etsec->phy_control = value & ~(0x8100); + break; + default: + break; + }; +} + +void etsec_write_miim(eTSEC *etsec, + eTSEC_Register *reg, + uint32_t reg_index, + uint32_t value) +{ + + switch (reg_index) { + + case MIIMCOM: + /* Read and scan cycle */ + + if ((!(reg->value & MIIMCOM_READ)) && (value & MIIMCOM_READ)) { + /* Read */ + miim_read_cycle(etsec); + } + reg->value = value; + break; + + case MIIMCON: + reg->value = value & 0xffff; + miim_write_cycle(etsec); + break; + + default: + /* Default handling */ + switch (reg->access) { + + case ACC_RW: + case ACC_WO: + reg->value = value; + break; + + case ACC_W1C: + reg->value &= ~value; + break; + + case ACC_RO: + default: + /* Read Only or Unknown register */ + break; + } + } + +} + +void etsec_miim_link_status(eTSEC *etsec, NetClientState *nc) +{ + /* Set link status */ + if (nc->link_down) { + etsec->phy_status &= ~MII_SR_LINK_STATUS; + } else { + etsec->phy_status |= MII_SR_LINK_STATUS; + } +} diff --git a/qemu/hw/net/fsl_etsec/registers.c b/qemu/hw/net/fsl_etsec/registers.c new file mode 100644 index 000000000..a7bbfa113 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/registers.c @@ -0,0 +1,295 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "registers.h" + +const eTSEC_Register_Definition eTSEC_registers_def[] = { +{0x000, "TSEC_ID", "Controller ID register", ACC_RO, 0x01240000}, +{0x004, "TSEC_ID2", "Controller ID register 2", ACC_RO, 0x003000F0}, +{0x010, "IEVENT", "Interrupt event register", ACC_W1C, 0x00000000}, +{0x014, "IMASK", "Interrupt mask register", ACC_RW, 0x00000000}, +{0x018, "EDIS", "Error disabled register", ACC_RW, 0x00000000}, +{0x020, "ECNTRL", "Ethernet control register", ACC_RW, 0x00000040}, +{0x028, "PTV", "Pause time value register", ACC_RW, 0x00000000}, +{0x02C, "DMACTRL", "DMA control register", ACC_RW, 0x00000000}, +{0x030, "TBIPA", "TBI PHY address register", ACC_RW, 0x00000000}, + +/* eTSEC FIFO Control and Status Registers */ + +{0x058, "FIFO_RX_ALARM", "FIFO receive alarm start threshold register", ACC_RW, 0x00000040}, +{0x05C, "FIFO_RX_ALARM_SHUTOFF", "FIFO receive alarm shut-off threshold register", ACC_RW, 0x00000080}, +{0x08C, "FIFO_TX_THR", "FIFO transmit threshold register", ACC_RW, 0x00000080}, +{0x098, "FIFO_TX_STARVE", "FIFO transmit starve register", ACC_RW, 0x00000040}, +{0x09C, "FIFO_TX_STARVE_SHUTOFF", "FIFO transmit starve shut-off register", ACC_RW, 0x00000080}, + +/* eTSEC Transmit Control and Status Registers */ + +{0x100, "TCTRL", "Transmit control register", ACC_RW, 0x00000000}, +{0x104, "TSTAT", "Transmit status register", ACC_W1C, 0x00000000}, +{0x108, "DFVLAN", "Default VLAN control word", ACC_RW, 0x81000000}, +{0x110, "TXIC", "Transmit interrupt coalescing register", ACC_RW, 0x00000000}, +{0x114, "TQUEUE", "Transmit queue control register", ACC_RW, 0x00008000}, +{0x140, "TR03WT", "TxBD Rings 0-3 round-robin weightings", ACC_RW, 0x00000000}, +{0x144, "TR47WT", "TxBD Rings 4-7 round-robin weightings", ACC_RW, 0x00000000}, +{0x180, "TBDBPH", "Tx data buffer pointer high bits", ACC_RW, 0x00000000}, +{0x184, "TBPTR0", "TxBD pointer for ring 0", ACC_RW, 0x00000000}, +{0x18C, "TBPTR1", "TxBD pointer for ring 1", ACC_RW, 0x00000000}, +{0x194, "TBPTR2", "TxBD pointer for ring 2", ACC_RW, 0x00000000}, +{0x19C, "TBPTR3", "TxBD pointer for ring 3", ACC_RW, 0x00000000}, +{0x1A4, "TBPTR4", "TxBD pointer for ring 4", ACC_RW, 0x00000000}, +{0x1AC, "TBPTR5", "TxBD pointer for ring 5", ACC_RW, 0x00000000}, +{0x1B4, "TBPTR6", "TxBD pointer for ring 6", ACC_RW, 0x00000000}, +{0x1BC, "TBPTR7", "TxBD pointer for ring 7", ACC_RW, 0x00000000}, +{0x200, "TBASEH", "TxBD base address high bits", ACC_RW, 0x00000000}, +{0x204, "TBASE0", "TxBD base address of ring 0", ACC_RW, 0x00000000}, +{0x20C, "TBASE1", "TxBD base address of ring 1", ACC_RW, 0x00000000}, +{0x214, "TBASE2", "TxBD base address of ring 2", ACC_RW, 0x00000000}, +{0x21C, "TBASE3", "TxBD base address of ring 3", ACC_RW, 0x00000000}, +{0x224, "TBASE4", "TxBD base address of ring 4", ACC_RW, 0x00000000}, +{0x22C, "TBASE5", "TxBD base address of ring 5", ACC_RW, 0x00000000}, +{0x234, "TBASE6", "TxBD base address of ring 6", ACC_RW, 0x00000000}, +{0x23C, "TBASE7", "TxBD base address of ring 7", ACC_RW, 0x00000000}, +{0x280, "TMR_TXTS1_ID", "Tx time stamp identification tag (set 1)", ACC_RO, 0x00000000}, +{0x284, "TMR_TXTS2_ID", "Tx time stamp identification tag (set 2)", ACC_RO, 0x00000000}, +{0x2C0, "TMR_TXTS1_H", "Tx time stamp high (set 1)", ACC_RO, 0x00000000}, +{0x2C4, "TMR_TXTS1_L", "Tx time stamp high (set 1)", ACC_RO, 0x00000000}, +{0x2C8, "TMR_TXTS2_H", "Tx time stamp high (set 2)", ACC_RO, 0x00000000}, +{0x2CC, "TMR_TXTS2_L", "Tx time stamp high (set 2)", ACC_RO, 0x00000000}, + +/* eTSEC Receive Control and Status Registers */ + +{0x300, "RCTRL", "Receive control register", ACC_RW, 0x00000000}, +{0x304, "RSTAT", "Receive status register", ACC_W1C, 0x00000000}, +{0x310, "RXIC", "Receive interrupt coalescing register", ACC_RW, 0x00000000}, +{0x314, "RQUEUE", "Receive queue control register.", ACC_RW, 0x00800080}, +{0x330, "RBIFX", "Receive bit field extract control register", ACC_RW, 0x00000000}, +{0x334, "RQFAR", "Receive queue filing table address register", ACC_RW, 0x00000000}, +{0x338, "RQFCR", "Receive queue filing table control register", ACC_RW, 0x00000000}, +{0x33C, "RQFPR", "Receive queue filing table property register", ACC_RW, 0x00000000}, +{0x340, "MRBLR", "Maximum receive buffer length register", ACC_RW, 0x00000000}, +{0x380, "RBDBPH", "Rx data buffer pointer high bits", ACC_RW, 0x00000000}, +{0x384, "RBPTR0", "RxBD pointer for ring 0", ACC_RW, 0x00000000}, +{0x38C, "RBPTR1", "RxBD pointer for ring 1", ACC_RW, 0x00000000}, +{0x394, "RBPTR2", "RxBD pointer for ring 2", ACC_RW, 0x00000000}, +{0x39C, "RBPTR3", "RxBD pointer for ring 3", ACC_RW, 0x00000000}, +{0x3A4, "RBPTR4", "RxBD pointer for ring 4", ACC_RW, 0x00000000}, +{0x3AC, "RBPTR5", "RxBD pointer for ring 5", ACC_RW, 0x00000000}, +{0x3B4, "RBPTR6", "RxBD pointer for ring 6", ACC_RW, 0x00000000}, +{0x3BC, "RBPTR7", "RxBD pointer for ring 7", ACC_RW, 0x00000000}, +{0x400, "RBASEH", "RxBD base address high bits", ACC_RW, 0x00000000}, +{0x404, "RBASE0", "RxBD base address of ring 0", ACC_RW, 0x00000000}, +{0x40C, "RBASE1", "RxBD base address of ring 1", ACC_RW, 0x00000000}, +{0x414, "RBASE2", "RxBD base address of ring 2", ACC_RW, 0x00000000}, +{0x41C, "RBASE3", "RxBD base address of ring 3", ACC_RW, 0x00000000}, +{0x424, "RBASE4", "RxBD base address of ring 4", ACC_RW, 0x00000000}, +{0x42C, "RBASE5", "RxBD base address of ring 5", ACC_RW, 0x00000000}, +{0x434, "RBASE6", "RxBD base address of ring 6", ACC_RW, 0x00000000}, +{0x43C, "RBASE7", "RxBD base address of ring 7", ACC_RW, 0x00000000}, +{0x4C0, "TMR_RXTS_H", "Rx timer time stamp register high", ACC_RW, 0x00000000}, +{0x4C4, "TMR_RXTS_L", "Rx timer time stamp register low", ACC_RW, 0x00000000}, + +/* eTSEC MAC Registers */ + +{0x500, "MACCFG1", "MAC configuration register 1", ACC_RW, 0x00000000}, +{0x504, "MACCFG2", "MAC configuration register 2", ACC_RW, 0x00007000}, +{0x508, "IPGIFG", "Inter-packet/inter-frame gap register", ACC_RW, 0x40605060}, +{0x50C, "HAFDUP", "Half-duplex control", ACC_RW, 0x00A1F037}, +{0x510, "MAXFRM", "Maximum frame length", ACC_RW, 0x00000600}, +{0x520, "MIIMCFG", "MII management configuration", ACC_RW, 0x00000007}, +{0x524, "MIIMCOM", "MII management command", ACC_RW, 0x00000000}, +{0x528, "MIIMADD", "MII management address", ACC_RW, 0x00000000}, +{0x52C, "MIIMCON", "MII management control", ACC_WO, 0x00000000}, +{0x530, "MIIMSTAT", "MII management status", ACC_RO, 0x00000000}, +{0x534, "MIIMIND", "MII management indicator", ACC_RO, 0x00000000}, +{0x53C, "IFSTAT", "Interface status", ACC_RO, 0x00000000}, +{0x540, "MACSTNADDR1", "MAC station address register 1", ACC_RW, 0x00000000}, +{0x544, "MACSTNADDR2", "MAC station address register 2", ACC_RW, 0x00000000}, +{0x548, "MAC01ADDR1", "MAC exact match address 1, part 1", ACC_RW, 0x00000000}, +{0x54C, "MAC01ADDR2", "MAC exact match address 1, part 2", ACC_RW, 0x00000000}, +{0x550, "MAC02ADDR1", "MAC exact match address 2, part 1", ACC_RW, 0x00000000}, +{0x554, "MAC02ADDR2", "MAC exact match address 2, part 2", ACC_RW, 0x00000000}, +{0x558, "MAC03ADDR1", "MAC exact match address 3, part 1", ACC_RW, 0x00000000}, +{0x55C, "MAC03ADDR2", "MAC exact match address 3, part 2", ACC_RW, 0x00000000}, +{0x560, "MAC04ADDR1", "MAC exact match address 4, part 1", ACC_RW, 0x00000000}, +{0x564, "MAC04ADDR2", "MAC exact match address 4, part 2", ACC_RW, 0x00000000}, +{0x568, "MAC05ADDR1", "MAC exact match address 5, part 1", ACC_RW, 0x00000000}, +{0x56C, "MAC05ADDR2", "MAC exact match address 5, part 2", ACC_RW, 0x00000000}, +{0x570, "MAC06ADDR1", "MAC exact match address 6, part 1", ACC_RW, 0x00000000}, +{0x574, "MAC06ADDR2", "MAC exact match address 6, part 2", ACC_RW, 0x00000000}, +{0x578, "MAC07ADDR1", "MAC exact match address 7, part 1", ACC_RW, 0x00000000}, +{0x57C, "MAC07ADDR2", "MAC exact match address 7, part 2", ACC_RW, 0x00000000}, +{0x580, "MAC08ADDR1", "MAC exact match address 8, part 1", ACC_RW, 0x00000000}, +{0x584, "MAC08ADDR2", "MAC exact match address 8, part 2", ACC_RW, 0x00000000}, +{0x588, "MAC09ADDR1", "MAC exact match address 9, part 1", ACC_RW, 0x00000000}, +{0x58C, "MAC09ADDR2", "MAC exact match address 9, part 2", ACC_RW, 0x00000000}, +{0x590, "MAC10ADDR1", "MAC exact match address 10, part 1", ACC_RW, 0x00000000}, +{0x594, "MAC10ADDR2", "MAC exact match address 10, part 2", ACC_RW, 0x00000000}, +{0x598, "MAC11ADDR1", "MAC exact match address 11, part 1", ACC_RW, 0x00000000}, +{0x59C, "MAC11ADDR2", "MAC exact match address 11, part 2", ACC_RW, 0x00000000}, +{0x5A0, "MAC12ADDR1", "MAC exact match address 12, part 1", ACC_RW, 0x00000000}, +{0x5A4, "MAC12ADDR2", "MAC exact match address 12, part 2", ACC_RW, 0x00000000}, +{0x5A8, "MAC13ADDR1", "MAC exact match address 13, part 1", ACC_RW, 0x00000000}, +{0x5AC, "MAC13ADDR2", "MAC exact match address 13, part 2", ACC_RW, 0x00000000}, +{0x5B0, "MAC14ADDR1", "MAC exact match address 14, part 1", ACC_RW, 0x00000000}, +{0x5B4, "MAC14ADDR2", "MAC exact match address 14, part 2", ACC_RW, 0x00000000}, +{0x5B8, "MAC15ADDR1", "MAC exact match address 15, part 1", ACC_RW, 0x00000000}, +{0x5BC, "MAC15ADDR2", "MAC exact match address 15, part 2", ACC_RW, 0x00000000}, + +/* eTSEC, "Transmit", "and", Receive, Counters */ + +{0x680, "TR64", "Transmit and receive 64-byte frame counter ", ACC_RW, 0x00000000}, +{0x684, "TR127", "Transmit and receive 65- to 127-byte frame counter", ACC_RW, 0x00000000}, +{0x688, "TR255", "Transmit and receive 128- to 255-byte frame counter", ACC_RW, 0x00000000}, +{0x68C, "TR511", "Transmit and receive 256- to 511-byte frame counter", ACC_RW, 0x00000000}, +{0x690, "TR1K", "Transmit and receive 512- to 1023-byte frame counter", ACC_RW, 0x00000000}, +{0x694, "TRMAX", "Transmit and receive 1024- to 1518-byte frame counter", ACC_RW, 0x00000000}, +{0x698, "TRMGV", "Transmit and receive 1519- to 1522-byte good VLAN frame count", ACC_RW, 0x00000000}, + +/* eTSEC Receive Counters */ + +{0x69C, "RBYT", "Receive byte counter", ACC_RW, 0x00000000}, +{0x6A0, "RPKT", "Receive packet counter", ACC_RW, 0x00000000}, +{0x6A4, "RFCS", "Receive FCS error counter", ACC_RW, 0x00000000}, +{0x6A8, "RMCA", "Receive multicast packet counter", ACC_RW, 0x00000000}, +{0x6AC, "RBCA", "Receive broadcast packet counter", ACC_RW, 0x00000000}, +{0x6B0, "RXCF", "Receive control frame packet counter ", ACC_RW, 0x00000000}, +{0x6B4, "RXPF", "Receive PAUSE frame packet counter", ACC_RW, 0x00000000}, +{0x6B8, "RXUO", "Receive unknown OP code counter ", ACC_RW, 0x00000000}, +{0x6BC, "RALN", "Receive alignment error counter ", ACC_RW, 0x00000000}, +{0x6C0, "RFLR", "Receive frame length error counter ", ACC_RW, 0x00000000}, +{0x6C4, "RCDE", "Receive code error counter ", ACC_RW, 0x00000000}, +{0x6C8, "RCSE", "Receive carrier sense error counter", ACC_RW, 0x00000000}, +{0x6CC, "RUND", "Receive undersize packet counter", ACC_RW, 0x00000000}, +{0x6D0, "ROVR", "Receive oversize packet counter ", ACC_RW, 0x00000000}, +{0x6D4, "RFRG", "Receive fragments counter", ACC_RW, 0x00000000}, +{0x6D8, "RJBR", "Receive jabber counter ", ACC_RW, 0x00000000}, +{0x6DC, "RDRP", "Receive drop counter", ACC_RW, 0x00000000}, + +/* eTSEC Transmit Counters */ + +{0x6E0, "TBYT", "Transmit byte counter", ACC_RW, 0x00000000}, +{0x6E4, "TPKT", "Transmit packet counter", ACC_RW, 0x00000000}, +{0x6E8, "TMCA", "Transmit multicast packet counter ", ACC_RW, 0x00000000}, +{0x6EC, "TBCA", "Transmit broadcast packet counter ", ACC_RW, 0x00000000}, +{0x6F0, "TXPF", "Transmit PAUSE control frame counter ", ACC_RW, 0x00000000}, +{0x6F4, "TDFR", "Transmit deferral packet counter ", ACC_RW, 0x00000000}, +{0x6F8, "TEDF", "Transmit excessive deferral packet counter ", ACC_RW, 0x00000000}, +{0x6FC, "TSCL", "Transmit single collision packet counter", ACC_RW, 0x00000000}, +{0x700, "TMCL", "Transmit multiple collision packet counter", ACC_RW, 0x00000000}, +{0x704, "TLCL", "Transmit late collision packet counter", ACC_RW, 0x00000000}, +{0x708, "TXCL", "Transmit excessive collision packet counter", ACC_RW, 0x00000000}, +{0x70C, "TNCL", "Transmit total collision counter ", ACC_RW, 0x00000000}, +{0x714, "TDRP", "Transmit drop frame counter", ACC_RW, 0x00000000}, +{0x718, "TJBR", "Transmit jabber frame counter ", ACC_RW, 0x00000000}, +{0x71C, "TFCS", "Transmit FCS error counter", ACC_RW, 0x00000000}, +{0x720, "TXCF", "Transmit control frame counter ", ACC_RW, 0x00000000}, +{0x724, "TOVR", "Transmit oversize frame counter", ACC_RW, 0x00000000}, +{0x728, "TUND", "Transmit undersize frame counter ", ACC_RW, 0x00000000}, +{0x72C, "TFRG", "Transmit fragments frame counter ", ACC_RW, 0x00000000}, + +/* eTSEC Counter Control and TOE Statistics Registers */ + +{0x730, "CAR1", "Carry register one register", ACC_W1C, 0x00000000}, +{0x734, "CAR2", "Carry register two register ", ACC_W1C, 0x00000000}, +{0x738, "CAM1", "Carry register one mask register ", ACC_RW, 0xFE03FFFF}, +{0x73C, "CAM2", "Carry register two mask register ", ACC_RW, 0x000FFFFD}, +{0x740, "RREJ", "Receive filer rejected packet counter", ACC_RW, 0x00000000}, + +/* Hash Function Registers */ + +{0x800, "IGADDR0", "Individual/group address register 0", ACC_RW, 0x00000000}, +{0x804, "IGADDR1", "Individual/group address register 1", ACC_RW, 0x00000000}, +{0x808, "IGADDR2", "Individual/group address register 2", ACC_RW, 0x00000000}, +{0x80C, "IGADDR3", "Individual/group address register 3", ACC_RW, 0x00000000}, +{0x810, "IGADDR4", "Individual/group address register 4", ACC_RW, 0x00000000}, +{0x814, "IGADDR5", "Individual/group address register 5", ACC_RW, 0x00000000}, +{0x818, "IGADDR6", "Individual/group address register 6", ACC_RW, 0x00000000}, +{0x81C, "IGADDR7", "Individual/group address register 7", ACC_RW, 0x00000000}, +{0x880, "GADDR0", "Group address register 0", ACC_RW, 0x00000000}, +{0x884, "GADDR1", "Group address register 1", ACC_RW, 0x00000000}, +{0x888, "GADDR2", "Group address register 2", ACC_RW, 0x00000000}, +{0x88C, "GADDR3", "Group address register 3", ACC_RW, 0x00000000}, +{0x890, "GADDR4", "Group address register 4", ACC_RW, 0x00000000}, +{0x894, "GADDR5", "Group address register 5", ACC_RW, 0x00000000}, +{0x898, "GADDR6", "Group address register 6", ACC_RW, 0x00000000}, +{0x89C, "GADDR7", "Group address register 7", ACC_RW, 0x00000000}, + +/* eTSEC DMA Attribute Registers */ + +{0xBF8, "ATTR", "Attribute register", ACC_RW, 0x00000000}, +{0xBFC, "ATTRELI", "Attribute extract length and extract index register", ACC_RW, 0x00000000}, + + +/* eTSEC Lossless Flow Control Registers */ + +{0xC00, "RQPRM0", "Receive Queue Parameters register 0 ", ACC_RW, 0x00000000}, +{0xC04, "RQPRM1", "Receive Queue Parameters register 1 ", ACC_RW, 0x00000000}, +{0xC08, "RQPRM2", "Receive Queue Parameters register 2 ", ACC_RW, 0x00000000}, +{0xC0C, "RQPRM3", "Receive Queue Parameters register 3 ", ACC_RW, 0x00000000}, +{0xC10, "RQPRM4", "Receive Queue Parameters register 4 ", ACC_RW, 0x00000000}, +{0xC14, "RQPRM5", "Receive Queue Parameters register 5 ", ACC_RW, 0x00000000}, +{0xC18, "RQPRM6", "Receive Queue Parameters register 6 ", ACC_RW, 0x00000000}, +{0xC1C, "RQPRM7", "Receive Queue Parameters register 7 ", ACC_RW, 0x00000000}, +{0xC44, "RFBPTR0", "Last Free RxBD pointer for ring 0", ACC_RW, 0x00000000}, +{0xC4C, "RFBPTR1", "Last Free RxBD pointer for ring 1", ACC_RW, 0x00000000}, +{0xC54, "RFBPTR2", "Last Free RxBD pointer for ring 2", ACC_RW, 0x00000000}, +{0xC5C, "RFBPTR3", "Last Free RxBD pointer for ring 3", ACC_RW, 0x00000000}, +{0xC64, "RFBPTR4", "Last Free RxBD pointer for ring 4", ACC_RW, 0x00000000}, +{0xC6C, "RFBPTR5", "Last Free RxBD pointer for ring 5", ACC_RW, 0x00000000}, +{0xC74, "RFBPTR6", "Last Free RxBD pointer for ring 6", ACC_RW, 0x00000000}, +{0xC7C, "RFBPTR7", "Last Free RxBD pointer for ring 7", ACC_RW, 0x00000000}, + +/* eTSEC Future Expansion Space */ + +/* Reserved*/ + +/* eTSEC IEEE 1588 Registers */ + +{0xE00, "TMR_CTRL", "Timer control register", ACC_RW, 0x00010001}, +{0xE04, "TMR_TEVENT", "time stamp event register", ACC_W1C, 0x00000000}, +{0xE08, "TMR_TEMASK", "Timer event mask register", ACC_RW, 0x00000000}, +{0xE0C, "TMR_PEVENT", "time stamp event register", ACC_RW, 0x00000000}, +{0xE10, "TMR_PEMASK", "Timer event mask register", ACC_RW, 0x00000000}, +{0xE14, "TMR_STAT", "time stamp status register", ACC_RW, 0x00000000}, +{0xE18, "TMR_CNT_H", "timer counter high register", ACC_RW, 0x00000000}, +{0xE1C, "TMR_CNT_L", "timer counter low register", ACC_RW, 0x00000000}, +{0xE20, "TMR_ADD", "Timer drift compensation addend register", ACC_RW, 0x00000000}, +{0xE24, "TMR_ACC", "Timer accumulator register", ACC_RW, 0x00000000}, +{0xE28, "TMR_PRSC", "Timer prescale", ACC_RW, 0x00000002}, +{0xE30, "TMROFF_H", "Timer offset high", ACC_RW, 0x00000000}, +{0xE34, "TMROFF_L", "Timer offset low", ACC_RW, 0x00000000}, +{0xE40, "TMR_ALARM1_H", "Timer alarm 1 high register", ACC_RW, 0xFFFFFFFF}, +{0xE44, "TMR_ALARM1_L", "Timer alarm 1 high register", ACC_RW, 0xFFFFFFFF}, +{0xE48, "TMR_ALARM2_H", "Timer alarm 2 high register", ACC_RW, 0xFFFFFFFF}, +{0xE4C, "TMR_ALARM2_L", "Timer alarm 2 high register", ACC_RW, 0xFFFFFFFF}, +{0xE80, "TMR_FIPER1", "Timer fixed period interval", ACC_RW, 0xFFFFFFFF}, +{0xE84, "TMR_FIPER2", "Timer fixed period interval", ACC_RW, 0xFFFFFFFF}, +{0xE88, "TMR_FIPER3", "Timer fixed period interval", ACC_RW, 0xFFFFFFFF}, +{0xEA0, "TMR_ETTS1_H", "Time stamp of general purpose external trigger ", ACC_RW, 0x00000000}, +{0xEA4, "TMR_ETTS1_L", "Time stamp of general purpose external trigger", ACC_RW, 0x00000000}, +{0xEA8, "TMR_ETTS2_H", "Time stamp of general purpose external trigger ", ACC_RW, 0x00000000}, +{0xEAC, "TMR_ETTS2_L", "Time stamp of general purpose external trigger", ACC_RW, 0x00000000}, + +/* End Of Table */ +{0x0, 0x0, 0x0, 0x0, 0x0} +}; diff --git a/qemu/hw/net/fsl_etsec/registers.h b/qemu/hw/net/fsl_etsec/registers.h new file mode 100644 index 000000000..7ad768647 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/registers.h @@ -0,0 +1,320 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef _ETSEC_REGISTERS_H_ +#define _ETSEC_REGISTERS_H_ + +#include <stdint.h> + +enum eTSEC_Register_Access_Type { + ACC_RW = 1, /* Read/Write */ + ACC_RO = 2, /* Read Only */ + ACC_WO = 3, /* Write Only */ + ACC_W1C = 4, /* Write 1 to clear */ + ACC_UNKNOWN = 5 /* Unknown register*/ +}; + +typedef struct eTSEC_Register_Definition { + uint32_t offset; + const char *name; + const char *desc; + enum eTSEC_Register_Access_Type access; + uint32_t reset; +} eTSEC_Register_Definition; + +extern const eTSEC_Register_Definition eTSEC_registers_def[]; + +#define DMACTRL_LE (1 << 15) +#define DMACTRL_GRS (1 << 4) +#define DMACTRL_GTS (1 << 3) +#define DMACTRL_WOP (1 << 0) + +#define IEVENT_PERR (1 << 0) +#define IEVENT_DPE (1 << 1) +#define IEVENT_FIQ (1 << 2) +#define IEVENT_FIR (1 << 3) +#define IEVENT_FGPI (1 << 4) +#define IEVENT_RXF (1 << 7) +#define IEVENT_GRSC (1 << 8) +#define IEVENT_MMRW (1 << 9) +#define IEVENT_MMRD (1 << 10) +#define IEVENT_MAG (1 << 11) +#define IEVENT_RXB (1 << 15) +#define IEVENT_XFUN (1 << 16) +#define IEVENT_CRL (1 << 17) +#define IEVENT_LC (1 << 18) +#define IEVENT_TXF (1 << 20) +#define IEVENT_TXB (1 << 21) +#define IEVENT_TXE (1 << 22) +#define IEVENT_TXC (1 << 23) +#define IEVENT_BABT (1 << 24) +#define IEVENT_GTSC (1 << 25) +#define IEVENT_MSRO (1 << 26) +#define IEVENT_EBERR (1 << 28) +#define IEVENT_BSY (1 << 29) +#define IEVENT_RXC (1 << 30) +#define IEVENT_BABR (1 << 31) + +#define IMASK_RXFEN (1 << 7) +#define IMASK_GRSCEN (1 << 8) +#define IMASK_RXBEN (1 << 15) +#define IMASK_TXFEN (1 << 20) +#define IMASK_TXBEN (1 << 21) +#define IMASK_GTSCEN (1 << 25) + +#define MACCFG1_TX_EN (1 << 0) +#define MACCFG1_RX_EN (1 << 2) + +#define MACCFG2_CRC_EN (1 << 1) +#define MACCFG2_PADCRC (1 << 2) + +#define MIIMCOM_READ (1 << 0) +#define MIIMCOM_SCAN (1 << 1) + +#define RCTRL_PRSDEP_MASK (0x3) +#define RCTRL_PRSDEP_OFFSET (6) +#define RCTRL_RSF (1 << 2) + +/* Index of each register */ + +#define TSEC_ID (0x000 / 4) +#define TSEC_ID2 (0x004 / 4) +#define IEVENT (0x010 / 4) +#define IMASK (0x014 / 4) +#define EDIS (0x018 / 4) +#define ECNTRL (0x020 / 4) +#define PTV (0x028 / 4) +#define DMACTRL (0x02C / 4) +#define TBIPA (0x030 / 4) +#define TCTRL (0x100 / 4) +#define TSTAT (0x104 / 4) +#define DFVLAN (0x108 / 4) +#define TXIC (0x110 / 4) +#define TQUEUE (0x114 / 4) +#define TR03WT (0x140 / 4) +#define TR47WT (0x144 / 4) +#define TBDBPH (0x180 / 4) +#define TBPTR0 (0x184 / 4) +#define TBPTR1 (0x18C / 4) +#define TBPTR2 (0x194 / 4) +#define TBPTR3 (0x19C / 4) +#define TBPTR4 (0x1A4 / 4) +#define TBPTR5 (0x1AC / 4) +#define TBPTR6 (0x1B4 / 4) +#define TBPTR7 (0x1BC / 4) +#define TBASEH (0x200 / 4) +#define TBASE0 (0x204 / 4) +#define TBASE1 (0x20C / 4) +#define TBASE2 (0x214 / 4) +#define TBASE3 (0x21C / 4) +#define TBASE4 (0x224 / 4) +#define TBASE5 (0x22C / 4) +#define TBASE6 (0x234 / 4) +#define TBASE7 (0x23C / 4) +#define TMR_TXTS1_ID (0x280 / 4) +#define TMR_TXTS2_ID (0x284 / 4) +#define TMR_TXTS1_H (0x2C0 / 4) +#define TMR_TXTS1_L (0x2C4 / 4) +#define TMR_TXTS2_H (0x2C8 / 4) +#define TMR_TXTS2_L (0x2CC / 4) +#define RCTRL (0x300 / 4) +#define RSTAT (0x304 / 4) +#define RXIC (0x310 / 4) +#define RQUEUE (0x314 / 4) +#define RBIFX (0x330 / 4) +#define RQFAR (0x334 / 4) +#define RQFCR (0x338 / 4) +#define RQFPR (0x33C / 4) +#define MRBLR (0x340 / 4) +#define RBDBPH (0x380 / 4) +#define RBPTR0 (0x384 / 4) +#define RBPTR1 (0x38C / 4) +#define RBPTR2 (0x394 / 4) +#define RBPTR3 (0x39C / 4) +#define RBPTR4 (0x3A4 / 4) +#define RBPTR5 (0x3AC / 4) +#define RBPTR6 (0x3B4 / 4) +#define RBPTR7 (0x3BC / 4) +#define RBASEH (0x400 / 4) +#define RBASE0 (0x404 / 4) +#define RBASE1 (0x40C / 4) +#define RBASE2 (0x414 / 4) +#define RBASE3 (0x41C / 4) +#define RBASE4 (0x424 / 4) +#define RBASE5 (0x42C / 4) +#define RBASE6 (0x434 / 4) +#define RBASE7 (0x43C / 4) +#define TMR_RXTS_H (0x4C0 / 4) +#define TMR_RXTS_L (0x4C4 / 4) +#define MACCFG1 (0x500 / 4) +#define MACCFG2 (0x504 / 4) +#define IPGIFG (0x508 / 4) +#define HAFDUP (0x50C / 4) +#define MAXFRM (0x510 / 4) +#define MIIMCFG (0x520 / 4) +#define MIIMCOM (0x524 / 4) +#define MIIMADD (0x528 / 4) +#define MIIMCON (0x52C / 4) +#define MIIMSTAT (0x530 / 4) +#define MIIMIND (0x534 / 4) +#define IFSTAT (0x53C / 4) +#define MACSTNADDR1 (0x540 / 4) +#define MACSTNADDR2 (0x544 / 4) +#define MAC01ADDR1 (0x548 / 4) +#define MAC01ADDR2 (0x54C / 4) +#define MAC02ADDR1 (0x550 / 4) +#define MAC02ADDR2 (0x554 / 4) +#define MAC03ADDR1 (0x558 / 4) +#define MAC03ADDR2 (0x55C / 4) +#define MAC04ADDR1 (0x560 / 4) +#define MAC04ADDR2 (0x564 / 4) +#define MAC05ADDR1 (0x568 / 4) +#define MAC05ADDR2 (0x56C / 4) +#define MAC06ADDR1 (0x570 / 4) +#define MAC06ADDR2 (0x574 / 4) +#define MAC07ADDR1 (0x578 / 4) +#define MAC07ADDR2 (0x57C / 4) +#define MAC08ADDR1 (0x580 / 4) +#define MAC08ADDR2 (0x584 / 4) +#define MAC09ADDR1 (0x588 / 4) +#define MAC09ADDR2 (0x58C / 4) +#define MAC10ADDR1 (0x590 / 4) +#define MAC10ADDR2 (0x594 / 4) +#define MAC11ADDR1 (0x598 / 4) +#define MAC11ADDR2 (0x59C / 4) +#define MAC12ADDR1 (0x5A0 / 4) +#define MAC12ADDR2 (0x5A4 / 4) +#define MAC13ADDR1 (0x5A8 / 4) +#define MAC13ADDR2 (0x5AC / 4) +#define MAC14ADDR1 (0x5B0 / 4) +#define MAC14ADDR2 (0x5B4 / 4) +#define MAC15ADDR1 (0x5B8 / 4) +#define MAC15ADDR2 (0x5BC / 4) +#define TR64 (0x680 / 4) +#define TR127 (0x684 / 4) +#define TR255 (0x688 / 4) +#define TR511 (0x68C / 4) +#define TR1K (0x690 / 4) +#define TRMAX (0x694 / 4) +#define TRMGV (0x698 / 4) +#define RBYT (0x69C / 4) +#define RPKT (0x6A0 / 4) +#define RFCS (0x6A4 / 4) +#define RMCA (0x6A8 / 4) +#define RBCA (0x6AC / 4) +#define RXCF (0x6B0 / 4) +#define RXPF (0x6B4 / 4) +#define RXUO (0x6B8 / 4) +#define RALN (0x6BC / 4) +#define RFLR (0x6C0 / 4) +#define RCDE (0x6C4 / 4) +#define RCSE (0x6C8 / 4) +#define RUND (0x6CC / 4) +#define ROVR (0x6D0 / 4) +#define RFRG (0x6D4 / 4) +#define RJBR (0x6D8 / 4) +#define RDRP (0x6DC / 4) +#define TBYT (0x6E0 / 4) +#define TPKT (0x6E4 / 4) +#define TMCA (0x6E8 / 4) +#define TBCA (0x6EC / 4) +#define TXPF (0x6F0 / 4) +#define TDFR (0x6F4 / 4) +#define TEDF (0x6F8 / 4) +#define TSCL (0x6FC / 4) +#define TMCL (0x700 / 4) +#define TLCL (0x704 / 4) +#define TXCL (0x708 / 4) +#define TNCL (0x70C / 4) +#define TDRP (0x714 / 4) +#define TJBR (0x718 / 4) +#define TFCS (0x71C / 4) +#define TXCF (0x720 / 4) +#define TOVR (0x724 / 4) +#define TUND (0x728 / 4) +#define TFRG (0x72C / 4) +#define CAR1 (0x730 / 4) +#define CAR2 (0x734 / 4) +#define CAM1 (0x738 / 4) +#define CAM2 (0x73C / 4) +#define RREJ (0x740 / 4) +#define IGADDR0 (0x800 / 4) +#define IGADDR1 (0x804 / 4) +#define IGADDR2 (0x808 / 4) +#define IGADDR3 (0x80C / 4) +#define IGADDR4 (0x810 / 4) +#define IGADDR5 (0x814 / 4) +#define IGADDR6 (0x818 / 4) +#define IGADDR7 (0x81C / 4) +#define GADDR0 (0x880 / 4) +#define GADDR1 (0x884 / 4) +#define GADDR2 (0x888 / 4) +#define GADDR3 (0x88C / 4) +#define GADDR4 (0x890 / 4) +#define GADDR5 (0x894 / 4) +#define GADDR6 (0x898 / 4) +#define GADDR7 (0x89C / 4) +#define ATTR (0xBF8 / 4) +#define ATTRELI (0xBFC / 4) +#define RQPRM0 (0xC00 / 4) +#define RQPRM1 (0xC04 / 4) +#define RQPRM2 (0xC08 / 4) +#define RQPRM3 (0xC0C / 4) +#define RQPRM4 (0xC10 / 4) +#define RQPRM5 (0xC14 / 4) +#define RQPRM6 (0xC18 / 4) +#define RQPRM7 (0xC1C / 4) +#define RFBPTR0 (0xC44 / 4) +#define RFBPTR1 (0xC4C / 4) +#define RFBPTR2 (0xC54 / 4) +#define RFBPTR3 (0xC5C / 4) +#define RFBPTR4 (0xC64 / 4) +#define RFBPTR5 (0xC6C / 4) +#define RFBPTR6 (0xC74 / 4) +#define RFBPTR7 (0xC7C / 4) +#define TMR_CTRL (0xE00 / 4) +#define TMR_TEVENT (0xE04 / 4) +#define TMR_TEMASK (0xE08 / 4) +#define TMR_PEVENT (0xE0C / 4) +#define TMR_PEMASK (0xE10 / 4) +#define TMR_STAT (0xE14 / 4) +#define TMR_CNT_H (0xE18 / 4) +#define TMR_CNT_L (0xE1C / 4) +#define TMR_ADD (0xE20 / 4) +#define TMR_ACC (0xE24 / 4) +#define TMR_PRSC (0xE28 / 4) +#define TMROFF_H (0xE30 / 4) +#define TMROFF_L (0xE34 / 4) +#define TMR_ALARM1_H (0xE40 / 4) +#define TMR_ALARM1_L (0xE44 / 4) +#define TMR_ALARM2_H (0xE48 / 4) +#define TMR_ALARM2_L (0xE4C / 4) +#define TMR_FIPER1 (0xE80 / 4) +#define TMR_FIPER2 (0xE84 / 4) +#define TMR_FIPER3 (0xE88 / 4) +#define TMR_ETTS1_H (0xEA0 / 4) +#define TMR_ETTS1_L (0xEA4 / 4) +#define TMR_ETTS2_H (0xEA8 / 4) +#define TMR_ETTS2_L (0xEAC / 4) + +#endif /* ! _ETSEC_REGISTERS_H_ */ diff --git a/qemu/hw/net/fsl_etsec/rings.c b/qemu/hw/net/fsl_etsec/rings.c new file mode 100644 index 000000000..68e7b6d16 --- /dev/null +++ b/qemu/hw/net/fsl_etsec/rings.c @@ -0,0 +1,655 @@ +/* + * QEMU Freescale eTSEC Emulator + * + * Copyright (c) 2011-2013 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "net/checksum.h" + +#include "etsec.h" +#include "registers.h" + +/* #define ETSEC_RING_DEBUG */ +/* #define HEX_DUMP */ +/* #define DEBUG_BD */ + +#ifdef ETSEC_RING_DEBUG +static const int debug_etsec = 1; +#else +static const int debug_etsec; +#endif + +#define RING_DEBUG(fmt, ...) do { \ + if (debug_etsec) { \ + qemu_log(fmt , ## __VA_ARGS__); \ + } \ + } while (0) + +#ifdef DEBUG_BD + +static void print_tx_bd_flags(uint16_t flags) +{ + qemu_log(" Ready: %d\n", !!(flags & BD_TX_READY)); + qemu_log(" PAD/CRC: %d\n", !!(flags & BD_TX_PADCRC)); + qemu_log(" Wrap: %d\n", !!(flags & BD_WRAP)); + qemu_log(" Interrupt: %d\n", !!(flags & BD_INTERRUPT)); + qemu_log(" Last in frame: %d\n", !!(flags & BD_LAST)); + qemu_log(" Tx CRC: %d\n", !!(flags & BD_TX_TC)); + qemu_log(" User-defined preamble / defer: %d\n", + !!(flags & BD_TX_PREDEF)); + qemu_log(" Huge frame enable / Late collision: %d\n", + !!(flags & BD_TX_HFELC)); + qemu_log(" Control frame / Retransmission Limit: %d\n", + !!(flags & BD_TX_CFRL)); + qemu_log(" Retry count: %d\n", + (flags >> BD_TX_RC_OFFSET) & BD_TX_RC_MASK); + qemu_log(" Underrun / TCP/IP off-load enable: %d\n", + !!(flags & BD_TX_TOEUN)); + qemu_log(" Truncation: %d\n", !!(flags & BD_TX_TR)); +} + +static void print_rx_bd_flags(uint16_t flags) +{ + qemu_log(" Empty: %d\n", !!(flags & BD_RX_EMPTY)); + qemu_log(" Receive software ownership: %d\n", !!(flags & BD_RX_RO1)); + qemu_log(" Wrap: %d\n", !!(flags & BD_WRAP)); + qemu_log(" Interrupt: %d\n", !!(flags & BD_INTERRUPT)); + qemu_log(" Last in frame: %d\n", !!(flags & BD_LAST)); + qemu_log(" First in frame: %d\n", !!(flags & BD_RX_FIRST)); + qemu_log(" Miss: %d\n", !!(flags & BD_RX_MISS)); + qemu_log(" Broadcast: %d\n", !!(flags & BD_RX_BROADCAST)); + qemu_log(" Multicast: %d\n", !!(flags & BD_RX_MULTICAST)); + qemu_log(" Rx frame length violation: %d\n", !!(flags & BD_RX_LG)); + qemu_log(" Rx non-octet aligned frame: %d\n", !!(flags & BD_RX_NO)); + qemu_log(" Short frame: %d\n", !!(flags & BD_RX_SH)); + qemu_log(" Rx CRC Error: %d\n", !!(flags & BD_RX_CR)); + qemu_log(" Overrun: %d\n", !!(flags & BD_RX_OV)); + qemu_log(" Truncation: %d\n", !!(flags & BD_RX_TR)); +} + + +static void print_bd(eTSEC_rxtx_bd bd, int mode, uint32_t index) +{ + qemu_log("eTSEC %s Data Buffer Descriptor (%u)\n", + mode == eTSEC_TRANSMIT ? "Transmit" : "Receive", + index); + qemu_log(" Flags : 0x%04x\n", bd.flags); + if (mode == eTSEC_TRANSMIT) { + print_tx_bd_flags(bd.flags); + } else { + print_rx_bd_flags(bd.flags); + } + qemu_log(" Length : 0x%04x\n", bd.length); + qemu_log(" Pointer : 0x%08x\n", bd.bufptr); +} + +#endif /* DEBUG_BD */ + +static void read_buffer_descriptor(eTSEC *etsec, + hwaddr addr, + eTSEC_rxtx_bd *bd) +{ + assert(bd != NULL); + + RING_DEBUG("READ Buffer Descriptor @ 0x" TARGET_FMT_plx"\n", addr); + cpu_physical_memory_read(addr, + bd, + sizeof(eTSEC_rxtx_bd)); + + if (etsec->regs[DMACTRL].value & DMACTRL_LE) { + bd->flags = lduw_le_p(&bd->flags); + bd->length = lduw_le_p(&bd->length); + bd->bufptr = ldl_le_p(&bd->bufptr); + } else { + bd->flags = lduw_be_p(&bd->flags); + bd->length = lduw_be_p(&bd->length); + bd->bufptr = ldl_be_p(&bd->bufptr); + } +} + +static void write_buffer_descriptor(eTSEC *etsec, + hwaddr addr, + eTSEC_rxtx_bd *bd) +{ + assert(bd != NULL); + + if (etsec->regs[DMACTRL].value & DMACTRL_LE) { + stw_le_p(&bd->flags, bd->flags); + stw_le_p(&bd->length, bd->length); + stl_le_p(&bd->bufptr, bd->bufptr); + } else { + stw_be_p(&bd->flags, bd->flags); + stw_be_p(&bd->length, bd->length); + stl_be_p(&bd->bufptr, bd->bufptr); + } + + RING_DEBUG("Write Buffer Descriptor @ 0x" TARGET_FMT_plx"\n", addr); + cpu_physical_memory_write(addr, + bd, + sizeof(eTSEC_rxtx_bd)); +} + +static void ievent_set(eTSEC *etsec, + uint32_t flags) +{ + etsec->regs[IEVENT].value |= flags; + + if ((flags & IEVENT_TXB && etsec->regs[IMASK].value & IMASK_TXBEN) + || (flags & IEVENT_TXF && etsec->regs[IMASK].value & IMASK_TXFEN)) { + qemu_irq_raise(etsec->tx_irq); + RING_DEBUG("%s Raise Tx IRQ\n", __func__); + } + + if ((flags & IEVENT_RXB && etsec->regs[IMASK].value & IMASK_RXBEN) + || (flags & IEVENT_RXF && etsec->regs[IMASK].value & IMASK_RXFEN)) { + qemu_irq_raise(etsec->rx_irq); + RING_DEBUG("%s Raise Rx IRQ\n", __func__); + } +} + +static void tx_padding_and_crc(eTSEC *etsec, uint32_t min_frame_len) +{ + int add = min_frame_len - etsec->tx_buffer_len; + + /* Padding */ + if (add > 0) { + RING_DEBUG("pad:%u\n", add); + etsec->tx_buffer = g_realloc(etsec->tx_buffer, + etsec->tx_buffer_len + add); + + memset(etsec->tx_buffer + etsec->tx_buffer_len, 0x0, add); + etsec->tx_buffer_len += add; + } + + /* Never add CRC in QEMU */ +} + +static void process_tx_fcb(eTSEC *etsec) +{ + uint8_t flags = (uint8_t)(*etsec->tx_buffer); + /* L3 header offset from start of frame */ + uint8_t l3_header_offset = (uint8_t)*(etsec->tx_buffer + 3); + /* L4 header offset from start of L3 header */ + uint8_t l4_header_offset = (uint8_t)*(etsec->tx_buffer + 2); + /* L3 header */ + uint8_t *l3_header = etsec->tx_buffer + 8 + l3_header_offset; + /* L4 header */ + uint8_t *l4_header = l3_header + l4_header_offset; + + /* if packet is IP4 and IP checksum is requested */ + if (flags & FCB_TX_IP && flags & FCB_TX_CIP) { + /* do IP4 checksum (TODO This function does TCP/UDP checksum + * but not sure if it also does IP4 checksum.) */ + net_checksum_calculate(etsec->tx_buffer + 8, + etsec->tx_buffer_len - 8); + } + /* TODO Check the correct usage of the PHCS field of the FCB in case the NPH + * flag is on */ + + /* if packet is IP4 and TCP or UDP */ + if (flags & FCB_TX_IP && flags & FCB_TX_TUP) { + /* if UDP */ + if (flags & FCB_TX_UDP) { + /* if checksum is requested */ + if (flags & FCB_TX_CTU) { + /* do UDP checksum */ + + net_checksum_calculate(etsec->tx_buffer + 8, + etsec->tx_buffer_len - 8); + } else { + /* set checksum field to 0 */ + l4_header[6] = 0; + l4_header[7] = 0; + } + } else if (flags & FCB_TX_CTU) { /* if TCP and checksum is requested */ + /* do TCP checksum */ + net_checksum_calculate(etsec->tx_buffer + 8, + etsec->tx_buffer_len - 8); + } + } +} + +static void process_tx_bd(eTSEC *etsec, + eTSEC_rxtx_bd *bd) +{ + uint8_t *tmp_buff = NULL; + hwaddr tbdbth = (hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32; + + if (bd->length == 0) { + /* ERROR */ + return; + } + + if (etsec->tx_buffer_len == 0) { + /* It's the first BD */ + etsec->first_bd = *bd; + } + + /* TODO: if TxBD[TOE/UN] skip the Tx Frame Control Block*/ + + /* Load this Data Buffer */ + etsec->tx_buffer = g_realloc(etsec->tx_buffer, + etsec->tx_buffer_len + bd->length); + tmp_buff = etsec->tx_buffer + etsec->tx_buffer_len; + cpu_physical_memory_read(bd->bufptr + tbdbth, tmp_buff, bd->length); + + /* Update buffer length */ + etsec->tx_buffer_len += bd->length; + + + if (etsec->tx_buffer_len != 0 && (bd->flags & BD_LAST)) { + if (etsec->regs[MACCFG1].value & MACCFG1_TX_EN) { + /* MAC Transmit enabled */ + + /* Process offload Tx FCB */ + if (etsec->first_bd.flags & BD_TX_TOEUN) { + process_tx_fcb(etsec); + } + + if (etsec->first_bd.flags & BD_TX_PADCRC + || etsec->regs[MACCFG2].value & MACCFG2_PADCRC) { + + /* Padding and CRC (Padding implies CRC) */ + tx_padding_and_crc(etsec, 64); + + } else if (etsec->first_bd.flags & BD_TX_TC + || etsec->regs[MACCFG2].value & MACCFG2_CRC_EN) { + + /* Only CRC */ + /* Never add CRC in QEMU */ + } + +#if defined(HEX_DUMP) + qemu_log("eTSEC Send packet size:%d\n", etsec->tx_buffer_len); + qemu_hexdump(etsec->tx_buffer, stderr, "", etsec->tx_buffer_len); +#endif /* ETSEC_RING_DEBUG */ + + if (etsec->first_bd.flags & BD_TX_TOEUN) { + qemu_send_packet(qemu_get_queue(etsec->nic), + etsec->tx_buffer + 8, + etsec->tx_buffer_len - 8); + } else { + qemu_send_packet(qemu_get_queue(etsec->nic), + etsec->tx_buffer, + etsec->tx_buffer_len); + } + + } + + etsec->tx_buffer_len = 0; + + if (bd->flags & BD_INTERRUPT) { + ievent_set(etsec, IEVENT_TXF); + } + } else { + if (bd->flags & BD_INTERRUPT) { + ievent_set(etsec, IEVENT_TXB); + } + } + + /* Update DB flags */ + + /* Clear Ready */ + bd->flags &= ~BD_TX_READY; + + /* Clear Defer */ + bd->flags &= ~BD_TX_PREDEF; + + /* Clear Late Collision */ + bd->flags &= ~BD_TX_HFELC; + + /* Clear Retransmission Limit */ + bd->flags &= ~BD_TX_CFRL; + + /* Clear Retry Count */ + bd->flags &= ~(BD_TX_RC_MASK << BD_TX_RC_OFFSET); + + /* Clear Underrun */ + bd->flags &= ~BD_TX_TOEUN; + + /* Clear Truncation */ + bd->flags &= ~BD_TX_TR; +} + +void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr) +{ + hwaddr ring_base = 0; + hwaddr bd_addr = 0; + eTSEC_rxtx_bd bd; + uint16_t bd_flags; + + if (!(etsec->regs[MACCFG1].value & MACCFG1_TX_EN)) { + RING_DEBUG("%s: MAC Transmit not enabled\n", __func__); + return; + } + + ring_base = (hwaddr)(etsec->regs[TBASEH].value & 0xF) << 32; + ring_base += etsec->regs[TBASE0 + ring_nbr].value & ~0x7; + bd_addr = etsec->regs[TBPTR0 + ring_nbr].value & ~0x7; + + do { + read_buffer_descriptor(etsec, bd_addr, &bd); + +#ifdef DEBUG_BD + print_bd(bd, + eTSEC_TRANSMIT, + (bd_addr - ring_base) / sizeof(eTSEC_rxtx_bd)); + +#endif /* DEBUG_BD */ + + /* Save flags before BD update */ + bd_flags = bd.flags; + + if (bd_flags & BD_TX_READY) { + process_tx_bd(etsec, &bd); + + /* Write back BD after update */ + write_buffer_descriptor(etsec, bd_addr, &bd); + } + + /* Wrap or next BD */ + if (bd_flags & BD_WRAP) { + bd_addr = ring_base; + } else { + bd_addr += sizeof(eTSEC_rxtx_bd); + } + + } while (bd_addr != ring_base); + + bd_addr = ring_base; + + /* Save the Buffer Descriptor Pointers to current bd */ + etsec->regs[TBPTR0 + ring_nbr].value = bd_addr; + + /* Set transmit halt THLTx */ + etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr); +} + +static void fill_rx_bd(eTSEC *etsec, + eTSEC_rxtx_bd *bd, + const uint8_t **buf, + size_t *size) +{ + uint16_t to_write; + hwaddr bufptr = bd->bufptr + + ((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32); + uint8_t padd[etsec->rx_padding]; + uint8_t rem; + + RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx + " size:%zu(padding + crc:%u) + fcb:%u\n", + bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size); + + bd->length = 0; + + /* This operation will only write FCB */ + if (etsec->rx_fcb_size != 0) { + + cpu_physical_memory_write(bufptr, etsec->rx_fcb, etsec->rx_fcb_size); + + bufptr += etsec->rx_fcb_size; + bd->length += etsec->rx_fcb_size; + etsec->rx_fcb_size = 0; + + } + + /* We remove padding from the computation of to_write because it is not + * allocated in the buffer. + */ + to_write = MIN(*size - etsec->rx_padding, + etsec->regs[MRBLR].value - etsec->rx_fcb_size); + + /* This operation can only write packet data and no padding */ + if (to_write > 0) { + cpu_physical_memory_write(bufptr, *buf, to_write); + + *buf += to_write; + bufptr += to_write; + *size -= to_write; + + bd->flags &= ~BD_RX_EMPTY; + bd->length += to_write; + } + + if (*size == etsec->rx_padding) { + /* The remaining bytes are only for padding which is not actually + * allocated in the data buffer. + */ + + rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding); + + if (rem > 0) { + memset(padd, 0x0, sizeof(padd)); + etsec->rx_padding -= rem; + *size -= rem; + bd->length += rem; + cpu_physical_memory_write(bufptr, padd, rem); + } + } +} + +static void rx_init_frame(eTSEC *etsec, const uint8_t *buf, size_t size) +{ + uint32_t fcb_size = 0; + uint8_t prsdep = (etsec->regs[RCTRL].value >> RCTRL_PRSDEP_OFFSET) + & RCTRL_PRSDEP_MASK; + + if (prsdep != 0) { + /* Prepend FCB (FCB size + RCTRL[PAL]) */ + fcb_size = 8 + ((etsec->regs[RCTRL].value >> 16) & 0x1F); + + etsec->rx_fcb_size = fcb_size; + + /* TODO: fill_FCB(etsec); */ + memset(etsec->rx_fcb, 0x0, sizeof(etsec->rx_fcb)); + + } else { + etsec->rx_fcb_size = 0; + } + + if (etsec->rx_buffer != NULL) { + g_free(etsec->rx_buffer); + } + + /* Do not copy the frame for now */ + etsec->rx_buffer = (uint8_t *)buf; + etsec->rx_buffer_len = size; + + /* CRC padding (We don't have to compute the CRC) */ + etsec->rx_padding = 4; + + etsec->rx_first_in_frame = 1; + etsec->rx_remaining_data = etsec->rx_buffer_len; + RING_DEBUG("%s: rx_buffer_len:%u rx_padding+crc:%u\n", __func__, + etsec->rx_buffer_len, etsec->rx_padding); +} + +ssize_t etsec_rx_ring_write(eTSEC *etsec, const uint8_t *buf, size_t size) +{ + int ring_nbr = 0; /* Always use ring0 (no filer) */ + + if (etsec->rx_buffer_len != 0) { + RING_DEBUG("%s: We can't receive now," + " a buffer is already in the pipe\n", __func__); + return 0; + } + + if (etsec->regs[RSTAT].value & 1 << (23 - ring_nbr)) { + RING_DEBUG("%s: The ring is halted\n", __func__); + return -1; + } + + if (etsec->regs[DMACTRL].value & DMACTRL_GRS) { + RING_DEBUG("%s: Graceful receive stop\n", __func__); + return -1; + } + + if (!(etsec->regs[MACCFG1].value & MACCFG1_RX_EN)) { + RING_DEBUG("%s: MAC Receive not enabled\n", __func__); + return -1; + } + + if ((etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) { + /* CRC is not in the packet yet, so short frame is below 60 bytes */ + RING_DEBUG("%s: Drop short frame\n", __func__); + return -1; + } + + rx_init_frame(etsec, buf, size); + + etsec_walk_rx_ring(etsec, ring_nbr); + + return size; +} + +void etsec_walk_rx_ring(eTSEC *etsec, int ring_nbr) +{ + hwaddr ring_base = 0; + hwaddr bd_addr = 0; + hwaddr start_bd_addr = 0; + eTSEC_rxtx_bd bd; + uint16_t bd_flags; + size_t remaining_data; + const uint8_t *buf; + uint8_t *tmp_buf; + size_t size; + + if (etsec->rx_buffer_len == 0) { + /* No frame to send */ + RING_DEBUG("No frame to send\n"); + return; + } + + remaining_data = etsec->rx_remaining_data + etsec->rx_padding; + buf = etsec->rx_buffer + + (etsec->rx_buffer_len - etsec->rx_remaining_data); + size = etsec->rx_buffer_len + etsec->rx_padding; + + ring_base = (hwaddr)(etsec->regs[RBASEH].value & 0xF) << 32; + ring_base += etsec->regs[RBASE0 + ring_nbr].value & ~0x7; + start_bd_addr = bd_addr = etsec->regs[RBPTR0 + ring_nbr].value & ~0x7; + + do { + read_buffer_descriptor(etsec, bd_addr, &bd); + +#ifdef DEBUG_BD + print_bd(bd, + eTSEC_RECEIVE, + (bd_addr - ring_base) / sizeof(eTSEC_rxtx_bd)); + +#endif /* DEBUG_BD */ + + /* Save flags before BD update */ + bd_flags = bd.flags; + + if (bd_flags & BD_RX_EMPTY) { + fill_rx_bd(etsec, &bd, &buf, &remaining_data); + + if (etsec->rx_first_in_frame) { + bd.flags |= BD_RX_FIRST; + etsec->rx_first_in_frame = 0; + etsec->rx_first_bd = bd; + } + + /* Last in frame */ + if (remaining_data == 0) { + + /* Clear flags */ + + bd.flags &= ~0x7ff; + + bd.flags |= BD_LAST; + + /* NOTE: non-octet aligned frame is impossible in qemu */ + + if (size >= etsec->regs[MAXFRM].value) { + /* frame length violation */ + qemu_log("%s frame length violation: size:%zu MAXFRM:%d\n", + __func__, size, etsec->regs[MAXFRM].value); + + bd.flags |= BD_RX_LG; + } + + if (size < 64) { + /* Short frame */ + bd.flags |= BD_RX_SH; + } + + /* TODO: Broadcast and Multicast */ + + if (bd.flags & BD_INTERRUPT) { + /* Set RXFx */ + etsec->regs[RSTAT].value |= 1 << (7 - ring_nbr); + + /* Set IEVENT */ + ievent_set(etsec, IEVENT_RXF); + } + + } else { + if (bd.flags & BD_INTERRUPT) { + /* Set IEVENT */ + ievent_set(etsec, IEVENT_RXB); + } + } + + /* Write back BD after update */ + write_buffer_descriptor(etsec, bd_addr, &bd); + } + + /* Wrap or next BD */ + if (bd_flags & BD_WRAP) { + bd_addr = ring_base; + } else { + bd_addr += sizeof(eTSEC_rxtx_bd); + } + } while (remaining_data != 0 + && (bd_flags & BD_RX_EMPTY) + && bd_addr != start_bd_addr); + + /* Reset ring ptr */ + etsec->regs[RBPTR0 + ring_nbr].value = bd_addr; + + /* The frame is too large to fit in the Rx ring */ + if (remaining_data > 0) { + + /* Set RSTAT[QHLTx] */ + etsec->regs[RSTAT].value |= 1 << (23 - ring_nbr); + + /* Save remaining data to send the end of the frame when the ring will + * be restarted + */ + etsec->rx_remaining_data = remaining_data; + + /* Copy the frame */ + tmp_buf = g_malloc(size); + memcpy(tmp_buf, etsec->rx_buffer, size); + etsec->rx_buffer = tmp_buf; + + RING_DEBUG("no empty RxBD available any more\n"); + } else { + etsec->rx_buffer_len = 0; + etsec->rx_buffer = NULL; + if (etsec->need_flush) { + qemu_flush_queued_packets(qemu_get_queue(etsec->nic)); + } + } + + RING_DEBUG("eTSEC End of ring_write: remaining_data:%zu\n", remaining_data); +} diff --git a/qemu/hw/net/lan9118.c b/qemu/hw/net/lan9118.c new file mode 100644 index 000000000..4f0e840f0 --- /dev/null +++ b/qemu/hw/net/lan9118.c @@ -0,0 +1,1392 @@ +/* + * SMSC LAN9118 Ethernet interface emulation + * + * Copyright (c) 2009 CodeSourcery, LLC. + * Written by Paul Brook + * + * This code is licensed under the GNU GPL v2 + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "hw/sysbus.h" +#include "net/net.h" +#include "hw/devices.h" +#include "sysemu/sysemu.h" +#include "hw/ptimer.h" +/* For crc32 */ +#include <zlib.h> + +//#define DEBUG_LAN9118 + +#ifdef DEBUG_LAN9118 +#define DPRINTF(fmt, ...) \ +do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0) +#define BADF(fmt, ...) \ +do { hw_error("lan9118: error: " fmt , ## __VA_ARGS__);} while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#define BADF(fmt, ...) \ +do { fprintf(stderr, "lan9118: error: " fmt , ## __VA_ARGS__);} while (0) +#endif + +#define CSR_ID_REV 0x50 +#define CSR_IRQ_CFG 0x54 +#define CSR_INT_STS 0x58 +#define CSR_INT_EN 0x5c +#define CSR_BYTE_TEST 0x64 +#define CSR_FIFO_INT 0x68 +#define CSR_RX_CFG 0x6c +#define CSR_TX_CFG 0x70 +#define CSR_HW_CFG 0x74 +#define CSR_RX_DP_CTRL 0x78 +#define CSR_RX_FIFO_INF 0x7c +#define CSR_TX_FIFO_INF 0x80 +#define CSR_PMT_CTRL 0x84 +#define CSR_GPIO_CFG 0x88 +#define CSR_GPT_CFG 0x8c +#define CSR_GPT_CNT 0x90 +#define CSR_WORD_SWAP 0x98 +#define CSR_FREE_RUN 0x9c +#define CSR_RX_DROP 0xa0 +#define CSR_MAC_CSR_CMD 0xa4 +#define CSR_MAC_CSR_DATA 0xa8 +#define CSR_AFC_CFG 0xac +#define CSR_E2P_CMD 0xb0 +#define CSR_E2P_DATA 0xb4 + +/* IRQ_CFG */ +#define IRQ_INT 0x00001000 +#define IRQ_EN 0x00000100 +#define IRQ_POL 0x00000010 +#define IRQ_TYPE 0x00000001 + +/* INT_STS/INT_EN */ +#define SW_INT 0x80000000 +#define TXSTOP_INT 0x02000000 +#define RXSTOP_INT 0x01000000 +#define RXDFH_INT 0x00800000 +#define TX_IOC_INT 0x00200000 +#define RXD_INT 0x00100000 +#define GPT_INT 0x00080000 +#define PHY_INT 0x00040000 +#define PME_INT 0x00020000 +#define TXSO_INT 0x00010000 +#define RWT_INT 0x00008000 +#define RXE_INT 0x00004000 +#define TXE_INT 0x00002000 +#define TDFU_INT 0x00000800 +#define TDFO_INT 0x00000400 +#define TDFA_INT 0x00000200 +#define TSFF_INT 0x00000100 +#define TSFL_INT 0x00000080 +#define RXDF_INT 0x00000040 +#define RDFL_INT 0x00000020 +#define RSFF_INT 0x00000010 +#define RSFL_INT 0x00000008 +#define GPIO2_INT 0x00000004 +#define GPIO1_INT 0x00000002 +#define GPIO0_INT 0x00000001 +#define RESERVED_INT 0x7c001000 + +#define MAC_CR 1 +#define MAC_ADDRH 2 +#define MAC_ADDRL 3 +#define MAC_HASHH 4 +#define MAC_HASHL 5 +#define MAC_MII_ACC 6 +#define MAC_MII_DATA 7 +#define MAC_FLOW 8 +#define MAC_VLAN1 9 /* TODO */ +#define MAC_VLAN2 10 /* TODO */ +#define MAC_WUFF 11 /* TODO */ +#define MAC_WUCSR 12 /* TODO */ + +#define MAC_CR_RXALL 0x80000000 +#define MAC_CR_RCVOWN 0x00800000 +#define MAC_CR_LOOPBK 0x00200000 +#define MAC_CR_FDPX 0x00100000 +#define MAC_CR_MCPAS 0x00080000 +#define MAC_CR_PRMS 0x00040000 +#define MAC_CR_INVFILT 0x00020000 +#define MAC_CR_PASSBAD 0x00010000 +#define MAC_CR_HO 0x00008000 +#define MAC_CR_HPFILT 0x00002000 +#define MAC_CR_LCOLL 0x00001000 +#define MAC_CR_BCAST 0x00000800 +#define MAC_CR_DISRTY 0x00000400 +#define MAC_CR_PADSTR 0x00000100 +#define MAC_CR_BOLMT 0x000000c0 +#define MAC_CR_DFCHK 0x00000020 +#define MAC_CR_TXEN 0x00000008 +#define MAC_CR_RXEN 0x00000004 +#define MAC_CR_RESERVED 0x7f404213 + +#define PHY_INT_ENERGYON 0x80 +#define PHY_INT_AUTONEG_COMPLETE 0x40 +#define PHY_INT_FAULT 0x20 +#define PHY_INT_DOWN 0x10 +#define PHY_INT_AUTONEG_LP 0x08 +#define PHY_INT_PARFAULT 0x04 +#define PHY_INT_AUTONEG_PAGE 0x02 + +#define GPT_TIMER_EN 0x20000000 + +enum tx_state { + TX_IDLE, + TX_B, + TX_DATA +}; + +typedef struct { + /* state is a tx_state but we can't put enums in VMStateDescriptions. */ + uint32_t state; + uint32_t cmd_a; + uint32_t cmd_b; + int32_t buffer_size; + int32_t offset; + int32_t pad; + int32_t fifo_used; + int32_t len; + uint8_t data[2048]; +} LAN9118Packet; + +static const VMStateDescription vmstate_lan9118_packet = { + .name = "lan9118_packet", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(state, LAN9118Packet), + VMSTATE_UINT32(cmd_a, LAN9118Packet), + VMSTATE_UINT32(cmd_b, LAN9118Packet), + VMSTATE_INT32(buffer_size, LAN9118Packet), + VMSTATE_INT32(offset, LAN9118Packet), + VMSTATE_INT32(pad, LAN9118Packet), + VMSTATE_INT32(fifo_used, LAN9118Packet), + VMSTATE_INT32(len, LAN9118Packet), + VMSTATE_UINT8_ARRAY(data, LAN9118Packet, 2048), + VMSTATE_END_OF_LIST() + } +}; + +#define TYPE_LAN9118 "lan9118" +#define LAN9118(obj) OBJECT_CHECK(lan9118_state, (obj), TYPE_LAN9118) + +typedef struct { + SysBusDevice parent_obj; + + NICState *nic; + NICConf conf; + qemu_irq irq; + MemoryRegion mmio; + ptimer_state *timer; + + uint32_t irq_cfg; + uint32_t int_sts; + uint32_t int_en; + uint32_t fifo_int; + uint32_t rx_cfg; + uint32_t tx_cfg; + uint32_t hw_cfg; + uint32_t pmt_ctrl; + uint32_t gpio_cfg; + uint32_t gpt_cfg; + uint32_t word_swap; + uint32_t free_timer_start; + uint32_t mac_cmd; + uint32_t mac_data; + uint32_t afc_cfg; + uint32_t e2p_cmd; + uint32_t e2p_data; + + uint32_t mac_cr; + uint32_t mac_hashh; + uint32_t mac_hashl; + uint32_t mac_mii_acc; + uint32_t mac_mii_data; + uint32_t mac_flow; + + uint32_t phy_status; + uint32_t phy_control; + uint32_t phy_advertise; + uint32_t phy_int; + uint32_t phy_int_mask; + + int32_t eeprom_writable; + uint8_t eeprom[128]; + + int32_t tx_fifo_size; + LAN9118Packet *txp; + LAN9118Packet tx_packet; + + int32_t tx_status_fifo_used; + int32_t tx_status_fifo_head; + uint32_t tx_status_fifo[512]; + + int32_t rx_status_fifo_size; + int32_t rx_status_fifo_used; + int32_t rx_status_fifo_head; + uint32_t rx_status_fifo[896]; + int32_t rx_fifo_size; + int32_t rx_fifo_used; + int32_t rx_fifo_head; + uint32_t rx_fifo[3360]; + int32_t rx_packet_size_head; + int32_t rx_packet_size_tail; + int32_t rx_packet_size[1024]; + + int32_t rxp_offset; + int32_t rxp_size; + int32_t rxp_pad; + + uint32_t write_word_prev_offset; + uint32_t write_word_n; + uint16_t write_word_l; + uint16_t write_word_h; + uint32_t read_word_prev_offset; + uint32_t read_word_n; + uint32_t read_long; + + uint32_t mode_16bit; +} lan9118_state; + +static const VMStateDescription vmstate_lan9118 = { + .name = "lan9118", + .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_PTIMER(timer, lan9118_state), + VMSTATE_UINT32(irq_cfg, lan9118_state), + VMSTATE_UINT32(int_sts, lan9118_state), + VMSTATE_UINT32(int_en, lan9118_state), + VMSTATE_UINT32(fifo_int, lan9118_state), + VMSTATE_UINT32(rx_cfg, lan9118_state), + VMSTATE_UINT32(tx_cfg, lan9118_state), + VMSTATE_UINT32(hw_cfg, lan9118_state), + VMSTATE_UINT32(pmt_ctrl, lan9118_state), + VMSTATE_UINT32(gpio_cfg, lan9118_state), + VMSTATE_UINT32(gpt_cfg, lan9118_state), + VMSTATE_UINT32(word_swap, lan9118_state), + VMSTATE_UINT32(free_timer_start, lan9118_state), + VMSTATE_UINT32(mac_cmd, lan9118_state), + VMSTATE_UINT32(mac_data, lan9118_state), + VMSTATE_UINT32(afc_cfg, lan9118_state), + VMSTATE_UINT32(e2p_cmd, lan9118_state), + VMSTATE_UINT32(e2p_data, lan9118_state), + VMSTATE_UINT32(mac_cr, lan9118_state), + VMSTATE_UINT32(mac_hashh, lan9118_state), + VMSTATE_UINT32(mac_hashl, lan9118_state), + VMSTATE_UINT32(mac_mii_acc, lan9118_state), + VMSTATE_UINT32(mac_mii_data, lan9118_state), + VMSTATE_UINT32(mac_flow, lan9118_state), + VMSTATE_UINT32(phy_status, lan9118_state), + VMSTATE_UINT32(phy_control, lan9118_state), + VMSTATE_UINT32(phy_advertise, lan9118_state), + VMSTATE_UINT32(phy_int, lan9118_state), + VMSTATE_UINT32(phy_int_mask, lan9118_state), + VMSTATE_INT32(eeprom_writable, lan9118_state), + VMSTATE_UINT8_ARRAY(eeprom, lan9118_state, 128), + VMSTATE_INT32(tx_fifo_size, lan9118_state), + /* txp always points at tx_packet so need not be saved */ + VMSTATE_STRUCT(tx_packet, lan9118_state, 0, + vmstate_lan9118_packet, LAN9118Packet), + VMSTATE_INT32(tx_status_fifo_used, lan9118_state), + VMSTATE_INT32(tx_status_fifo_head, lan9118_state), + VMSTATE_UINT32_ARRAY(tx_status_fifo, lan9118_state, 512), + VMSTATE_INT32(rx_status_fifo_size, lan9118_state), + VMSTATE_INT32(rx_status_fifo_used, lan9118_state), + VMSTATE_INT32(rx_status_fifo_head, lan9118_state), + VMSTATE_UINT32_ARRAY(rx_status_fifo, lan9118_state, 896), + VMSTATE_INT32(rx_fifo_size, lan9118_state), + VMSTATE_INT32(rx_fifo_used, lan9118_state), + VMSTATE_INT32(rx_fifo_head, lan9118_state), + VMSTATE_UINT32_ARRAY(rx_fifo, lan9118_state, 3360), + VMSTATE_INT32(rx_packet_size_head, lan9118_state), + VMSTATE_INT32(rx_packet_size_tail, lan9118_state), + VMSTATE_INT32_ARRAY(rx_packet_size, lan9118_state, 1024), + VMSTATE_INT32(rxp_offset, lan9118_state), + VMSTATE_INT32(rxp_size, lan9118_state), + VMSTATE_INT32(rxp_pad, lan9118_state), + VMSTATE_UINT32_V(write_word_prev_offset, lan9118_state, 2), + VMSTATE_UINT32_V(write_word_n, lan9118_state, 2), + VMSTATE_UINT16_V(write_word_l, lan9118_state, 2), + VMSTATE_UINT16_V(write_word_h, lan9118_state, 2), + VMSTATE_UINT32_V(read_word_prev_offset, lan9118_state, 2), + VMSTATE_UINT32_V(read_word_n, lan9118_state, 2), + VMSTATE_UINT32_V(read_long, lan9118_state, 2), + VMSTATE_UINT32_V(mode_16bit, lan9118_state, 2), + VMSTATE_END_OF_LIST() + } +}; + +static void lan9118_update(lan9118_state *s) +{ + int level; + + /* TODO: Implement FIFO level IRQs. */ + level = (s->int_sts & s->int_en) != 0; + if (level) { + s->irq_cfg |= IRQ_INT; + } else { + s->irq_cfg &= ~IRQ_INT; + } + if ((s->irq_cfg & IRQ_EN) == 0) { + level = 0; + } + if ((s->irq_cfg & (IRQ_TYPE | IRQ_POL)) != (IRQ_TYPE | IRQ_POL)) { + /* Interrupt is active low unless we're configured as + * active-high polarity, push-pull type. + */ + level = !level; + } + qemu_set_irq(s->irq, level); +} + +static void lan9118_mac_changed(lan9118_state *s) +{ + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); +} + +static void lan9118_reload_eeprom(lan9118_state *s) +{ + int i; + if (s->eeprom[0] != 0xa5) { + s->e2p_cmd &= ~0x10; + DPRINTF("MACADDR load failed\n"); + return; + } + for (i = 0; i < 6; i++) { + s->conf.macaddr.a[i] = s->eeprom[i + 1]; + } + s->e2p_cmd |= 0x10; + DPRINTF("MACADDR loaded from eeprom\n"); + lan9118_mac_changed(s); +} + +static void phy_update_irq(lan9118_state *s) +{ + if (s->phy_int & s->phy_int_mask) { + s->int_sts |= PHY_INT; + } else { + s->int_sts &= ~PHY_INT; + } + lan9118_update(s); +} + +static void phy_update_link(lan9118_state *s) +{ + /* Autonegotiation status mirrors link status. */ + if (qemu_get_queue(s->nic)->link_down) { + s->phy_status &= ~0x0024; + s->phy_int |= PHY_INT_DOWN; + } else { + s->phy_status |= 0x0024; + s->phy_int |= PHY_INT_ENERGYON; + s->phy_int |= PHY_INT_AUTONEG_COMPLETE; + } + phy_update_irq(s); +} + +static void lan9118_set_link(NetClientState *nc) +{ + phy_update_link(qemu_get_nic_opaque(nc)); +} + +static void phy_reset(lan9118_state *s) +{ + s->phy_status = 0x7809; + s->phy_control = 0x3000; + s->phy_advertise = 0x01e1; + s->phy_int_mask = 0; + s->phy_int = 0; + phy_update_link(s); +} + +static void lan9118_reset(DeviceState *d) +{ + lan9118_state *s = LAN9118(d); + + s->irq_cfg &= (IRQ_TYPE | IRQ_POL); + s->int_sts = 0; + s->int_en = 0; + s->fifo_int = 0x48000000; + s->rx_cfg = 0; + s->tx_cfg = 0; + s->hw_cfg = s->mode_16bit ? 0x00050000 : 0x00050004; + s->pmt_ctrl &= 0x45; + s->gpio_cfg = 0; + s->txp->fifo_used = 0; + s->txp->state = TX_IDLE; + s->txp->cmd_a = 0xffffffffu; + s->txp->cmd_b = 0xffffffffu; + s->txp->len = 0; + s->txp->fifo_used = 0; + s->tx_fifo_size = 4608; + s->tx_status_fifo_used = 0; + s->rx_status_fifo_size = 704; + s->rx_fifo_size = 2640; + s->rx_fifo_used = 0; + s->rx_status_fifo_size = 176; + s->rx_status_fifo_used = 0; + s->rxp_offset = 0; + s->rxp_size = 0; + s->rxp_pad = 0; + s->rx_packet_size_tail = s->rx_packet_size_head; + s->rx_packet_size[s->rx_packet_size_head] = 0; + s->mac_cmd = 0; + s->mac_data = 0; + s->afc_cfg = 0; + s->e2p_cmd = 0; + s->e2p_data = 0; + s->free_timer_start = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 40; + + ptimer_stop(s->timer); + ptimer_set_count(s->timer, 0xffff); + s->gpt_cfg = 0xffff; + + s->mac_cr = MAC_CR_PRMS; + s->mac_hashh = 0; + s->mac_hashl = 0; + s->mac_mii_acc = 0; + s->mac_mii_data = 0; + s->mac_flow = 0; + + s->read_word_n = 0; + s->write_word_n = 0; + + phy_reset(s); + + s->eeprom_writable = 0; + lan9118_reload_eeprom(s); +} + +static void rx_fifo_push(lan9118_state *s, uint32_t val) +{ + int fifo_pos; + fifo_pos = s->rx_fifo_head + s->rx_fifo_used; + if (fifo_pos >= s->rx_fifo_size) + fifo_pos -= s->rx_fifo_size; + s->rx_fifo[fifo_pos] = val; + s->rx_fifo_used++; +} + +/* Return nonzero if the packet is accepted by the filter. */ +static int lan9118_filter(lan9118_state *s, const uint8_t *addr) +{ + int multicast; + uint32_t hash; + + if (s->mac_cr & MAC_CR_PRMS) { + return 1; + } + if (addr[0] == 0xff && addr[1] == 0xff && addr[2] == 0xff && + addr[3] == 0xff && addr[4] == 0xff && addr[5] == 0xff) { + return (s->mac_cr & MAC_CR_BCAST) == 0; + } + + multicast = addr[0] & 1; + if (multicast &&s->mac_cr & MAC_CR_MCPAS) { + return 1; + } + if (multicast ? (s->mac_cr & MAC_CR_HPFILT) == 0 + : (s->mac_cr & MAC_CR_HO) == 0) { + /* Exact matching. */ + hash = memcmp(addr, s->conf.macaddr.a, 6); + if (s->mac_cr & MAC_CR_INVFILT) { + return hash != 0; + } else { + return hash == 0; + } + } else { + /* Hash matching */ + hash = compute_mcast_idx(addr); + if (hash & 0x20) { + return (s->mac_hashh >> (hash & 0x1f)) & 1; + } else { + return (s->mac_hashl >> (hash & 0x1f)) & 1; + } + } +} + +static ssize_t lan9118_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + lan9118_state *s = qemu_get_nic_opaque(nc); + int fifo_len; + int offset; + int src_pos; + int n; + int filter; + uint32_t val; + uint32_t crc; + uint32_t status; + + if ((s->mac_cr & MAC_CR_RXEN) == 0) { + return -1; + } + + if (size >= 2048 || size < 14) { + return -1; + } + + /* TODO: Implement FIFO overflow notification. */ + if (s->rx_status_fifo_used == s->rx_status_fifo_size) { + return -1; + } + + filter = lan9118_filter(s, buf); + if (!filter && (s->mac_cr & MAC_CR_RXALL) == 0) { + return size; + } + + offset = (s->rx_cfg >> 8) & 0x1f; + n = offset & 3; + fifo_len = (size + n + 3) >> 2; + /* Add a word for the CRC. */ + fifo_len++; + if (s->rx_fifo_size - s->rx_fifo_used < fifo_len) { + return -1; + } + + DPRINTF("Got packet len:%d fifo:%d filter:%s\n", + (int)size, fifo_len, filter ? "pass" : "fail"); + val = 0; + crc = bswap32(crc32(~0, buf, size)); + for (src_pos = 0; src_pos < size; src_pos++) { + val = (val >> 8) | ((uint32_t)buf[src_pos] << 24); + n++; + if (n == 4) { + n = 0; + rx_fifo_push(s, val); + val = 0; + } + } + if (n) { + val >>= ((4 - n) * 8); + val |= crc << (n * 8); + rx_fifo_push(s, val); + val = crc >> ((4 - n) * 8); + rx_fifo_push(s, val); + } else { + rx_fifo_push(s, crc); + } + n = s->rx_status_fifo_head + s->rx_status_fifo_used; + if (n >= s->rx_status_fifo_size) { + n -= s->rx_status_fifo_size; + } + s->rx_packet_size[s->rx_packet_size_tail] = fifo_len; + s->rx_packet_size_tail = (s->rx_packet_size_tail + 1023) & 1023; + s->rx_status_fifo_used++; + + status = (size + 4) << 16; + if (buf[0] == 0xff && buf[1] == 0xff && buf[2] == 0xff && + buf[3] == 0xff && buf[4] == 0xff && buf[5] == 0xff) { + status |= 0x00002000; + } else if (buf[0] & 1) { + status |= 0x00000400; + } + if (!filter) { + status |= 0x40000000; + } + s->rx_status_fifo[n] = status; + + if (s->rx_status_fifo_used > (s->fifo_int & 0xff)) { + s->int_sts |= RSFL_INT; + } + lan9118_update(s); + + return size; +} + +static uint32_t rx_fifo_pop(lan9118_state *s) +{ + int n; + uint32_t val; + + if (s->rxp_size == 0 && s->rxp_pad == 0) { + s->rxp_size = s->rx_packet_size[s->rx_packet_size_head]; + s->rx_packet_size[s->rx_packet_size_head] = 0; + if (s->rxp_size != 0) { + s->rx_packet_size_head = (s->rx_packet_size_head + 1023) & 1023; + s->rxp_offset = (s->rx_cfg >> 10) & 7; + n = s->rxp_offset + s->rxp_size; + switch (s->rx_cfg >> 30) { + case 1: + n = (-n) & 3; + break; + case 2: + n = (-n) & 7; + break; + default: + n = 0; + break; + } + s->rxp_pad = n; + DPRINTF("Pop packet size:%d offset:%d pad: %d\n", + s->rxp_size, s->rxp_offset, s->rxp_pad); + } + } + if (s->rxp_offset > 0) { + s->rxp_offset--; + val = 0; + } else if (s->rxp_size > 0) { + s->rxp_size--; + val = s->rx_fifo[s->rx_fifo_head++]; + if (s->rx_fifo_head >= s->rx_fifo_size) { + s->rx_fifo_head -= s->rx_fifo_size; + } + s->rx_fifo_used--; + } else if (s->rxp_pad > 0) { + s->rxp_pad--; + val = 0; + } else { + DPRINTF("RX underflow\n"); + s->int_sts |= RXE_INT; + val = 0; + } + lan9118_update(s); + return val; +} + +static void do_tx_packet(lan9118_state *s) +{ + int n; + uint32_t status; + + /* FIXME: Honor TX disable, and allow queueing of packets. */ + if (s->phy_control & 0x4000) { + /* This assumes the receive routine doesn't touch the VLANClient. */ + lan9118_receive(qemu_get_queue(s->nic), s->txp->data, s->txp->len); + } else { + qemu_send_packet(qemu_get_queue(s->nic), s->txp->data, s->txp->len); + } + s->txp->fifo_used = 0; + + if (s->tx_status_fifo_used == 512) { + /* Status FIFO full */ + return; + } + /* Add entry to status FIFO. */ + status = s->txp->cmd_b & 0xffff0000u; + DPRINTF("Sent packet tag:%04x len %d\n", status >> 16, s->txp->len); + n = (s->tx_status_fifo_head + s->tx_status_fifo_used) & 511; + s->tx_status_fifo[n] = status; + s->tx_status_fifo_used++; + if (s->tx_status_fifo_used == 512) { + s->int_sts |= TSFF_INT; + /* TODO: Stop transmission. */ + } +} + +static uint32_t rx_status_fifo_pop(lan9118_state *s) +{ + uint32_t val; + + val = s->rx_status_fifo[s->rx_status_fifo_head]; + if (s->rx_status_fifo_used != 0) { + s->rx_status_fifo_used--; + s->rx_status_fifo_head++; + if (s->rx_status_fifo_head >= s->rx_status_fifo_size) { + s->rx_status_fifo_head -= s->rx_status_fifo_size; + } + /* ??? What value should be returned when the FIFO is empty? */ + DPRINTF("RX status pop 0x%08x\n", val); + } + return val; +} + +static uint32_t tx_status_fifo_pop(lan9118_state *s) +{ + uint32_t val; + + val = s->tx_status_fifo[s->tx_status_fifo_head]; + if (s->tx_status_fifo_used != 0) { + s->tx_status_fifo_used--; + s->tx_status_fifo_head = (s->tx_status_fifo_head + 1) & 511; + /* ??? What value should be returned when the FIFO is empty? */ + } + return val; +} + +static void tx_fifo_push(lan9118_state *s, uint32_t val) +{ + int n; + + if (s->txp->fifo_used == s->tx_fifo_size) { + s->int_sts |= TDFO_INT; + return; + } + switch (s->txp->state) { + case TX_IDLE: + s->txp->cmd_a = val & 0x831f37ff; + s->txp->fifo_used++; + s->txp->state = TX_B; + s->txp->buffer_size = extract32(s->txp->cmd_a, 0, 11); + s->txp->offset = extract32(s->txp->cmd_a, 16, 5); + break; + case TX_B: + if (s->txp->cmd_a & 0x2000) { + /* First segment */ + s->txp->cmd_b = val; + s->txp->fifo_used++; + /* End alignment does not include command words. */ + n = (s->txp->buffer_size + s->txp->offset + 3) >> 2; + switch ((n >> 24) & 3) { + case 1: + n = (-n) & 3; + break; + case 2: + n = (-n) & 7; + break; + default: + n = 0; + } + s->txp->pad = n; + s->txp->len = 0; + } + DPRINTF("Block len:%d offset:%d pad:%d cmd %08x\n", + s->txp->buffer_size, s->txp->offset, s->txp->pad, + s->txp->cmd_a); + s->txp->state = TX_DATA; + break; + case TX_DATA: + if (s->txp->offset >= 4) { + s->txp->offset -= 4; + break; + } + if (s->txp->buffer_size <= 0 && s->txp->pad != 0) { + s->txp->pad--; + } else { + n = MIN(4, s->txp->buffer_size + s->txp->offset); + while (s->txp->offset) { + val >>= 8; + n--; + s->txp->offset--; + } + /* Documentation is somewhat unclear on the ordering of bytes + in FIFO words. Empirical results show it to be little-endian. + */ + /* TODO: FIFO overflow checking. */ + while (n--) { + s->txp->data[s->txp->len] = val & 0xff; + s->txp->len++; + val >>= 8; + s->txp->buffer_size--; + } + s->txp->fifo_used++; + } + if (s->txp->buffer_size <= 0 && s->txp->pad == 0) { + if (s->txp->cmd_a & 0x1000) { + do_tx_packet(s); + } + if (s->txp->cmd_a & 0x80000000) { + s->int_sts |= TX_IOC_INT; + } + s->txp->state = TX_IDLE; + } + break; + } +} + +static uint32_t do_phy_read(lan9118_state *s, int reg) +{ + uint32_t val; + + switch (reg) { + case 0: /* Basic Control */ + return s->phy_control; + case 1: /* Basic Status */ + return s->phy_status; + case 2: /* ID1 */ + return 0x0007; + case 3: /* ID2 */ + return 0xc0d1; + case 4: /* Auto-neg advertisement */ + return s->phy_advertise; + case 5: /* Auto-neg Link Partner Ability */ + return 0x0f71; + case 6: /* Auto-neg Expansion */ + return 1; + /* TODO 17, 18, 27, 29, 30, 31 */ + case 29: /* Interrupt source. */ + val = s->phy_int; + s->phy_int = 0; + phy_update_irq(s); + return val; + case 30: /* Interrupt mask */ + return s->phy_int_mask; + default: + BADF("PHY read reg %d\n", reg); + return 0; + } +} + +static void do_phy_write(lan9118_state *s, int reg, uint32_t val) +{ + switch (reg) { + case 0: /* Basic Control */ + if (val & 0x8000) { + phy_reset(s); + break; + } + s->phy_control = val & 0x7980; + /* Complete autonegotiation immediately. */ + if (val & 0x1000) { + s->phy_status |= 0x0020; + } + break; + case 4: /* Auto-neg advertisement */ + s->phy_advertise = (val & 0x2d7f) | 0x80; + break; + /* TODO 17, 18, 27, 31 */ + case 30: /* Interrupt mask */ + s->phy_int_mask = val & 0xff; + phy_update_irq(s); + break; + default: + BADF("PHY write reg %d = 0x%04x\n", reg, val); + } +} + +static void do_mac_write(lan9118_state *s, int reg, uint32_t val) +{ + switch (reg) { + case MAC_CR: + if ((s->mac_cr & MAC_CR_RXEN) != 0 && (val & MAC_CR_RXEN) == 0) { + s->int_sts |= RXSTOP_INT; + } + s->mac_cr = val & ~MAC_CR_RESERVED; + DPRINTF("MAC_CR: %08x\n", val); + break; + case MAC_ADDRH: + s->conf.macaddr.a[4] = val & 0xff; + s->conf.macaddr.a[5] = (val >> 8) & 0xff; + lan9118_mac_changed(s); + break; + case MAC_ADDRL: + s->conf.macaddr.a[0] = val & 0xff; + s->conf.macaddr.a[1] = (val >> 8) & 0xff; + s->conf.macaddr.a[2] = (val >> 16) & 0xff; + s->conf.macaddr.a[3] = (val >> 24) & 0xff; + lan9118_mac_changed(s); + break; + case MAC_HASHH: + s->mac_hashh = val; + break; + case MAC_HASHL: + s->mac_hashl = val; + break; + case MAC_MII_ACC: + s->mac_mii_acc = val & 0xffc2; + if (val & 2) { + DPRINTF("PHY write %d = 0x%04x\n", + (val >> 6) & 0x1f, s->mac_mii_data); + do_phy_write(s, (val >> 6) & 0x1f, s->mac_mii_data); + } else { + s->mac_mii_data = do_phy_read(s, (val >> 6) & 0x1f); + DPRINTF("PHY read %d = 0x%04x\n", + (val >> 6) & 0x1f, s->mac_mii_data); + } + break; + case MAC_MII_DATA: + s->mac_mii_data = val & 0xffff; + break; + case MAC_FLOW: + s->mac_flow = val & 0xffff0000; + break; + case MAC_VLAN1: + /* Writing to this register changes a condition for + * FrameTooLong bit in rx_status. Since we do not set + * FrameTooLong anyway, just ignore write to this. + */ + break; + default: + hw_error("lan9118: Unimplemented MAC register write: %d = 0x%x\n", + s->mac_cmd & 0xf, val); + } +} + +static uint32_t do_mac_read(lan9118_state *s, int reg) +{ + switch (reg) { + case MAC_CR: + return s->mac_cr; + case MAC_ADDRH: + return s->conf.macaddr.a[4] | (s->conf.macaddr.a[5] << 8); + case MAC_ADDRL: + return s->conf.macaddr.a[0] | (s->conf.macaddr.a[1] << 8) + | (s->conf.macaddr.a[2] << 16) | (s->conf.macaddr.a[3] << 24); + case MAC_HASHH: + return s->mac_hashh; + break; + case MAC_HASHL: + return s->mac_hashl; + break; + case MAC_MII_ACC: + return s->mac_mii_acc; + case MAC_MII_DATA: + return s->mac_mii_data; + case MAC_FLOW: + return s->mac_flow; + default: + hw_error("lan9118: Unimplemented MAC register read: %d\n", + s->mac_cmd & 0xf); + } +} + +static void lan9118_eeprom_cmd(lan9118_state *s, int cmd, int addr) +{ + s->e2p_cmd = (s->e2p_cmd & 0x10) | (cmd << 28) | addr; + switch (cmd) { + case 0: + s->e2p_data = s->eeprom[addr]; + DPRINTF("EEPROM Read %d = 0x%02x\n", addr, s->e2p_data); + break; + case 1: + s->eeprom_writable = 0; + DPRINTF("EEPROM Write Disable\n"); + break; + case 2: /* EWEN */ + s->eeprom_writable = 1; + DPRINTF("EEPROM Write Enable\n"); + break; + case 3: /* WRITE */ + if (s->eeprom_writable) { + s->eeprom[addr] &= s->e2p_data; + DPRINTF("EEPROM Write %d = 0x%02x\n", addr, s->e2p_data); + } else { + DPRINTF("EEPROM Write %d (ignored)\n", addr); + } + break; + case 4: /* WRAL */ + if (s->eeprom_writable) { + for (addr = 0; addr < 128; addr++) { + s->eeprom[addr] &= s->e2p_data; + } + DPRINTF("EEPROM Write All 0x%02x\n", s->e2p_data); + } else { + DPRINTF("EEPROM Write All (ignored)\n"); + } + break; + case 5: /* ERASE */ + if (s->eeprom_writable) { + s->eeprom[addr] = 0xff; + DPRINTF("EEPROM Erase %d\n", addr); + } else { + DPRINTF("EEPROM Erase %d (ignored)\n", addr); + } + break; + case 6: /* ERAL */ + if (s->eeprom_writable) { + memset(s->eeprom, 0xff, 128); + DPRINTF("EEPROM Erase All\n"); + } else { + DPRINTF("EEPROM Erase All (ignored)\n"); + } + break; + case 7: /* RELOAD */ + lan9118_reload_eeprom(s); + break; + } +} + +static void lan9118_tick(void *opaque) +{ + lan9118_state *s = (lan9118_state *)opaque; + if (s->int_en & GPT_INT) { + s->int_sts |= GPT_INT; + } + lan9118_update(s); +} + +static void lan9118_writel(void *opaque, hwaddr offset, + uint64_t val, unsigned size) +{ + lan9118_state *s = (lan9118_state *)opaque; + offset &= 0xff; + + //DPRINTF("Write reg 0x%02x = 0x%08x\n", (int)offset, val); + if (offset >= 0x20 && offset < 0x40) { + /* TX FIFO */ + tx_fifo_push(s, val); + return; + } + switch (offset) { + case CSR_IRQ_CFG: + /* TODO: Implement interrupt deassertion intervals. */ + val &= (IRQ_EN | IRQ_POL | IRQ_TYPE); + s->irq_cfg = (s->irq_cfg & IRQ_INT) | val; + break; + case CSR_INT_STS: + s->int_sts &= ~val; + break; + case CSR_INT_EN: + s->int_en = val & ~RESERVED_INT; + s->int_sts |= val & SW_INT; + break; + case CSR_FIFO_INT: + DPRINTF("FIFO INT levels %08x\n", val); + s->fifo_int = val; + break; + case CSR_RX_CFG: + if (val & 0x8000) { + /* RX_DUMP */ + s->rx_fifo_used = 0; + s->rx_status_fifo_used = 0; + s->rx_packet_size_tail = s->rx_packet_size_head; + s->rx_packet_size[s->rx_packet_size_head] = 0; + } + s->rx_cfg = val & 0xcfff1ff0; + break; + case CSR_TX_CFG: + if (val & 0x8000) { + s->tx_status_fifo_used = 0; + } + if (val & 0x4000) { + s->txp->state = TX_IDLE; + s->txp->fifo_used = 0; + s->txp->cmd_a = 0xffffffff; + } + s->tx_cfg = val & 6; + break; + case CSR_HW_CFG: + if (val & 1) { + /* SRST */ + lan9118_reset(DEVICE(s)); + } else { + s->hw_cfg = (val & 0x003f300) | (s->hw_cfg & 0x4); + } + break; + case CSR_RX_DP_CTRL: + if (val & 0x80000000) { + /* Skip forward to next packet. */ + s->rxp_pad = 0; + s->rxp_offset = 0; + if (s->rxp_size == 0) { + /* Pop a word to start the next packet. */ + rx_fifo_pop(s); + s->rxp_pad = 0; + s->rxp_offset = 0; + } + s->rx_fifo_head += s->rxp_size; + if (s->rx_fifo_head >= s->rx_fifo_size) { + s->rx_fifo_head -= s->rx_fifo_size; + } + } + break; + case CSR_PMT_CTRL: + if (val & 0x400) { + phy_reset(s); + } + s->pmt_ctrl &= ~0x34e; + s->pmt_ctrl |= (val & 0x34e); + break; + case CSR_GPIO_CFG: + /* Probably just enabling LEDs. */ + s->gpio_cfg = val & 0x7777071f; + break; + case CSR_GPT_CFG: + if ((s->gpt_cfg ^ val) & GPT_TIMER_EN) { + if (val & GPT_TIMER_EN) { + ptimer_set_count(s->timer, val & 0xffff); + ptimer_run(s->timer, 0); + } else { + ptimer_stop(s->timer); + ptimer_set_count(s->timer, 0xffff); + } + } + s->gpt_cfg = val & (GPT_TIMER_EN | 0xffff); + break; + case CSR_WORD_SWAP: + /* Ignored because we're in 32-bit mode. */ + s->word_swap = val; + break; + case CSR_MAC_CSR_CMD: + s->mac_cmd = val & 0x4000000f; + if (val & 0x80000000) { + if (val & 0x40000000) { + s->mac_data = do_mac_read(s, val & 0xf); + DPRINTF("MAC read %d = 0x%08x\n", val & 0xf, s->mac_data); + } else { + DPRINTF("MAC write %d = 0x%08x\n", val & 0xf, s->mac_data); + do_mac_write(s, val & 0xf, s->mac_data); + } + } + break; + case CSR_MAC_CSR_DATA: + s->mac_data = val; + break; + case CSR_AFC_CFG: + s->afc_cfg = val & 0x00ffffff; + break; + case CSR_E2P_CMD: + lan9118_eeprom_cmd(s, (val >> 28) & 7, val & 0x7f); + break; + case CSR_E2P_DATA: + s->e2p_data = val & 0xff; + break; + + default: + hw_error("lan9118_write: Bad reg 0x%x = %x\n", (int)offset, (int)val); + break; + } + lan9118_update(s); +} + +static void lan9118_writew(void *opaque, hwaddr offset, + uint32_t val) +{ + lan9118_state *s = (lan9118_state *)opaque; + offset &= 0xff; + + if (s->write_word_prev_offset != (offset & ~0x3)) { + /* New offset, reset word counter */ + s->write_word_n = 0; + s->write_word_prev_offset = offset & ~0x3; + } + + if (offset & 0x2) { + s->write_word_h = val; + } else { + s->write_word_l = val; + } + + //DPRINTF("Writew reg 0x%02x = 0x%08x\n", (int)offset, val); + s->write_word_n++; + if (s->write_word_n == 2) { + s->write_word_n = 0; + lan9118_writel(s, offset & ~3, s->write_word_l + + (s->write_word_h << 16), 4); + } +} + +static void lan9118_16bit_mode_write(void *opaque, hwaddr offset, + uint64_t val, unsigned size) +{ + switch (size) { + case 2: + lan9118_writew(opaque, offset, (uint32_t)val); + return; + case 4: + lan9118_writel(opaque, offset, val, size); + return; + } + + hw_error("lan9118_write: Bad size 0x%x\n", size); +} + +static uint64_t lan9118_readl(void *opaque, hwaddr offset, + unsigned size) +{ + lan9118_state *s = (lan9118_state *)opaque; + + //DPRINTF("Read reg 0x%02x\n", (int)offset); + if (offset < 0x20) { + /* RX FIFO */ + return rx_fifo_pop(s); + } + switch (offset) { + case 0x40: + return rx_status_fifo_pop(s); + case 0x44: + return s->rx_status_fifo[s->tx_status_fifo_head]; + case 0x48: + return tx_status_fifo_pop(s); + case 0x4c: + return s->tx_status_fifo[s->tx_status_fifo_head]; + case CSR_ID_REV: + return 0x01180001; + case CSR_IRQ_CFG: + return s->irq_cfg; + case CSR_INT_STS: + return s->int_sts; + case CSR_INT_EN: + return s->int_en; + case CSR_BYTE_TEST: + return 0x87654321; + case CSR_FIFO_INT: + return s->fifo_int; + case CSR_RX_CFG: + return s->rx_cfg; + case CSR_TX_CFG: + return s->tx_cfg; + case CSR_HW_CFG: + return s->hw_cfg; + case CSR_RX_DP_CTRL: + return 0; + case CSR_RX_FIFO_INF: + return (s->rx_status_fifo_used << 16) | (s->rx_fifo_used << 2); + case CSR_TX_FIFO_INF: + return (s->tx_status_fifo_used << 16) + | (s->tx_fifo_size - s->txp->fifo_used); + case CSR_PMT_CTRL: + return s->pmt_ctrl; + case CSR_GPIO_CFG: + return s->gpio_cfg; + case CSR_GPT_CFG: + return s->gpt_cfg; + case CSR_GPT_CNT: + return ptimer_get_count(s->timer); + case CSR_WORD_SWAP: + return s->word_swap; + case CSR_FREE_RUN: + return (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 40) - s->free_timer_start; + case CSR_RX_DROP: + /* TODO: Implement dropped frames counter. */ + return 0; + case CSR_MAC_CSR_CMD: + return s->mac_cmd; + case CSR_MAC_CSR_DATA: + return s->mac_data; + case CSR_AFC_CFG: + return s->afc_cfg; + case CSR_E2P_CMD: + return s->e2p_cmd; + case CSR_E2P_DATA: + return s->e2p_data; + } + hw_error("lan9118_read: Bad reg 0x%x\n", (int)offset); + return 0; +} + +static uint32_t lan9118_readw(void *opaque, hwaddr offset) +{ + lan9118_state *s = (lan9118_state *)opaque; + uint32_t val; + + if (s->read_word_prev_offset != (offset & ~0x3)) { + /* New offset, reset word counter */ + s->read_word_n = 0; + s->read_word_prev_offset = offset & ~0x3; + } + + s->read_word_n++; + if (s->read_word_n == 1) { + s->read_long = lan9118_readl(s, offset & ~3, 4); + } else { + s->read_word_n = 0; + } + + if (offset & 2) { + val = s->read_long >> 16; + } else { + val = s->read_long & 0xFFFF; + } + + //DPRINTF("Readw reg 0x%02x, val 0x%x\n", (int)offset, val); + return val; +} + +static uint64_t lan9118_16bit_mode_read(void *opaque, hwaddr offset, + unsigned size) +{ + switch (size) { + case 2: + return lan9118_readw(opaque, offset); + case 4: + return lan9118_readl(opaque, offset, size); + } + + hw_error("lan9118_read: Bad size 0x%x\n", size); + return 0; +} + +static const MemoryRegionOps lan9118_mem_ops = { + .read = lan9118_readl, + .write = lan9118_writel, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps lan9118_16bit_mem_ops = { + .read = lan9118_16bit_mode_read, + .write = lan9118_16bit_mode_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static NetClientInfo net_lan9118_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = lan9118_receive, + .link_status_changed = lan9118_set_link, +}; + +static int lan9118_init1(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + lan9118_state *s = LAN9118(dev); + QEMUBH *bh; + int i; + const MemoryRegionOps *mem_ops = + s->mode_16bit ? &lan9118_16bit_mem_ops : &lan9118_mem_ops; + + memory_region_init_io(&s->mmio, OBJECT(dev), mem_ops, s, + "lan9118-mmio", 0x100); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->nic = qemu_new_nic(&net_lan9118_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + s->eeprom[0] = 0xa5; + for (i = 0; i < 6; i++) { + s->eeprom[i + 1] = s->conf.macaddr.a[i]; + } + s->pmt_ctrl = 1; + s->txp = &s->tx_packet; + + bh = qemu_bh_new(lan9118_tick, s); + s->timer = ptimer_init(bh); + ptimer_set_freq(s->timer, 10000); + ptimer_set_limit(s->timer, 0xffff, 1); + + return 0; +} + +static Property lan9118_properties[] = { + DEFINE_NIC_PROPERTIES(lan9118_state, conf), + DEFINE_PROP_UINT32("mode_16bit", lan9118_state, mode_16bit, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void lan9118_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = lan9118_init1; + dc->reset = lan9118_reset; + dc->props = lan9118_properties; + dc->vmsd = &vmstate_lan9118; +} + +static const TypeInfo lan9118_info = { + .name = TYPE_LAN9118, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(lan9118_state), + .class_init = lan9118_class_init, +}; + +static void lan9118_register_types(void) +{ + type_register_static(&lan9118_info); +} + +/* Legacy helper function. Should go away when machine config files are + implemented. */ +void lan9118_init(NICInfo *nd, uint32_t base, qemu_irq irq) +{ + DeviceState *dev; + SysBusDevice *s; + + qemu_check_nic_model(nd, "lan9118"); + dev = qdev_create(NULL, TYPE_LAN9118); + qdev_set_nic_properties(dev, nd); + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(s, 0, base); + sysbus_connect_irq(s, 0, irq); +} + +type_init(lan9118_register_types) diff --git a/qemu/hw/net/lance.c b/qemu/hw/net/lance.c new file mode 100644 index 000000000..780b39d65 --- /dev/null +++ b/qemu/hw/net/lance.c @@ -0,0 +1,183 @@ +/* + * QEMU AMD PC-Net II (Am79C970A) emulation + * + * Copyright (c) 2004 Antony T Curtis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This software was written to be compatible with the specification: + * AMD Am79C970A PCnet-PCI II Ethernet Controller Data-Sheet + * AMD Publication# 19436 Rev:E Amendment/0 Issue Date: June 2000 + */ + +/* + * On Sparc32, this is the Lance (Am7990) part of chip STP2000 (Master I/O), also + * produced as NCR89C100. See + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR89C100.txt + * and + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR92C990.txt + */ + +#include "hw/sysbus.h" +#include "net/net.h" +#include "qemu/timer.h" +#include "qemu/sockets.h" +#include "hw/sparc/sun4m.h" +#include "pcnet.h" +#include "trace.h" +#include "sysemu/sysemu.h" + +#define TYPE_LANCE "lance" +#define SYSBUS_PCNET(obj) \ + OBJECT_CHECK(SysBusPCNetState, (obj), TYPE_LANCE) + +typedef struct { + SysBusDevice parent_obj; + + PCNetState state; +} SysBusPCNetState; + +static void parent_lance_reset(void *opaque, int irq, int level) +{ + SysBusPCNetState *d = opaque; + if (level) + pcnet_h_reset(&d->state); +} + +static void lance_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + SysBusPCNetState *d = opaque; + + trace_lance_mem_writew(addr, val & 0xffff); + pcnet_ioport_writew(&d->state, addr, val & 0xffff); +} + +static uint64_t lance_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + SysBusPCNetState *d = opaque; + uint32_t val; + + val = pcnet_ioport_readw(&d->state, addr); + trace_lance_mem_readw(addr, val & 0xffff); + return val & 0xffff; +} + +static const MemoryRegionOps lance_mem_ops = { + .read = lance_mem_read, + .write = lance_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 2, + .max_access_size = 2, + }, +}; + +static NetClientInfo net_lance_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = pcnet_receive, + .link_status_changed = pcnet_set_link_status, +}; + +static const VMStateDescription vmstate_lance = { + .name = "pcnet", + .version_id = 3, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, SysBusPCNetState, 0, vmstate_pcnet, PCNetState), + VMSTATE_END_OF_LIST() + } +}; + +static int lance_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + SysBusPCNetState *d = SYSBUS_PCNET(dev); + PCNetState *s = &d->state; + + memory_region_init_io(&s->mmio, OBJECT(d), &lance_mem_ops, d, + "lance-mmio", 4); + + qdev_init_gpio_in(dev, parent_lance_reset, 1); + + sysbus_init_mmio(sbd, &s->mmio); + + sysbus_init_irq(sbd, &s->irq); + + s->phys_mem_read = ledma_memory_read; + s->phys_mem_write = ledma_memory_write; + pcnet_common_init(dev, s, &net_lance_info); + return 0; +} + +static void lance_reset(DeviceState *dev) +{ + SysBusPCNetState *d = SYSBUS_PCNET(dev); + + pcnet_h_reset(&d->state); +} + +static void lance_instance_init(Object *obj) +{ + SysBusPCNetState *d = SYSBUS_PCNET(obj); + PCNetState *s = &d->state; + + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static Property lance_properties[] = { + DEFINE_PROP_PTR("dma", SysBusPCNetState, state.dma_opaque), + DEFINE_NIC_PROPERTIES(SysBusPCNetState, state.conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void lance_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = lance_init; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->fw_name = "ethernet"; + dc->reset = lance_reset; + dc->vmsd = &vmstate_lance; + dc->props = lance_properties; + /* Reason: pointer property "dma" */ + dc->cannot_instantiate_with_device_add_yet = true; +} + +static const TypeInfo lance_info = { + .name = TYPE_LANCE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SysBusPCNetState), + .class_init = lance_class_init, + .instance_init = lance_instance_init, +}; + +static void lance_register_types(void) +{ + type_register_static(&lance_info); +} + +type_init(lance_register_types) diff --git a/qemu/hw/net/mcf_fec.c b/qemu/hw/net/mcf_fec.c new file mode 100644 index 000000000..21928f9f3 --- /dev/null +++ b/qemu/hw/net/mcf_fec.c @@ -0,0 +1,534 @@ +/* + * ColdFire Fast Ethernet Controller emulation. + * + * Copyright (c) 2007 CodeSourcery. + * + * This code is licensed under the GPL + */ +#include "hw/hw.h" +#include "net/net.h" +#include "hw/m68k/mcf.h" +#include "hw/net/mii.h" +/* For crc32 */ +#include <zlib.h> +#include "exec/address-spaces.h" + +//#define DEBUG_FEC 1 + +#ifdef DEBUG_FEC +#define DPRINTF(fmt, ...) \ +do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#endif + +#define FEC_MAX_FRAME_SIZE 2032 + +typedef struct { + MemoryRegion *sysmem; + MemoryRegion iomem; + qemu_irq *irq; + NICState *nic; + NICConf conf; + uint32_t irq_state; + uint32_t eir; + uint32_t eimr; + int rx_enabled; + uint32_t rx_descriptor; + uint32_t tx_descriptor; + uint32_t ecr; + uint32_t mmfr; + uint32_t mscr; + uint32_t rcr; + uint32_t tcr; + uint32_t tfwr; + uint32_t rfsr; + uint32_t erdsr; + uint32_t etdsr; + uint32_t emrbr; +} mcf_fec_state; + +#define FEC_INT_HB 0x80000000 +#define FEC_INT_BABR 0x40000000 +#define FEC_INT_BABT 0x20000000 +#define FEC_INT_GRA 0x10000000 +#define FEC_INT_TXF 0x08000000 +#define FEC_INT_TXB 0x04000000 +#define FEC_INT_RXF 0x02000000 +#define FEC_INT_RXB 0x01000000 +#define FEC_INT_MII 0x00800000 +#define FEC_INT_EB 0x00400000 +#define FEC_INT_LC 0x00200000 +#define FEC_INT_RL 0x00100000 +#define FEC_INT_UN 0x00080000 + +#define FEC_EN 2 +#define FEC_RESET 1 + +/* Map interrupt flags onto IRQ lines. */ +#define FEC_NUM_IRQ 13 +static const uint32_t mcf_fec_irq_map[FEC_NUM_IRQ] = { + FEC_INT_TXF, + FEC_INT_TXB, + FEC_INT_UN, + FEC_INT_RL, + FEC_INT_RXF, + FEC_INT_RXB, + FEC_INT_MII, + FEC_INT_LC, + FEC_INT_HB, + FEC_INT_GRA, + FEC_INT_EB, + FEC_INT_BABT, + FEC_INT_BABR +}; + +/* Buffer Descriptor. */ +typedef struct { + uint16_t flags; + uint16_t length; + uint32_t data; +} mcf_fec_bd; + +#define FEC_BD_R 0x8000 +#define FEC_BD_E 0x8000 +#define FEC_BD_O1 0x4000 +#define FEC_BD_W 0x2000 +#define FEC_BD_O2 0x1000 +#define FEC_BD_L 0x0800 +#define FEC_BD_TC 0x0400 +#define FEC_BD_ABC 0x0200 +#define FEC_BD_M 0x0100 +#define FEC_BD_BC 0x0080 +#define FEC_BD_MC 0x0040 +#define FEC_BD_LG 0x0020 +#define FEC_BD_NO 0x0010 +#define FEC_BD_CR 0x0004 +#define FEC_BD_OV 0x0002 +#define FEC_BD_TR 0x0001 + +static void mcf_fec_read_bd(mcf_fec_bd *bd, uint32_t addr) +{ + cpu_physical_memory_read(addr, bd, sizeof(*bd)); + be16_to_cpus(&bd->flags); + be16_to_cpus(&bd->length); + be32_to_cpus(&bd->data); +} + +static void mcf_fec_write_bd(mcf_fec_bd *bd, uint32_t addr) +{ + mcf_fec_bd tmp; + tmp.flags = cpu_to_be16(bd->flags); + tmp.length = cpu_to_be16(bd->length); + tmp.data = cpu_to_be32(bd->data); + cpu_physical_memory_write(addr, &tmp, sizeof(tmp)); +} + +static void mcf_fec_update(mcf_fec_state *s) +{ + uint32_t active; + uint32_t changed; + uint32_t mask; + int i; + + active = s->eir & s->eimr; + changed = active ^s->irq_state; + for (i = 0; i < FEC_NUM_IRQ; i++) { + mask = mcf_fec_irq_map[i]; + if (changed & mask) { + DPRINTF("IRQ %d = %d\n", i, (active & mask) != 0); + qemu_set_irq(s->irq[i], (active & mask) != 0); + } + } + s->irq_state = active; +} + +static void mcf_fec_do_tx(mcf_fec_state *s) +{ + uint32_t addr; + mcf_fec_bd bd; + int frame_size; + int len; + uint8_t frame[FEC_MAX_FRAME_SIZE]; + uint8_t *ptr; + + DPRINTF("do_tx\n"); + ptr = frame; + frame_size = 0; + addr = s->tx_descriptor; + while (1) { + mcf_fec_read_bd(&bd, addr); + DPRINTF("tx_bd %x flags %04x len %d data %08x\n", + addr, bd.flags, bd.length, bd.data); + if ((bd.flags & FEC_BD_R) == 0) { + /* Run out of descriptors to transmit. */ + break; + } + len = bd.length; + if (frame_size + len > FEC_MAX_FRAME_SIZE) { + len = FEC_MAX_FRAME_SIZE - frame_size; + s->eir |= FEC_INT_BABT; + } + cpu_physical_memory_read(bd.data, ptr, len); + ptr += len; + frame_size += len; + if (bd.flags & FEC_BD_L) { + /* Last buffer in frame. */ + DPRINTF("Sending packet\n"); + qemu_send_packet(qemu_get_queue(s->nic), frame, len); + ptr = frame; + frame_size = 0; + s->eir |= FEC_INT_TXF; + } + s->eir |= FEC_INT_TXB; + bd.flags &= ~FEC_BD_R; + /* Write back the modified descriptor. */ + mcf_fec_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & FEC_BD_W) != 0) { + addr = s->etdsr; + } else { + addr += 8; + } + } + s->tx_descriptor = addr; +} + +static void mcf_fec_enable_rx(mcf_fec_state *s) +{ + NetClientState *nc = qemu_get_queue(s->nic); + mcf_fec_bd bd; + + mcf_fec_read_bd(&bd, s->rx_descriptor); + s->rx_enabled = ((bd.flags & FEC_BD_E) != 0); + if (s->rx_enabled) { + qemu_flush_queued_packets(nc); + } +} + +static void mcf_fec_reset(mcf_fec_state *s) +{ + s->eir = 0; + s->eimr = 0; + s->rx_enabled = 0; + s->ecr = 0; + s->mscr = 0; + s->rcr = 0x05ee0001; + s->tcr = 0; + s->tfwr = 0; + s->rfsr = 0x500; +} + +#define MMFR_WRITE_OP (1 << 28) +#define MMFR_READ_OP (2 << 28) +#define MMFR_PHYADDR(v) (((v) >> 23) & 0x1f) +#define MMFR_REGNUM(v) (((v) >> 18) & 0x1f) + +static uint64_t mcf_fec_read_mdio(mcf_fec_state *s) +{ + uint64_t v; + + if (s->mmfr & MMFR_WRITE_OP) + return s->mmfr; + if (MMFR_PHYADDR(s->mmfr) != 1) + return s->mmfr |= 0xffff; + + switch (MMFR_REGNUM(s->mmfr)) { + case MII_BMCR: + v = MII_BMCR_SPEED | MII_BMCR_AUTOEN | MII_BMCR_FD; + break; + case MII_BMSR: + v = MII_BMSR_100TX_FD | MII_BMSR_100TX_HD | MII_BMSR_10T_FD | + MII_BMSR_10T_HD | MII_BMSR_MFPS | MII_BMSR_AN_COMP | + MII_BMSR_AUTONEG | MII_BMSR_LINK_ST; + break; + case MII_PHYID1: + v = DP83848_PHYID1; + break; + case MII_PHYID2: + v = DP83848_PHYID2; + break; + case MII_ANAR: + v = MII_ANAR_TXFD | MII_ANAR_TX | MII_ANAR_10FD | + MII_ANAR_10 | MII_ANAR_CSMACD; + break; + case MII_ANLPAR: + v = MII_ANLPAR_ACK | MII_ANLPAR_TXFD | MII_ANLPAR_TX | + MII_ANLPAR_10FD | MII_ANLPAR_10 | MII_ANLPAR_CSMACD; + break; + default: + v = 0xffff; + break; + } + s->mmfr = (s->mmfr & ~0xffff) | v; + return s->mmfr; +} + +static uint64_t mcf_fec_read(void *opaque, hwaddr addr, + unsigned size) +{ + mcf_fec_state *s = (mcf_fec_state *)opaque; + switch (addr & 0x3ff) { + case 0x004: return s->eir; + case 0x008: return s->eimr; + case 0x010: return s->rx_enabled ? (1 << 24) : 0; /* RDAR */ + case 0x014: return 0; /* TDAR */ + case 0x024: return s->ecr; + case 0x040: return mcf_fec_read_mdio(s); + case 0x044: return s->mscr; + case 0x064: return 0; /* MIBC */ + case 0x084: return s->rcr; + case 0x0c4: return s->tcr; + case 0x0e4: /* PALR */ + return (s->conf.macaddr.a[0] << 24) | (s->conf.macaddr.a[1] << 16) + | (s->conf.macaddr.a[2] << 8) | s->conf.macaddr.a[3]; + break; + case 0x0e8: /* PAUR */ + return (s->conf.macaddr.a[4] << 24) | (s->conf.macaddr.a[5] << 16) | 0x8808; + case 0x0ec: return 0x10000; /* OPD */ + case 0x118: return 0; + case 0x11c: return 0; + case 0x120: return 0; + case 0x124: return 0; + case 0x144: return s->tfwr; + case 0x14c: return 0x600; + case 0x150: return s->rfsr; + case 0x180: return s->erdsr; + case 0x184: return s->etdsr; + case 0x188: return s->emrbr; + default: + hw_error("mcf_fec_read: Bad address 0x%x\n", (int)addr); + return 0; + } +} + +static void mcf_fec_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + mcf_fec_state *s = (mcf_fec_state *)opaque; + switch (addr & 0x3ff) { + case 0x004: + s->eir &= ~value; + break; + case 0x008: + s->eimr = value; + break; + case 0x010: /* RDAR */ + if ((s->ecr & FEC_EN) && !s->rx_enabled) { + DPRINTF("RX enable\n"); + mcf_fec_enable_rx(s); + } + break; + case 0x014: /* TDAR */ + if (s->ecr & FEC_EN) { + mcf_fec_do_tx(s); + } + break; + case 0x024: + s->ecr = value; + if (value & FEC_RESET) { + DPRINTF("Reset\n"); + mcf_fec_reset(s); + } + if ((s->ecr & FEC_EN) == 0) { + s->rx_enabled = 0; + } + break; + case 0x040: + s->mmfr = value; + s->eir |= FEC_INT_MII; + break; + case 0x044: + s->mscr = value & 0xfe; + break; + case 0x064: + /* TODO: Implement MIB. */ + break; + case 0x084: + s->rcr = value & 0x07ff003f; + /* TODO: Implement LOOP mode. */ + break; + case 0x0c4: /* TCR */ + /* We transmit immediately, so raise GRA immediately. */ + s->tcr = value; + if (value & 1) + s->eir |= FEC_INT_GRA; + break; + case 0x0e4: /* PALR */ + s->conf.macaddr.a[0] = value >> 24; + s->conf.macaddr.a[1] = value >> 16; + s->conf.macaddr.a[2] = value >> 8; + s->conf.macaddr.a[3] = value; + break; + case 0x0e8: /* PAUR */ + s->conf.macaddr.a[4] = value >> 24; + s->conf.macaddr.a[5] = value >> 16; + break; + case 0x0ec: + /* OPD */ + break; + case 0x118: + case 0x11c: + case 0x120: + case 0x124: + /* TODO: implement MAC hash filtering. */ + break; + case 0x144: + s->tfwr = value & 3; + break; + case 0x14c: + /* FRBR writes ignored. */ + break; + case 0x150: + s->rfsr = (value & 0x3fc) | 0x400; + break; + case 0x180: + s->erdsr = value & ~3; + s->rx_descriptor = s->erdsr; + break; + case 0x184: + s->etdsr = value & ~3; + s->tx_descriptor = s->etdsr; + break; + case 0x188: + s->emrbr = value & 0x7f0; + break; + default: + hw_error("mcf_fec_write Bad address 0x%x\n", (int)addr); + } + mcf_fec_update(s); +} + +static int mcf_fec_have_receive_space(mcf_fec_state *s, size_t want) +{ + mcf_fec_bd bd; + uint32_t addr; + + /* Walk descriptor list to determine if we have enough buffer */ + addr = s->rx_descriptor; + while (want > 0) { + mcf_fec_read_bd(&bd, addr); + if ((bd.flags & FEC_BD_E) == 0) { + return 0; + } + if (want < s->emrbr) { + return 1; + } + want -= s->emrbr; + /* Advance to the next descriptor. */ + if ((bd.flags & FEC_BD_W) != 0) { + addr = s->erdsr; + } else { + addr += 8; + } + } + return 0; +} + +static ssize_t mcf_fec_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + mcf_fec_state *s = qemu_get_nic_opaque(nc); + mcf_fec_bd bd; + uint32_t flags = 0; + uint32_t addr; + uint32_t crc; + uint32_t buf_addr; + uint8_t *crc_ptr; + unsigned int buf_len; + size_t retsize; + + DPRINTF("do_rx len %d\n", size); + if (!s->rx_enabled) { + return -1; + } + /* 4 bytes for the CRC. */ + size += 4; + crc = cpu_to_be32(crc32(~0, buf, size)); + crc_ptr = (uint8_t *)&crc; + /* Huge frames are truncted. */ + if (size > FEC_MAX_FRAME_SIZE) { + size = FEC_MAX_FRAME_SIZE; + flags |= FEC_BD_TR | FEC_BD_LG; + } + /* Frames larger than the user limit just set error flags. */ + if (size > (s->rcr >> 16)) { + flags |= FEC_BD_LG; + } + /* Check if we have enough space in current descriptors */ + if (!mcf_fec_have_receive_space(s, size)) { + return 0; + } + addr = s->rx_descriptor; + retsize = size; + while (size > 0) { + mcf_fec_read_bd(&bd, addr); + buf_len = (size <= s->emrbr) ? size: s->emrbr; + bd.length = buf_len; + size -= buf_len; + DPRINTF("rx_bd %x length %d\n", addr, bd.length); + /* The last 4 bytes are the CRC. */ + if (size < 4) + buf_len += size - 4; + buf_addr = bd.data; + cpu_physical_memory_write(buf_addr, buf, buf_len); + buf += buf_len; + if (size < 4) { + cpu_physical_memory_write(buf_addr + buf_len, crc_ptr, 4 - size); + crc_ptr += 4 - size; + } + bd.flags &= ~FEC_BD_E; + if (size == 0) { + /* Last buffer in frame. */ + bd.flags |= flags | FEC_BD_L; + DPRINTF("rx frame flags %04x\n", bd.flags); + s->eir |= FEC_INT_RXF; + } else { + s->eir |= FEC_INT_RXB; + } + mcf_fec_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & FEC_BD_W) != 0) { + addr = s->erdsr; + } else { + addr += 8; + } + } + s->rx_descriptor = addr; + mcf_fec_enable_rx(s); + mcf_fec_update(s); + return retsize; +} + +static const MemoryRegionOps mcf_fec_ops = { + .read = mcf_fec_read, + .write = mcf_fec_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static NetClientInfo net_mcf_fec_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = mcf_fec_receive, +}; + +void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd, + hwaddr base, qemu_irq *irq) +{ + mcf_fec_state *s; + + qemu_check_nic_model(nd, "mcf_fec"); + + s = (mcf_fec_state *)g_malloc0(sizeof(mcf_fec_state)); + s->sysmem = sysmem; + s->irq = irq; + + memory_region_init_io(&s->iomem, NULL, &mcf_fec_ops, s, "fec", 0x400); + memory_region_add_subregion(sysmem, base, &s->iomem); + + s->conf.macaddr = nd->macaddr; + s->conf.peers.ncs[0] = nd->netdev; + + s->nic = qemu_new_nic(&net_mcf_fec_info, &s->conf, nd->model, nd->name, s); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); +} diff --git a/qemu/hw/net/milkymist-minimac2.c b/qemu/hw/net/milkymist-minimac2.c new file mode 100644 index 000000000..5d1cf0851 --- /dev/null +++ b/qemu/hw/net/milkymist-minimac2.c @@ -0,0 +1,540 @@ +/* + * QEMU model of the Milkymist minimac2 block. + * + * Copyright (c) 2011 Michael Walle <michael@walle.cc> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + * + * Specification available at: + * not available yet + * + */ + +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "trace.h" +#include "net/net.h" +#include "qemu/error-report.h" + +#include <zlib.h> + +enum { + R_SETUP = 0, + R_MDIO, + R_STATE0, + R_COUNT0, + R_STATE1, + R_COUNT1, + R_TXCOUNT, + R_MAX +}; + +enum { + SETUP_PHY_RST = (1<<0), +}; + +enum { + MDIO_DO = (1<<0), + MDIO_DI = (1<<1), + MDIO_OE = (1<<2), + MDIO_CLK = (1<<3), +}; + +enum { + STATE_EMPTY = 0, + STATE_LOADED = 1, + STATE_PENDING = 2, +}; + +enum { + MDIO_OP_WRITE = 1, + MDIO_OP_READ = 2, +}; + +enum mdio_state { + MDIO_STATE_IDLE, + MDIO_STATE_READING, + MDIO_STATE_WRITING, +}; + +enum { + R_PHY_ID1 = 2, + R_PHY_ID2 = 3, + R_PHY_MAX = 32 +}; + +#define MINIMAC2_MTU 1530 +#define MINIMAC2_BUFFER_SIZE 2048 + +struct MilkymistMinimac2MdioState { + int last_clk; + int count; + uint32_t data; + uint16_t data_out; + int state; + + uint8_t phy_addr; + uint8_t reg_addr; +}; +typedef struct MilkymistMinimac2MdioState MilkymistMinimac2MdioState; + +#define TYPE_MILKYMIST_MINIMAC2 "milkymist-minimac2" +#define MILKYMIST_MINIMAC2(obj) \ + OBJECT_CHECK(MilkymistMinimac2State, (obj), TYPE_MILKYMIST_MINIMAC2) + +struct MilkymistMinimac2State { + SysBusDevice parent_obj; + + NICState *nic; + NICConf conf; + char *phy_model; + MemoryRegion buffers; + MemoryRegion regs_region; + + qemu_irq rx_irq; + qemu_irq tx_irq; + + uint32_t regs[R_MAX]; + + MilkymistMinimac2MdioState mdio; + + uint16_t phy_regs[R_PHY_MAX]; + + uint8_t *rx0_buf; + uint8_t *rx1_buf; + uint8_t *tx_buf; +}; +typedef struct MilkymistMinimac2State MilkymistMinimac2State; + +static const uint8_t preamble_sfd[] = { + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xd5 +}; + +static void minimac2_mdio_write_reg(MilkymistMinimac2State *s, + uint8_t phy_addr, uint8_t reg_addr, uint16_t value) +{ + trace_milkymist_minimac2_mdio_write(phy_addr, reg_addr, value); + + /* nop */ +} + +static uint16_t minimac2_mdio_read_reg(MilkymistMinimac2State *s, + uint8_t phy_addr, uint8_t reg_addr) +{ + uint16_t r = s->phy_regs[reg_addr]; + + trace_milkymist_minimac2_mdio_read(phy_addr, reg_addr, r); + + return r; +} + +static void minimac2_update_mdio(MilkymistMinimac2State *s) +{ + MilkymistMinimac2MdioState *m = &s->mdio; + + /* detect rising clk edge */ + if (m->last_clk == 0 && (s->regs[R_MDIO] & MDIO_CLK)) { + /* shift data in */ + int bit = ((s->regs[R_MDIO] & MDIO_DO) + && (s->regs[R_MDIO] & MDIO_OE)) ? 1 : 0; + m->data = (m->data << 1) | bit; + + /* check for sync */ + if (m->data == 0xffffffff) { + m->count = 32; + } + + if (m->count == 16) { + uint8_t start = (m->data >> 14) & 0x3; + uint8_t op = (m->data >> 12) & 0x3; + uint8_t ta = (m->data) & 0x3; + + if (start == 1 && op == MDIO_OP_WRITE && ta == 2) { + m->state = MDIO_STATE_WRITING; + } else if (start == 1 && op == MDIO_OP_READ && (ta & 1) == 0) { + m->state = MDIO_STATE_READING; + } else { + m->state = MDIO_STATE_IDLE; + } + + if (m->state != MDIO_STATE_IDLE) { + m->phy_addr = (m->data >> 7) & 0x1f; + m->reg_addr = (m->data >> 2) & 0x1f; + } + + if (m->state == MDIO_STATE_READING) { + m->data_out = minimac2_mdio_read_reg(s, m->phy_addr, + m->reg_addr); + } + } + + if (m->count < 16 && m->state == MDIO_STATE_READING) { + int bit = (m->data_out & 0x8000) ? 1 : 0; + m->data_out <<= 1; + + if (bit) { + s->regs[R_MDIO] |= MDIO_DI; + } else { + s->regs[R_MDIO] &= ~MDIO_DI; + } + } + + if (m->count == 0 && m->state) { + if (m->state == MDIO_STATE_WRITING) { + uint16_t data = m->data & 0xffff; + minimac2_mdio_write_reg(s, m->phy_addr, m->reg_addr, data); + } + m->state = MDIO_STATE_IDLE; + } + m->count--; + } + + m->last_clk = (s->regs[R_MDIO] & MDIO_CLK) ? 1 : 0; +} + +static size_t assemble_frame(uint8_t *buf, size_t size, + const uint8_t *payload, size_t payload_size) +{ + uint32_t crc; + + if (size < payload_size + 12) { + error_report("milkymist_minimac2: received too big ethernet frame"); + return 0; + } + + /* prepend preamble and sfd */ + memcpy(buf, preamble_sfd, 8); + + /* now copy the payload */ + memcpy(buf + 8, payload, payload_size); + + /* pad frame if needed */ + if (payload_size < 60) { + memset(buf + payload_size + 8, 0, 60 - payload_size); + payload_size = 60; + } + + /* append fcs */ + crc = cpu_to_le32(crc32(0, buf + 8, payload_size)); + memcpy(buf + payload_size + 8, &crc, 4); + + return payload_size + 12; +} + +static void minimac2_tx(MilkymistMinimac2State *s) +{ + uint32_t txcount = s->regs[R_TXCOUNT]; + uint8_t *buf = s->tx_buf; + + if (txcount < 64) { + error_report("milkymist_minimac2: ethernet frame too small (%u < %u)", + txcount, 64); + goto err; + } + + if (txcount > MINIMAC2_MTU) { + error_report("milkymist_minimac2: MTU exceeded (%u > %u)", + txcount, MINIMAC2_MTU); + goto err; + } + + if (memcmp(buf, preamble_sfd, 8) != 0) { + error_report("milkymist_minimac2: frame doesn't contain the preamble " + "and/or the SFD (%02x %02x %02x %02x %02x %02x %02x %02x)", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + goto err; + } + + trace_milkymist_minimac2_tx_frame(txcount - 12); + + /* send packet, skipping preamble and sfd */ + qemu_send_packet_raw(qemu_get_queue(s->nic), buf + 8, txcount - 12); + + s->regs[R_TXCOUNT] = 0; + +err: + trace_milkymist_minimac2_pulse_irq_tx(); + qemu_irq_pulse(s->tx_irq); +} + +static void update_rx_interrupt(MilkymistMinimac2State *s) +{ + if (s->regs[R_STATE0] == STATE_PENDING + || s->regs[R_STATE1] == STATE_PENDING) { + trace_milkymist_minimac2_raise_irq_rx(); + qemu_irq_raise(s->rx_irq); + } else { + trace_milkymist_minimac2_lower_irq_rx(); + qemu_irq_lower(s->rx_irq); + } +} + +static ssize_t minimac2_rx(NetClientState *nc, const uint8_t *buf, size_t size) +{ + MilkymistMinimac2State *s = qemu_get_nic_opaque(nc); + + uint32_t r_count; + uint32_t r_state; + uint8_t *rx_buf; + + size_t frame_size; + + trace_milkymist_minimac2_rx_frame(buf, size); + + /* choose appropriate slot */ + if (s->regs[R_STATE0] == STATE_LOADED) { + r_count = R_COUNT0; + r_state = R_STATE0; + rx_buf = s->rx0_buf; + } else if (s->regs[R_STATE1] == STATE_LOADED) { + r_count = R_COUNT1; + r_state = R_STATE1; + rx_buf = s->rx1_buf; + } else { + return 0; + } + + /* assemble frame */ + frame_size = assemble_frame(rx_buf, MINIMAC2_BUFFER_SIZE, buf, size); + + if (frame_size == 0) { + return size; + } + + trace_milkymist_minimac2_rx_transfer(rx_buf, frame_size); + + /* update slot */ + s->regs[r_count] = frame_size; + s->regs[r_state] = STATE_PENDING; + + update_rx_interrupt(s); + + return size; +} + +static uint64_t +minimac2_read(void *opaque, hwaddr addr, unsigned size) +{ + MilkymistMinimac2State *s = opaque; + uint32_t r = 0; + + addr >>= 2; + switch (addr) { + case R_SETUP: + case R_MDIO: + case R_STATE0: + case R_COUNT0: + case R_STATE1: + case R_COUNT1: + case R_TXCOUNT: + r = s->regs[addr]; + break; + + default: + error_report("milkymist_minimac2: read access to unknown register 0x" + TARGET_FMT_plx, addr << 2); + break; + } + + trace_milkymist_minimac2_memory_read(addr << 2, r); + + return r; +} + +static int minimac2_can_rx(MilkymistMinimac2State *s) +{ + if (s->regs[R_STATE0] == STATE_LOADED) { + return 1; + } + if (s->regs[R_STATE1] == STATE_LOADED) { + return 1; + } + + return 0; +} + +static void +minimac2_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + MilkymistMinimac2State *s = opaque; + + trace_milkymist_minimac2_memory_write(addr, value); + + addr >>= 2; + switch (addr) { + case R_MDIO: + { + /* MDIO_DI is read only */ + int mdio_di = (s->regs[R_MDIO] & MDIO_DI); + s->regs[R_MDIO] = value; + if (mdio_di) { + s->regs[R_MDIO] |= mdio_di; + } else { + s->regs[R_MDIO] &= ~mdio_di; + } + + minimac2_update_mdio(s); + } break; + case R_TXCOUNT: + s->regs[addr] = value; + if (value > 0) { + minimac2_tx(s); + } + break; + case R_STATE0: + case R_STATE1: + s->regs[addr] = value; + update_rx_interrupt(s); + if (minimac2_can_rx(s)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + case R_SETUP: + case R_COUNT0: + case R_COUNT1: + s->regs[addr] = value; + break; + + default: + error_report("milkymist_minimac2: write access to unknown register 0x" + TARGET_FMT_plx, addr << 2); + break; + } +} + +static const MemoryRegionOps minimac2_ops = { + .read = minimac2_read, + .write = minimac2_write, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void milkymist_minimac2_reset(DeviceState *d) +{ + MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(d); + int i; + + for (i = 0; i < R_MAX; i++) { + s->regs[i] = 0; + } + for (i = 0; i < R_PHY_MAX; i++) { + s->phy_regs[i] = 0; + } + + /* defaults */ + s->phy_regs[R_PHY_ID1] = 0x0022; /* Micrel KSZ8001L */ + s->phy_regs[R_PHY_ID2] = 0x161a; +} + +static NetClientInfo net_milkymist_minimac2_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = minimac2_rx, +}; + +static int milkymist_minimac2_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(dev); + size_t buffers_size = TARGET_PAGE_ALIGN(3 * MINIMAC2_BUFFER_SIZE); + + sysbus_init_irq(sbd, &s->rx_irq); + sysbus_init_irq(sbd, &s->tx_irq); + + memory_region_init_io(&s->regs_region, OBJECT(dev), &minimac2_ops, s, + "milkymist-minimac2", R_MAX * 4); + sysbus_init_mmio(sbd, &s->regs_region); + + /* register buffers memory */ + memory_region_init_ram(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers", + buffers_size, &error_abort); + vmstate_register_ram_global(&s->buffers); + s->rx0_buf = memory_region_get_ram_ptr(&s->buffers); + s->rx1_buf = s->rx0_buf + MINIMAC2_BUFFER_SIZE; + s->tx_buf = s->rx1_buf + MINIMAC2_BUFFER_SIZE; + + sysbus_init_mmio(sbd, &s->buffers); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_milkymist_minimac2_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + return 0; +} + +static const VMStateDescription vmstate_milkymist_minimac2_mdio = { + .name = "milkymist-minimac2-mdio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32(last_clk, MilkymistMinimac2MdioState), + VMSTATE_INT32(count, MilkymistMinimac2MdioState), + VMSTATE_UINT32(data, MilkymistMinimac2MdioState), + VMSTATE_UINT16(data_out, MilkymistMinimac2MdioState), + VMSTATE_INT32(state, MilkymistMinimac2MdioState), + VMSTATE_UINT8(phy_addr, MilkymistMinimac2MdioState), + VMSTATE_UINT8(reg_addr, MilkymistMinimac2MdioState), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_milkymist_minimac2 = { + .name = "milkymist-minimac2", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, MilkymistMinimac2State, R_MAX), + VMSTATE_UINT16_ARRAY(phy_regs, MilkymistMinimac2State, R_PHY_MAX), + VMSTATE_STRUCT(mdio, MilkymistMinimac2State, 0, + vmstate_milkymist_minimac2_mdio, MilkymistMinimac2MdioState), + VMSTATE_END_OF_LIST() + } +}; + +static Property milkymist_minimac2_properties[] = { + DEFINE_NIC_PROPERTIES(MilkymistMinimac2State, conf), + DEFINE_PROP_STRING("phy_model", MilkymistMinimac2State, phy_model), + DEFINE_PROP_END_OF_LIST(), +}; + +static void milkymist_minimac2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = milkymist_minimac2_init; + dc->reset = milkymist_minimac2_reset; + dc->vmsd = &vmstate_milkymist_minimac2; + dc->props = milkymist_minimac2_properties; +} + +static const TypeInfo milkymist_minimac2_info = { + .name = TYPE_MILKYMIST_MINIMAC2, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MilkymistMinimac2State), + .class_init = milkymist_minimac2_class_init, +}; + +static void milkymist_minimac2_register_types(void) +{ + type_register_static(&milkymist_minimac2_info); +} + +type_init(milkymist_minimac2_register_types) diff --git a/qemu/hw/net/mipsnet.c b/qemu/hw/net/mipsnet.c new file mode 100644 index 000000000..f261011a2 --- /dev/null +++ b/qemu/hw/net/mipsnet.c @@ -0,0 +1,286 @@ +#include "hw/hw.h" +#include "net/net.h" +#include "trace.h" +#include "hw/sysbus.h" + +/* MIPSnet register offsets */ + +#define MIPSNET_DEV_ID 0x00 +#define MIPSNET_BUSY 0x08 +#define MIPSNET_RX_DATA_COUNT 0x0c +#define MIPSNET_TX_DATA_COUNT 0x10 +#define MIPSNET_INT_CTL 0x14 +# define MIPSNET_INTCTL_TXDONE 0x00000001 +# define MIPSNET_INTCTL_RXDONE 0x00000002 +# define MIPSNET_INTCTL_TESTBIT 0x80000000 +#define MIPSNET_INTERRUPT_INFO 0x18 +#define MIPSNET_RX_DATA_BUFFER 0x1c +#define MIPSNET_TX_DATA_BUFFER 0x20 + +#define MAX_ETH_FRAME_SIZE 1514 + +#define TYPE_MIPS_NET "mipsnet" +#define MIPS_NET(obj) OBJECT_CHECK(MIPSnetState, (obj), TYPE_MIPS_NET) + +typedef struct MIPSnetState { + SysBusDevice parent_obj; + + uint32_t busy; + uint32_t rx_count; + uint32_t rx_read; + uint32_t tx_count; + uint32_t tx_written; + uint32_t intctl; + uint8_t rx_buffer[MAX_ETH_FRAME_SIZE]; + uint8_t tx_buffer[MAX_ETH_FRAME_SIZE]; + MemoryRegion io; + qemu_irq irq; + NICState *nic; + NICConf conf; +} MIPSnetState; + +static void mipsnet_reset(MIPSnetState *s) +{ + s->busy = 1; + s->rx_count = 0; + s->rx_read = 0; + s->tx_count = 0; + s->tx_written = 0; + s->intctl = 0; + memset(s->rx_buffer, 0, MAX_ETH_FRAME_SIZE); + memset(s->tx_buffer, 0, MAX_ETH_FRAME_SIZE); +} + +static void mipsnet_update_irq(MIPSnetState *s) +{ + int isr = !!s->intctl; + trace_mipsnet_irq(isr, s->intctl); + qemu_set_irq(s->irq, isr); +} + +static int mipsnet_buffer_full(MIPSnetState *s) +{ + if (s->rx_count >= MAX_ETH_FRAME_SIZE) + return 1; + return 0; +} + +static int mipsnet_can_receive(NetClientState *nc) +{ + MIPSnetState *s = qemu_get_nic_opaque(nc); + + if (s->busy) + return 0; + return !mipsnet_buffer_full(s); +} + +static ssize_t mipsnet_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + MIPSnetState *s = qemu_get_nic_opaque(nc); + + trace_mipsnet_receive(size); + if (!mipsnet_can_receive(nc)) + return 0; + + s->busy = 1; + + /* Just accept everything. */ + + /* Write packet data. */ + memcpy(s->rx_buffer, buf, size); + + s->rx_count = size; + s->rx_read = 0; + + /* Now we can signal we have received something. */ + s->intctl |= MIPSNET_INTCTL_RXDONE; + mipsnet_update_irq(s); + + return size; +} + +static uint64_t mipsnet_ioport_read(void *opaque, hwaddr addr, + unsigned int size) +{ + MIPSnetState *s = opaque; + int ret = 0; + + addr &= 0x3f; + switch (addr) { + case MIPSNET_DEV_ID: + ret = be32_to_cpu(0x4d495053); /* MIPS */ + break; + case MIPSNET_DEV_ID + 4: + ret = be32_to_cpu(0x4e455430); /* NET0 */ + break; + case MIPSNET_BUSY: + ret = s->busy; + break; + case MIPSNET_RX_DATA_COUNT: + ret = s->rx_count; + break; + case MIPSNET_TX_DATA_COUNT: + ret = s->tx_count; + break; + case MIPSNET_INT_CTL: + ret = s->intctl; + s->intctl &= ~MIPSNET_INTCTL_TESTBIT; + break; + case MIPSNET_INTERRUPT_INFO: + /* XXX: This seems to be a per-VPE interrupt number. */ + ret = 0; + break; + case MIPSNET_RX_DATA_BUFFER: + if (s->rx_count) { + s->rx_count--; + ret = s->rx_buffer[s->rx_read++]; + if (mipsnet_can_receive(s->nic->ncs)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + } + break; + /* Reads as zero. */ + case MIPSNET_TX_DATA_BUFFER: + default: + break; + } + trace_mipsnet_read(addr, ret); + return ret; +} + +static void mipsnet_ioport_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + MIPSnetState *s = opaque; + + addr &= 0x3f; + trace_mipsnet_write(addr, val); + switch (addr) { + case MIPSNET_TX_DATA_COUNT: + s->tx_count = (val <= MAX_ETH_FRAME_SIZE) ? val : 0; + s->tx_written = 0; + break; + case MIPSNET_INT_CTL: + if (val & MIPSNET_INTCTL_TXDONE) { + s->intctl &= ~MIPSNET_INTCTL_TXDONE; + } else if (val & MIPSNET_INTCTL_RXDONE) { + s->intctl &= ~MIPSNET_INTCTL_RXDONE; + } else if (val & MIPSNET_INTCTL_TESTBIT) { + mipsnet_reset(s); + s->intctl |= MIPSNET_INTCTL_TESTBIT; + } else if (!val) { + /* ACK testbit interrupt, flag was cleared on read. */ + } + s->busy = !!s->intctl; + mipsnet_update_irq(s); + if (mipsnet_can_receive(s->nic->ncs)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + case MIPSNET_TX_DATA_BUFFER: + s->tx_buffer[s->tx_written++] = val; + if (s->tx_written == s->tx_count) { + /* Send buffer. */ + trace_mipsnet_send(s->tx_count); + qemu_send_packet(qemu_get_queue(s->nic), s->tx_buffer, s->tx_count); + s->tx_count = s->tx_written = 0; + s->intctl |= MIPSNET_INTCTL_TXDONE; + s->busy = 1; + mipsnet_update_irq(s); + } + break; + /* Read-only registers */ + case MIPSNET_DEV_ID: + case MIPSNET_BUSY: + case MIPSNET_RX_DATA_COUNT: + case MIPSNET_INTERRUPT_INFO: + case MIPSNET_RX_DATA_BUFFER: + default: + break; + } +} + +static const VMStateDescription vmstate_mipsnet = { + .name = "mipsnet", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(busy, MIPSnetState), + VMSTATE_UINT32(rx_count, MIPSnetState), + VMSTATE_UINT32(rx_read, MIPSnetState), + VMSTATE_UINT32(tx_count, MIPSnetState), + VMSTATE_UINT32(tx_written, MIPSnetState), + VMSTATE_UINT32(intctl, MIPSnetState), + VMSTATE_BUFFER(rx_buffer, MIPSnetState), + VMSTATE_BUFFER(tx_buffer, MIPSnetState), + VMSTATE_END_OF_LIST() + } +}; + +static NetClientInfo net_mipsnet_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = mipsnet_receive, +}; + +static const MemoryRegionOps mipsnet_ioport_ops = { + .read = mipsnet_ioport_read, + .write = mipsnet_ioport_write, + .impl.min_access_size = 1, + .impl.max_access_size = 4, +}; + +static int mipsnet_sysbus_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + MIPSnetState *s = MIPS_NET(dev); + + memory_region_init_io(&s->io, OBJECT(dev), &mipsnet_ioport_ops, s, + "mipsnet-io", 36); + sysbus_init_mmio(sbd, &s->io); + sysbus_init_irq(sbd, &s->irq); + + s->nic = qemu_new_nic(&net_mipsnet_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + return 0; +} + +static void mipsnet_sysbus_reset(DeviceState *dev) +{ + MIPSnetState *s = MIPS_NET(dev); + mipsnet_reset(s); +} + +static Property mipsnet_properties[] = { + DEFINE_NIC_PROPERTIES(MIPSnetState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void mipsnet_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = mipsnet_sysbus_init; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "MIPS Simulator network device"; + dc->reset = mipsnet_sysbus_reset; + dc->vmsd = &vmstate_mipsnet; + dc->props = mipsnet_properties; +} + +static const TypeInfo mipsnet_info = { + .name = TYPE_MIPS_NET, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MIPSnetState), + .class_init = mipsnet_class_init, +}; + +static void mipsnet_register_types(void) +{ + type_register_static(&mipsnet_info); +} + +type_init(mipsnet_register_types) diff --git a/qemu/hw/net/ne2000-isa.c b/qemu/hw/net/ne2000-isa.c new file mode 100644 index 000000000..17e7199f7 --- /dev/null +++ b/qemu/hw/net/ne2000-isa.c @@ -0,0 +1,152 @@ +/* + * QEMU NE2000 emulation -- isa bus windup + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "hw/isa/isa.h" +#include "hw/qdev.h" +#include "net/net.h" +#include "ne2000.h" +#include "exec/address-spaces.h" +#include "qapi/visitor.h" + +#define TYPE_ISA_NE2000 "ne2k_isa" +#define ISA_NE2000(obj) OBJECT_CHECK(ISANE2000State, (obj), TYPE_ISA_NE2000) + +typedef struct ISANE2000State { + ISADevice parent_obj; + + uint32_t iobase; + uint32_t isairq; + NE2000State ne2000; +} ISANE2000State; + +static NetClientInfo net_ne2000_isa_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = ne2000_can_receive, + .receive = ne2000_receive, +}; + +static const VMStateDescription vmstate_isa_ne2000 = { + .name = "ne2000", + .version_id = 2, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(ne2000, ISANE2000State, 0, vmstate_ne2000, NE2000State), + VMSTATE_END_OF_LIST() + } +}; + +static void isa_ne2000_realizefn(DeviceState *dev, Error **errp) +{ + ISADevice *isadev = ISA_DEVICE(dev); + ISANE2000State *isa = ISA_NE2000(dev); + NE2000State *s = &isa->ne2000; + + ne2000_setup_io(s, DEVICE(isadev), 0x20); + isa_register_ioport(isadev, &s->io, isa->iobase); + + isa_init_irq(isadev, &s->irq, isa->isairq); + + qemu_macaddr_default_if_unset(&s->c.macaddr); + ne2000_reset(s); + + s->nic = qemu_new_nic(&net_ne2000_isa_info, &s->c, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a); +} + +static Property ne2000_isa_properties[] = { + DEFINE_PROP_UINT32("iobase", ISANE2000State, iobase, 0x300), + DEFINE_PROP_UINT32("irq", ISANE2000State, isairq, 9), + DEFINE_NIC_PROPERTIES(ISANE2000State, ne2000.c), + DEFINE_PROP_END_OF_LIST(), +}; + +static void isa_ne2000_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = isa_ne2000_realizefn; + dc->props = ne2000_isa_properties; + dc->vmsd = &vmstate_isa_ne2000; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static void isa_ne2000_get_bootindex(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + ISANE2000State *isa = ISA_NE2000(obj); + NE2000State *s = &isa->ne2000; + + visit_type_int32(v, &s->c.bootindex, name, errp); +} + +static void isa_ne2000_set_bootindex(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + ISANE2000State *isa = ISA_NE2000(obj); + NE2000State *s = &isa->ne2000; + int32_t boot_index; + Error *local_err = NULL; + + visit_type_int32(v, &boot_index, name, &local_err); + if (local_err) { + goto out; + } + /* check whether bootindex is present in fw_boot_order list */ + check_boot_index(boot_index, &local_err); + if (local_err) { + goto out; + } + /* change bootindex to a new one */ + s->c.bootindex = boot_index; + +out: + if (local_err) { + error_propagate(errp, local_err); + } +} + +static void isa_ne2000_instance_init(Object *obj) +{ + object_property_add(obj, "bootindex", "int32", + isa_ne2000_get_bootindex, + isa_ne2000_set_bootindex, NULL, NULL, NULL); + object_property_set_int(obj, -1, "bootindex", NULL); +} +static const TypeInfo ne2000_isa_info = { + .name = TYPE_ISA_NE2000, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(ISANE2000State), + .class_init = isa_ne2000_class_initfn, + .instance_init = isa_ne2000_instance_init, +}; + +static void ne2000_isa_register_types(void) +{ + type_register_static(&ne2000_isa_info); +} + +type_init(ne2000_isa_register_types) diff --git a/qemu/hw/net/ne2000.c b/qemu/hw/net/ne2000.c new file mode 100644 index 000000000..3492db366 --- /dev/null +++ b/qemu/hw/net/ne2000.c @@ -0,0 +1,788 @@ +/* + * QEMU NE2000 emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "ne2000.h" +#include "hw/loader.h" +#include "sysemu/sysemu.h" + +/* debug NE2000 card */ +//#define DEBUG_NE2000 + +#define MAX_ETH_FRAME_SIZE 1514 + +#define E8390_CMD 0x00 /* The command register (for all pages) */ +/* Page 0 register offsets. */ +#define EN0_CLDALO 0x01 /* Low byte of current local dma addr RD */ +#define EN0_STARTPG 0x01 /* Starting page of ring bfr WR */ +#define EN0_CLDAHI 0x02 /* High byte of current local dma addr RD */ +#define EN0_STOPPG 0x02 /* Ending page +1 of ring bfr WR */ +#define EN0_BOUNDARY 0x03 /* Boundary page of ring bfr RD WR */ +#define EN0_TSR 0x04 /* Transmit status reg RD */ +#define EN0_TPSR 0x04 /* Transmit starting page WR */ +#define EN0_NCR 0x05 /* Number of collision reg RD */ +#define EN0_TCNTLO 0x05 /* Low byte of tx byte count WR */ +#define EN0_FIFO 0x06 /* FIFO RD */ +#define EN0_TCNTHI 0x06 /* High byte of tx byte count WR */ +#define EN0_ISR 0x07 /* Interrupt status reg RD WR */ +#define EN0_CRDALO 0x08 /* low byte of current remote dma address RD */ +#define EN0_RSARLO 0x08 /* Remote start address reg 0 */ +#define EN0_CRDAHI 0x09 /* high byte, current remote dma address RD */ +#define EN0_RSARHI 0x09 /* Remote start address reg 1 */ +#define EN0_RCNTLO 0x0a /* Remote byte count reg WR */ +#define EN0_RTL8029ID0 0x0a /* Realtek ID byte #1 RD */ +#define EN0_RCNTHI 0x0b /* Remote byte count reg WR */ +#define EN0_RTL8029ID1 0x0b /* Realtek ID byte #2 RD */ +#define EN0_RSR 0x0c /* rx status reg RD */ +#define EN0_RXCR 0x0c /* RX configuration reg WR */ +#define EN0_TXCR 0x0d /* TX configuration reg WR */ +#define EN0_COUNTER0 0x0d /* Rcv alignment error counter RD */ +#define EN0_DCFG 0x0e /* Data configuration reg WR */ +#define EN0_COUNTER1 0x0e /* Rcv CRC error counter RD */ +#define EN0_IMR 0x0f /* Interrupt mask reg WR */ +#define EN0_COUNTER2 0x0f /* Rcv missed frame error counter RD */ + +#define EN1_PHYS 0x11 +#define EN1_CURPAG 0x17 +#define EN1_MULT 0x18 + +#define EN2_STARTPG 0x21 /* Starting page of ring bfr RD */ +#define EN2_STOPPG 0x22 /* Ending page +1 of ring bfr RD */ + +#define EN3_CONFIG0 0x33 +#define EN3_CONFIG1 0x34 +#define EN3_CONFIG2 0x35 +#define EN3_CONFIG3 0x36 + +/* Register accessed at EN_CMD, the 8390 base addr. */ +#define E8390_STOP 0x01 /* Stop and reset the chip */ +#define E8390_START 0x02 /* Start the chip, clear reset */ +#define E8390_TRANS 0x04 /* Transmit a frame */ +#define E8390_RREAD 0x08 /* Remote read */ +#define E8390_RWRITE 0x10 /* Remote write */ +#define E8390_NODMA 0x20 /* Remote DMA */ +#define E8390_PAGE0 0x00 /* Select page chip registers */ +#define E8390_PAGE1 0x40 /* using the two high-order bits */ +#define E8390_PAGE2 0x80 /* Page 3 is invalid. */ + +/* Bits in EN0_ISR - Interrupt status register */ +#define ENISR_RX 0x01 /* Receiver, no error */ +#define ENISR_TX 0x02 /* Transmitter, no error */ +#define ENISR_RX_ERR 0x04 /* Receiver, with error */ +#define ENISR_TX_ERR 0x08 /* Transmitter, with error */ +#define ENISR_OVER 0x10 /* Receiver overwrote the ring */ +#define ENISR_COUNTERS 0x20 /* Counters need emptying */ +#define ENISR_RDC 0x40 /* remote dma complete */ +#define ENISR_RESET 0x80 /* Reset completed */ +#define ENISR_ALL 0x3f /* Interrupts we will enable */ + +/* Bits in received packet status byte and EN0_RSR*/ +#define ENRSR_RXOK 0x01 /* Received a good packet */ +#define ENRSR_CRC 0x02 /* CRC error */ +#define ENRSR_FAE 0x04 /* frame alignment error */ +#define ENRSR_FO 0x08 /* FIFO overrun */ +#define ENRSR_MPA 0x10 /* missed pkt */ +#define ENRSR_PHY 0x20 /* physical/multicast address */ +#define ENRSR_DIS 0x40 /* receiver disable. set in monitor mode */ +#define ENRSR_DEF 0x80 /* deferring */ + +/* Transmitted packet status, EN0_TSR. */ +#define ENTSR_PTX 0x01 /* Packet transmitted without error */ +#define ENTSR_ND 0x02 /* The transmit wasn't deferred. */ +#define ENTSR_COL 0x04 /* The transmit collided at least once. */ +#define ENTSR_ABT 0x08 /* The transmit collided 16 times, and was deferred. */ +#define ENTSR_CRS 0x10 /* The carrier sense was lost. */ +#define ENTSR_FU 0x20 /* A "FIFO underrun" occurred during transmit. */ +#define ENTSR_CDH 0x40 /* The collision detect "heartbeat" signal was lost. */ +#define ENTSR_OWC 0x80 /* There was an out-of-window collision. */ + +typedef struct PCINE2000State { + PCIDevice dev; + NE2000State ne2000; +} PCINE2000State; + +void ne2000_reset(NE2000State *s) +{ + int i; + + s->isr = ENISR_RESET; + memcpy(s->mem, &s->c.macaddr, 6); + s->mem[14] = 0x57; + s->mem[15] = 0x57; + + /* duplicate prom data */ + for(i = 15;i >= 0; i--) { + s->mem[2 * i] = s->mem[i]; + s->mem[2 * i + 1] = s->mem[i]; + } +} + +static void ne2000_update_irq(NE2000State *s) +{ + int isr; + isr = (s->isr & s->imr) & 0x7f; +#if defined(DEBUG_NE2000) + printf("NE2000: Set IRQ to %d (%02x %02x)\n", + isr ? 1 : 0, s->isr, s->imr); +#endif + qemu_set_irq(s->irq, (isr != 0)); +} + +static int ne2000_buffer_full(NE2000State *s) +{ + int avail, index, boundary; + + index = s->curpag << 8; + boundary = s->boundary << 8; + if (index < boundary) + avail = boundary - index; + else + avail = (s->stop - s->start) - (index - boundary); + if (avail < (MAX_ETH_FRAME_SIZE + 4)) + return 1; + return 0; +} + +int ne2000_can_receive(NetClientState *nc) +{ + NE2000State *s = qemu_get_nic_opaque(nc); + + if (s->cmd & E8390_STOP) + return 1; + return !ne2000_buffer_full(s); +} + +#define MIN_BUF_SIZE 60 + +ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_) +{ + NE2000State *s = qemu_get_nic_opaque(nc); + int size = size_; + uint8_t *p; + unsigned int total_len, next, avail, len, index, mcast_idx; + uint8_t buf1[60]; + static const uint8_t broadcast_macaddr[6] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +#if defined(DEBUG_NE2000) + printf("NE2000: received len=%d\n", size); +#endif + + if (s->cmd & E8390_STOP || ne2000_buffer_full(s)) + return -1; + + /* XXX: check this */ + if (s->rxcr & 0x10) { + /* promiscuous: receive all */ + } else { + if (!memcmp(buf, broadcast_macaddr, 6)) { + /* broadcast address */ + if (!(s->rxcr & 0x04)) + return size; + } else if (buf[0] & 0x01) { + /* multicast */ + if (!(s->rxcr & 0x08)) + return size; + mcast_idx = compute_mcast_idx(buf); + if (!(s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7)))) + return size; + } else if (s->mem[0] == buf[0] && + s->mem[2] == buf[1] && + s->mem[4] == buf[2] && + s->mem[6] == buf[3] && + s->mem[8] == buf[4] && + s->mem[10] == buf[5]) { + /* match */ + } else { + return size; + } + } + + + /* if too small buffer, then expand it */ + if (size < MIN_BUF_SIZE) { + memcpy(buf1, buf, size); + memset(buf1 + size, 0, MIN_BUF_SIZE - size); + buf = buf1; + size = MIN_BUF_SIZE; + } + + index = s->curpag << 8; + /* 4 bytes for header */ + total_len = size + 4; + /* address for next packet (4 bytes for CRC) */ + next = index + ((total_len + 4 + 255) & ~0xff); + if (next >= s->stop) + next -= (s->stop - s->start); + /* prepare packet header */ + p = s->mem + index; + s->rsr = ENRSR_RXOK; /* receive status */ + /* XXX: check this */ + if (buf[0] & 0x01) + s->rsr |= ENRSR_PHY; + p[0] = s->rsr; + p[1] = next >> 8; + p[2] = total_len; + p[3] = total_len >> 8; + index += 4; + + /* write packet data */ + while (size > 0) { + if (index <= s->stop) + avail = s->stop - index; + else + avail = 0; + len = size; + if (len > avail) + len = avail; + memcpy(s->mem + index, buf, len); + buf += len; + index += len; + if (index == s->stop) + index = s->start; + size -= len; + } + s->curpag = next >> 8; + + /* now we can signal we have received something */ + s->isr |= ENISR_RX; + ne2000_update_irq(s); + + return size_; +} + +static void ne2000_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + NE2000State *s = opaque; + int offset, page, index; + + addr &= 0xf; +#ifdef DEBUG_NE2000 + printf("NE2000: write addr=0x%x val=0x%02x\n", addr, val); +#endif + if (addr == E8390_CMD) { + /* control register */ + s->cmd = val; + if (!(val & E8390_STOP)) { /* START bit makes no sense on RTL8029... */ + s->isr &= ~ENISR_RESET; + /* test specific case: zero length transfer */ + if ((val & (E8390_RREAD | E8390_RWRITE)) && + s->rcnt == 0) { + s->isr |= ENISR_RDC; + ne2000_update_irq(s); + } + if (val & E8390_TRANS) { + index = (s->tpsr << 8); + /* XXX: next 2 lines are a hack to make netware 3.11 work */ + if (index >= NE2000_PMEM_END) + index -= NE2000_PMEM_SIZE; + /* fail safe: check range on the transmitted length */ + if (index + s->tcnt <= NE2000_PMEM_END) { + qemu_send_packet(qemu_get_queue(s->nic), s->mem + index, + s->tcnt); + } + /* signal end of transfer */ + s->tsr = ENTSR_PTX; + s->isr |= ENISR_TX; + s->cmd &= ~E8390_TRANS; + ne2000_update_irq(s); + } + } + } else { + page = s->cmd >> 6; + offset = addr | (page << 4); + switch(offset) { + case EN0_STARTPG: + s->start = val << 8; + break; + case EN0_STOPPG: + s->stop = val << 8; + break; + case EN0_BOUNDARY: + s->boundary = val; + break; + case EN0_IMR: + s->imr = val; + ne2000_update_irq(s); + break; + case EN0_TPSR: + s->tpsr = val; + break; + case EN0_TCNTLO: + s->tcnt = (s->tcnt & 0xff00) | val; + break; + case EN0_TCNTHI: + s->tcnt = (s->tcnt & 0x00ff) | (val << 8); + break; + case EN0_RSARLO: + s->rsar = (s->rsar & 0xff00) | val; + break; + case EN0_RSARHI: + s->rsar = (s->rsar & 0x00ff) | (val << 8); + break; + case EN0_RCNTLO: + s->rcnt = (s->rcnt & 0xff00) | val; + break; + case EN0_RCNTHI: + s->rcnt = (s->rcnt & 0x00ff) | (val << 8); + break; + case EN0_RXCR: + s->rxcr = val; + break; + case EN0_DCFG: + s->dcfg = val; + break; + case EN0_ISR: + s->isr &= ~(val & 0x7f); + ne2000_update_irq(s); + break; + case EN1_PHYS ... EN1_PHYS + 5: + s->phys[offset - EN1_PHYS] = val; + break; + case EN1_CURPAG: + s->curpag = val; + break; + case EN1_MULT ... EN1_MULT + 7: + s->mult[offset - EN1_MULT] = val; + break; + } + } +} + +static uint32_t ne2000_ioport_read(void *opaque, uint32_t addr) +{ + NE2000State *s = opaque; + int offset, page, ret; + + addr &= 0xf; + if (addr == E8390_CMD) { + ret = s->cmd; + } else { + page = s->cmd >> 6; + offset = addr | (page << 4); + switch(offset) { + case EN0_TSR: + ret = s->tsr; + break; + case EN0_BOUNDARY: + ret = s->boundary; + break; + case EN0_ISR: + ret = s->isr; + break; + case EN0_RSARLO: + ret = s->rsar & 0x00ff; + break; + case EN0_RSARHI: + ret = s->rsar >> 8; + break; + case EN1_PHYS ... EN1_PHYS + 5: + ret = s->phys[offset - EN1_PHYS]; + break; + case EN1_CURPAG: + ret = s->curpag; + break; + case EN1_MULT ... EN1_MULT + 7: + ret = s->mult[offset - EN1_MULT]; + break; + case EN0_RSR: + ret = s->rsr; + break; + case EN2_STARTPG: + ret = s->start >> 8; + break; + case EN2_STOPPG: + ret = s->stop >> 8; + break; + case EN0_RTL8029ID0: + ret = 0x50; + break; + case EN0_RTL8029ID1: + ret = 0x43; + break; + case EN3_CONFIG0: + ret = 0; /* 10baseT media */ + break; + case EN3_CONFIG2: + ret = 0x40; /* 10baseT active */ + break; + case EN3_CONFIG3: + ret = 0x40; /* Full duplex */ + break; + default: + ret = 0x00; + break; + } + } +#ifdef DEBUG_NE2000 + printf("NE2000: read addr=0x%x val=%02x\n", addr, ret); +#endif + return ret; +} + +static inline void ne2000_mem_writeb(NE2000State *s, uint32_t addr, + uint32_t val) +{ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + s->mem[addr] = val; + } +} + +static inline void ne2000_mem_writew(NE2000State *s, uint32_t addr, + uint32_t val) +{ + addr &= ~1; /* XXX: check exact behaviour if not even */ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + *(uint16_t *)(s->mem + addr) = cpu_to_le16(val); + } +} + +static inline void ne2000_mem_writel(NE2000State *s, uint32_t addr, + uint32_t val) +{ + addr &= ~1; /* XXX: check exact behaviour if not even */ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + stl_le_p(s->mem + addr, val); + } +} + +static inline uint32_t ne2000_mem_readb(NE2000State *s, uint32_t addr) +{ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + return s->mem[addr]; + } else { + return 0xff; + } +} + +static inline uint32_t ne2000_mem_readw(NE2000State *s, uint32_t addr) +{ + addr &= ~1; /* XXX: check exact behaviour if not even */ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + return le16_to_cpu(*(uint16_t *)(s->mem + addr)); + } else { + return 0xffff; + } +} + +static inline uint32_t ne2000_mem_readl(NE2000State *s, uint32_t addr) +{ + addr &= ~1; /* XXX: check exact behaviour if not even */ + if (addr < 32 || + (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) { + return ldl_le_p(s->mem + addr); + } else { + return 0xffffffff; + } +} + +static inline void ne2000_dma_update(NE2000State *s, int len) +{ + s->rsar += len; + /* wrap */ + /* XXX: check what to do if rsar > stop */ + if (s->rsar == s->stop) + s->rsar = s->start; + + if (s->rcnt <= len) { + s->rcnt = 0; + /* signal end of transfer */ + s->isr |= ENISR_RDC; + ne2000_update_irq(s); + } else { + s->rcnt -= len; + } +} + +static void ne2000_asic_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + NE2000State *s = opaque; + +#ifdef DEBUG_NE2000 + printf("NE2000: asic write val=0x%04x\n", val); +#endif + if (s->rcnt == 0) + return; + if (s->dcfg & 0x01) { + /* 16 bit access */ + ne2000_mem_writew(s, s->rsar, val); + ne2000_dma_update(s, 2); + } else { + /* 8 bit access */ + ne2000_mem_writeb(s, s->rsar, val); + ne2000_dma_update(s, 1); + } +} + +static uint32_t ne2000_asic_ioport_read(void *opaque, uint32_t addr) +{ + NE2000State *s = opaque; + int ret; + + if (s->dcfg & 0x01) { + /* 16 bit access */ + ret = ne2000_mem_readw(s, s->rsar); + ne2000_dma_update(s, 2); + } else { + /* 8 bit access */ + ret = ne2000_mem_readb(s, s->rsar); + ne2000_dma_update(s, 1); + } +#ifdef DEBUG_NE2000 + printf("NE2000: asic read val=0x%04x\n", ret); +#endif + return ret; +} + +static void ne2000_asic_ioport_writel(void *opaque, uint32_t addr, uint32_t val) +{ + NE2000State *s = opaque; + +#ifdef DEBUG_NE2000 + printf("NE2000: asic writel val=0x%04x\n", val); +#endif + if (s->rcnt == 0) + return; + /* 32 bit access */ + ne2000_mem_writel(s, s->rsar, val); + ne2000_dma_update(s, 4); +} + +static uint32_t ne2000_asic_ioport_readl(void *opaque, uint32_t addr) +{ + NE2000State *s = opaque; + int ret; + + /* 32 bit access */ + ret = ne2000_mem_readl(s, s->rsar); + ne2000_dma_update(s, 4); +#ifdef DEBUG_NE2000 + printf("NE2000: asic readl val=0x%04x\n", ret); +#endif + return ret; +} + +static void ne2000_reset_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + /* nothing to do (end of reset pulse) */ +} + +static uint32_t ne2000_reset_ioport_read(void *opaque, uint32_t addr) +{ + NE2000State *s = opaque; + ne2000_reset(s); + return 0; +} + +static int ne2000_post_load(void* opaque, int version_id) +{ + NE2000State* s = opaque; + + if (version_id < 2) { + s->rxcr = 0x0c; + } + return 0; +} + +const VMStateDescription vmstate_ne2000 = { + .name = "ne2000", + .version_id = 2, + .minimum_version_id = 0, + .post_load = ne2000_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8_V(rxcr, NE2000State, 2), + VMSTATE_UINT8(cmd, NE2000State), + VMSTATE_UINT32(start, NE2000State), + VMSTATE_UINT32(stop, NE2000State), + VMSTATE_UINT8(boundary, NE2000State), + VMSTATE_UINT8(tsr, NE2000State), + VMSTATE_UINT8(tpsr, NE2000State), + VMSTATE_UINT16(tcnt, NE2000State), + VMSTATE_UINT16(rcnt, NE2000State), + VMSTATE_UINT32(rsar, NE2000State), + VMSTATE_UINT8(rsr, NE2000State), + VMSTATE_UINT8(isr, NE2000State), + VMSTATE_UINT8(dcfg, NE2000State), + VMSTATE_UINT8(imr, NE2000State), + VMSTATE_BUFFER(phys, NE2000State), + VMSTATE_UINT8(curpag, NE2000State), + VMSTATE_BUFFER(mult, NE2000State), + VMSTATE_UNUSED(4), /* was irq */ + VMSTATE_BUFFER(mem, NE2000State), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_pci_ne2000 = { + .name = "ne2000", + .version_id = 3, + .minimum_version_id = 3, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, PCINE2000State), + VMSTATE_STRUCT(ne2000, PCINE2000State, 0, vmstate_ne2000, NE2000State), + VMSTATE_END_OF_LIST() + } +}; + +static uint64_t ne2000_read(void *opaque, hwaddr addr, + unsigned size) +{ + NE2000State *s = opaque; + + if (addr < 0x10 && size == 1) { + return ne2000_ioport_read(s, addr); + } else if (addr == 0x10) { + if (size <= 2) { + return ne2000_asic_ioport_read(s, addr); + } else { + return ne2000_asic_ioport_readl(s, addr); + } + } else if (addr == 0x1f && size == 1) { + return ne2000_reset_ioport_read(s, addr); + } + return ((uint64_t)1 << (size * 8)) - 1; +} + +static void ne2000_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + NE2000State *s = opaque; + + if (addr < 0x10 && size == 1) { + ne2000_ioport_write(s, addr, data); + } else if (addr == 0x10) { + if (size <= 2) { + ne2000_asic_ioport_write(s, addr, data); + } else { + ne2000_asic_ioport_writel(s, addr, data); + } + } else if (addr == 0x1f && size == 1) { + ne2000_reset_ioport_write(s, addr, data); + } +} + +static const MemoryRegionOps ne2000_ops = { + .read = ne2000_read, + .write = ne2000_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/***********************************************************/ +/* PCI NE2000 definitions */ + +void ne2000_setup_io(NE2000State *s, DeviceState *dev, unsigned size) +{ + memory_region_init_io(&s->io, OBJECT(dev), &ne2000_ops, s, "ne2000", size); +} + +static NetClientInfo net_ne2000_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = ne2000_can_receive, + .receive = ne2000_receive, +}; + +static void pci_ne2000_realize(PCIDevice *pci_dev, Error **errp) +{ + PCINE2000State *d = DO_UPCAST(PCINE2000State, dev, pci_dev); + NE2000State *s; + uint8_t *pci_conf; + + pci_conf = d->dev.config; + pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + + s = &d->ne2000; + ne2000_setup_io(s, DEVICE(pci_dev), 0x100); + pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io); + s->irq = pci_allocate_irq(&d->dev); + + qemu_macaddr_default_if_unset(&s->c.macaddr); + ne2000_reset(s); + + s->nic = qemu_new_nic(&net_ne2000_info, &s->c, + object_get_typename(OBJECT(pci_dev)), pci_dev->qdev.id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a); +} + +static void pci_ne2000_exit(PCIDevice *pci_dev) +{ + PCINE2000State *d = DO_UPCAST(PCINE2000State, dev, pci_dev); + NE2000State *s = &d->ne2000; + + qemu_del_nic(s->nic); + qemu_free_irq(s->irq); +} + +static void ne2000_instance_init(Object *obj) +{ + PCIDevice *pci_dev = PCI_DEVICE(obj); + PCINE2000State *d = DO_UPCAST(PCINE2000State, dev, pci_dev); + NE2000State *s = &d->ne2000; + + device_add_bootindex_property(obj, &s->c.bootindex, + "bootindex", "/ethernet-phy@0", + &pci_dev->qdev, NULL); +} + +static Property ne2000_properties[] = { + DEFINE_NIC_PROPERTIES(PCINE2000State, ne2000.c), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ne2000_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pci_ne2000_realize; + k->exit = pci_ne2000_exit; + k->romfile = "efi-ne2k_pci.rom", + k->vendor_id = PCI_VENDOR_ID_REALTEK; + k->device_id = PCI_DEVICE_ID_REALTEK_8029; + k->class_id = PCI_CLASS_NETWORK_ETHERNET; + dc->vmsd = &vmstate_pci_ne2000; + dc->props = ne2000_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static const TypeInfo ne2000_info = { + .name = "ne2k_pci", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCINE2000State), + .class_init = ne2000_class_init, + .instance_init = ne2000_instance_init, +}; + +static void ne2000_register_types(void) +{ + type_register_static(&ne2000_info); +} + +type_init(ne2000_register_types) diff --git a/qemu/hw/net/ne2000.h b/qemu/hw/net/ne2000.h new file mode 100644 index 000000000..e500306aa --- /dev/null +++ b/qemu/hw/net/ne2000.h @@ -0,0 +1,40 @@ +#ifndef HW_NE2000_H +#define HW_NE2000_H 1 + +#define NE2000_PMEM_SIZE (32*1024) +#define NE2000_PMEM_START (16*1024) +#define NE2000_PMEM_END (NE2000_PMEM_SIZE+NE2000_PMEM_START) +#define NE2000_MEM_SIZE NE2000_PMEM_END + +typedef struct NE2000State { + MemoryRegion io; + uint8_t cmd; + uint32_t start; + uint32_t stop; + uint8_t boundary; + uint8_t tsr; + uint8_t tpsr; + uint16_t tcnt; + uint16_t rcnt; + uint32_t rsar; + uint8_t rsr; + uint8_t rxcr; + uint8_t isr; + uint8_t dcfg; + uint8_t imr; + uint8_t phys[6]; /* mac address */ + uint8_t curpag; + uint8_t mult[8]; /* multicast mask array */ + qemu_irq irq; + NICState *nic; + NICConf c; + uint8_t mem[NE2000_MEM_SIZE]; +} NE2000State; + +void ne2000_setup_io(NE2000State *s, DeviceState *dev, unsigned size); +extern const VMStateDescription vmstate_ne2000; +void ne2000_reset(NE2000State *s); +int ne2000_can_receive(NetClientState *nc); +ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_); + +#endif diff --git a/qemu/hw/net/opencores_eth.c b/qemu/hw/net/opencores_eth.c new file mode 100644 index 000000000..3642046ef --- /dev/null +++ b/qemu/hw/net/opencores_eth.c @@ -0,0 +1,764 @@ +/* + * OpenCores Ethernet MAC 10/100 + subset of + * National Semiconductors DP83848C 10/100 PHY + * + * http://opencores.org/svnget,ethmac?file=%2Ftrunk%2F%2Fdoc%2Feth_speci.pdf + * http://cache.national.com/ds/DP/DP83848C.pdf + * + * Copyright (c) 2011, Max Filippov, Open Source and Linux Lab. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Open Source and Linux Lab nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "net/net.h" +#include "sysemu/sysemu.h" +#include "trace.h" + +/* RECSMALL is not used because it breaks tap networking in linux: + * incoming ARP responses are too short + */ +#undef USE_RECSMALL + +#define GET_FIELD(v, field) (((v) & (field)) >> (field ## _LBN)) +#define GET_REGBIT(s, reg, field) ((s)->regs[reg] & (reg ## _ ## field)) +#define GET_REGFIELD(s, reg, field) \ + GET_FIELD((s)->regs[reg], reg ## _ ## field) + +#define SET_FIELD(v, field, data) \ + ((v) = (((v) & ~(field)) | (((data) << (field ## _LBN)) & (field)))) +#define SET_REGFIELD(s, reg, field, data) \ + SET_FIELD((s)->regs[reg], reg ## _ ## field, data) + +/* PHY MII registers */ +enum { + MII_BMCR, + MII_BMSR, + MII_PHYIDR1, + MII_PHYIDR2, + MII_ANAR, + MII_ANLPAR, + MII_REG_MAX = 16, +}; + +typedef struct Mii { + uint16_t regs[MII_REG_MAX]; + bool link_ok; +} Mii; + +static void mii_set_link(Mii *s, bool link_ok) +{ + if (link_ok) { + s->regs[MII_BMSR] |= 0x4; + s->regs[MII_ANLPAR] |= 0x01e1; + } else { + s->regs[MII_BMSR] &= ~0x4; + s->regs[MII_ANLPAR] &= 0x01ff; + } + s->link_ok = link_ok; +} + +static void mii_reset(Mii *s) +{ + memset(s->regs, 0, sizeof(s->regs)); + s->regs[MII_BMCR] = 0x1000; + s->regs[MII_BMSR] = 0x7848; /* no ext regs */ + s->regs[MII_PHYIDR1] = 0x2000; + s->regs[MII_PHYIDR2] = 0x5c90; + s->regs[MII_ANAR] = 0x01e1; + mii_set_link(s, s->link_ok); +} + +static void mii_ro(Mii *s, uint16_t v) +{ +} + +static void mii_write_bmcr(Mii *s, uint16_t v) +{ + if (v & 0x8000) { + mii_reset(s); + } else { + s->regs[MII_BMCR] = v; + } +} + +static void mii_write_host(Mii *s, unsigned idx, uint16_t v) +{ + static void (*reg_write[MII_REG_MAX])(Mii *s, uint16_t v) = { + [MII_BMCR] = mii_write_bmcr, + [MII_BMSR] = mii_ro, + [MII_PHYIDR1] = mii_ro, + [MII_PHYIDR2] = mii_ro, + }; + + if (idx < MII_REG_MAX) { + trace_open_eth_mii_write(idx, v); + if (reg_write[idx]) { + reg_write[idx](s, v); + } else { + s->regs[idx] = v; + } + } +} + +static uint16_t mii_read_host(Mii *s, unsigned idx) +{ + trace_open_eth_mii_read(idx, s->regs[idx]); + return s->regs[idx]; +} + +/* OpenCores Ethernet registers */ +enum { + MODER, + INT_SOURCE, + INT_MASK, + IPGT, + IPGR1, + IPGR2, + PACKETLEN, + COLLCONF, + TX_BD_NUM, + CTRLMODER, + MIIMODER, + MIICOMMAND, + MIIADDRESS, + MIITX_DATA, + MIIRX_DATA, + MIISTATUS, + MAC_ADDR0, + MAC_ADDR1, + HASH0, + HASH1, + TXCTRL, + REG_MAX, +}; + +enum { + MODER_RECSMALL = 0x10000, + MODER_PAD = 0x8000, + MODER_HUGEN = 0x4000, + MODER_RST = 0x800, + MODER_LOOPBCK = 0x80, + MODER_PRO = 0x20, + MODER_IAM = 0x10, + MODER_BRO = 0x8, + MODER_TXEN = 0x2, + MODER_RXEN = 0x1, +}; + +enum { + INT_SOURCE_BUSY = 0x10, + INT_SOURCE_RXB = 0x4, + INT_SOURCE_TXB = 0x1, +}; + +enum { + PACKETLEN_MINFL = 0xffff0000, + PACKETLEN_MINFL_LBN = 16, + PACKETLEN_MAXFL = 0xffff, + PACKETLEN_MAXFL_LBN = 0, +}; + +enum { + MIICOMMAND_WCTRLDATA = 0x4, + MIICOMMAND_RSTAT = 0x2, + MIICOMMAND_SCANSTAT = 0x1, +}; + +enum { + MIIADDRESS_RGAD = 0x1f00, + MIIADDRESS_RGAD_LBN = 8, + MIIADDRESS_FIAD = 0x1f, + MIIADDRESS_FIAD_LBN = 0, +}; + +enum { + MIITX_DATA_CTRLDATA = 0xffff, + MIITX_DATA_CTRLDATA_LBN = 0, +}; + +enum { + MIIRX_DATA_PRSD = 0xffff, + MIIRX_DATA_PRSD_LBN = 0, +}; + +enum { + MIISTATUS_LINKFAIL = 0x1, + MIISTATUS_LINKFAIL_LBN = 0, +}; + +enum { + MAC_ADDR0_BYTE2 = 0xff000000, + MAC_ADDR0_BYTE2_LBN = 24, + MAC_ADDR0_BYTE3 = 0xff0000, + MAC_ADDR0_BYTE3_LBN = 16, + MAC_ADDR0_BYTE4 = 0xff00, + MAC_ADDR0_BYTE4_LBN = 8, + MAC_ADDR0_BYTE5 = 0xff, + MAC_ADDR0_BYTE5_LBN = 0, +}; + +enum { + MAC_ADDR1_BYTE0 = 0xff00, + MAC_ADDR1_BYTE0_LBN = 8, + MAC_ADDR1_BYTE1 = 0xff, + MAC_ADDR1_BYTE1_LBN = 0, +}; + +enum { + TXD_LEN = 0xffff0000, + TXD_LEN_LBN = 16, + TXD_RD = 0x8000, + TXD_IRQ = 0x4000, + TXD_WR = 0x2000, + TXD_PAD = 0x1000, + TXD_CRC = 0x800, + TXD_UR = 0x100, + TXD_RTRY = 0xf0, + TXD_RTRY_LBN = 4, + TXD_RL = 0x8, + TXD_LC = 0x4, + TXD_DF = 0x2, + TXD_CS = 0x1, +}; + +enum { + RXD_LEN = 0xffff0000, + RXD_LEN_LBN = 16, + RXD_E = 0x8000, + RXD_IRQ = 0x4000, + RXD_WRAP = 0x2000, + RXD_CF = 0x100, + RXD_M = 0x80, + RXD_OR = 0x40, + RXD_IS = 0x20, + RXD_DN = 0x10, + RXD_TL = 0x8, + RXD_SF = 0x4, + RXD_CRC = 0x2, + RXD_LC = 0x1, +}; + +typedef struct desc { + uint32_t len_flags; + uint32_t buf_ptr; +} desc; + +#define DEFAULT_PHY 1 + +#define TYPE_OPEN_ETH "open_eth" +#define OPEN_ETH(obj) OBJECT_CHECK(OpenEthState, (obj), TYPE_OPEN_ETH) + +typedef struct OpenEthState { + SysBusDevice parent_obj; + + NICState *nic; + NICConf conf; + MemoryRegion reg_io; + MemoryRegion desc_io; + qemu_irq irq; + + Mii mii; + uint32_t regs[REG_MAX]; + unsigned tx_desc; + unsigned rx_desc; + desc desc[128]; +} OpenEthState; + +static desc *rx_desc(OpenEthState *s) +{ + return s->desc + s->rx_desc; +} + +static desc *tx_desc(OpenEthState *s) +{ + return s->desc + s->tx_desc; +} + +static void open_eth_update_irq(OpenEthState *s, + uint32_t old, uint32_t new) +{ + if (!old != !new) { + trace_open_eth_update_irq(new); + qemu_set_irq(s->irq, new); + } +} + +static void open_eth_int_source_write(OpenEthState *s, + uint32_t val) +{ + uint32_t old_val = s->regs[INT_SOURCE]; + + s->regs[INT_SOURCE] = val; + open_eth_update_irq(s, old_val & s->regs[INT_MASK], + s->regs[INT_SOURCE] & s->regs[INT_MASK]); +} + +static void open_eth_set_link_status(NetClientState *nc) +{ + OpenEthState *s = qemu_get_nic_opaque(nc); + + if (GET_REGBIT(s, MIICOMMAND, SCANSTAT)) { + SET_REGFIELD(s, MIISTATUS, LINKFAIL, nc->link_down); + } + mii_set_link(&s->mii, !nc->link_down); +} + +static void open_eth_reset(void *opaque) +{ + OpenEthState *s = opaque; + + memset(s->regs, 0, sizeof(s->regs)); + s->regs[MODER] = 0xa000; + s->regs[IPGT] = 0x12; + s->regs[IPGR1] = 0xc; + s->regs[IPGR2] = 0x12; + s->regs[PACKETLEN] = 0x400600; + s->regs[COLLCONF] = 0xf003f; + s->regs[TX_BD_NUM] = 0x40; + s->regs[MIIMODER] = 0x64; + + s->tx_desc = 0; + s->rx_desc = 0x40; + + mii_reset(&s->mii); + open_eth_set_link_status(qemu_get_queue(s->nic)); +} + +static int open_eth_can_receive(NetClientState *nc) +{ + OpenEthState *s = qemu_get_nic_opaque(nc); + + return GET_REGBIT(s, MODER, RXEN) && + (s->regs[TX_BD_NUM] < 0x80); +} + +static ssize_t open_eth_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + OpenEthState *s = qemu_get_nic_opaque(nc); + size_t maxfl = GET_REGFIELD(s, PACKETLEN, MAXFL); + size_t minfl = GET_REGFIELD(s, PACKETLEN, MINFL); + size_t fcsl = 4; + bool miss = true; + + trace_open_eth_receive((unsigned)size); + + if (size >= 6) { + static const uint8_t bcast_addr[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + if (memcmp(buf, bcast_addr, sizeof(bcast_addr)) == 0) { + miss = GET_REGBIT(s, MODER, BRO); + } else if ((buf[0] & 0x1) || GET_REGBIT(s, MODER, IAM)) { + unsigned mcast_idx = compute_mcast_idx(buf); + miss = !(s->regs[HASH0 + mcast_idx / 32] & + (1 << (mcast_idx % 32))); + trace_open_eth_receive_mcast( + mcast_idx, s->regs[HASH0], s->regs[HASH1]); + } else { + miss = GET_REGFIELD(s, MAC_ADDR1, BYTE0) != buf[0] || + GET_REGFIELD(s, MAC_ADDR1, BYTE1) != buf[1] || + GET_REGFIELD(s, MAC_ADDR0, BYTE2) != buf[2] || + GET_REGFIELD(s, MAC_ADDR0, BYTE3) != buf[3] || + GET_REGFIELD(s, MAC_ADDR0, BYTE4) != buf[4] || + GET_REGFIELD(s, MAC_ADDR0, BYTE5) != buf[5]; + } + } + + if (miss && !GET_REGBIT(s, MODER, PRO)) { + trace_open_eth_receive_reject(); + return size; + } + +#ifdef USE_RECSMALL + if (GET_REGBIT(s, MODER, RECSMALL) || size >= minfl) { +#else + { +#endif + static const uint8_t zero[64] = {0}; + desc *desc = rx_desc(s); + size_t copy_size = GET_REGBIT(s, MODER, HUGEN) ? 65536 : maxfl; + + if (!(desc->len_flags & RXD_E)) { + open_eth_int_source_write(s, + s->regs[INT_SOURCE] | INT_SOURCE_BUSY); + return size; + } + + desc->len_flags &= ~(RXD_CF | RXD_M | RXD_OR | + RXD_IS | RXD_DN | RXD_TL | RXD_SF | RXD_CRC | RXD_LC); + + if (copy_size > size) { + copy_size = size; + } else { + fcsl = 0; + } + if (miss) { + desc->len_flags |= RXD_M; + } + if (GET_REGBIT(s, MODER, HUGEN) && size > maxfl) { + desc->len_flags |= RXD_TL; + } +#ifdef USE_RECSMALL + if (size < minfl) { + desc->len_flags |= RXD_SF; + } +#endif + + cpu_physical_memory_write(desc->buf_ptr, buf, copy_size); + + if (GET_REGBIT(s, MODER, PAD) && copy_size < minfl) { + if (minfl - copy_size > fcsl) { + fcsl = 0; + } else { + fcsl -= minfl - copy_size; + } + while (copy_size < minfl) { + size_t zero_sz = minfl - copy_size < sizeof(zero) ? + minfl - copy_size : sizeof(zero); + + cpu_physical_memory_write(desc->buf_ptr + copy_size, + zero, zero_sz); + copy_size += zero_sz; + } + } + + /* There's no FCS in the frames handed to us by the QEMU, zero fill it. + * Don't do it if the frame is cut at the MAXFL or padded with 4 or + * more bytes to the MINFL. + */ + cpu_physical_memory_write(desc->buf_ptr + copy_size, zero, fcsl); + copy_size += fcsl; + + SET_FIELD(desc->len_flags, RXD_LEN, copy_size); + + if ((desc->len_flags & RXD_WRAP) || s->rx_desc == 0x7f) { + s->rx_desc = s->regs[TX_BD_NUM]; + } else { + ++s->rx_desc; + } + desc->len_flags &= ~RXD_E; + + trace_open_eth_receive_desc(desc->buf_ptr, desc->len_flags); + + if (desc->len_flags & RXD_IRQ) { + open_eth_int_source_write(s, + s->regs[INT_SOURCE] | INT_SOURCE_RXB); + } + } + return size; +} + +static NetClientInfo net_open_eth_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = open_eth_can_receive, + .receive = open_eth_receive, + .link_status_changed = open_eth_set_link_status, +}; + +static void open_eth_start_xmit(OpenEthState *s, desc *tx) +{ + uint8_t buf[65536]; + unsigned len = GET_FIELD(tx->len_flags, TXD_LEN); + unsigned tx_len = len; + + if ((tx->len_flags & TXD_PAD) && + tx_len < GET_REGFIELD(s, PACKETLEN, MINFL)) { + tx_len = GET_REGFIELD(s, PACKETLEN, MINFL); + } + if (!GET_REGBIT(s, MODER, HUGEN) && + tx_len > GET_REGFIELD(s, PACKETLEN, MAXFL)) { + tx_len = GET_REGFIELD(s, PACKETLEN, MAXFL); + } + + trace_open_eth_start_xmit(tx->buf_ptr, len, tx_len); + + if (len > tx_len) { + len = tx_len; + } + cpu_physical_memory_read(tx->buf_ptr, buf, len); + if (tx_len > len) { + memset(buf + len, 0, tx_len - len); + } + qemu_send_packet(qemu_get_queue(s->nic), buf, tx_len); + + if (tx->len_flags & TXD_WR) { + s->tx_desc = 0; + } else { + ++s->tx_desc; + if (s->tx_desc >= s->regs[TX_BD_NUM]) { + s->tx_desc = 0; + } + } + tx->len_flags &= ~(TXD_RD | TXD_UR | + TXD_RTRY | TXD_RL | TXD_LC | TXD_DF | TXD_CS); + if (tx->len_flags & TXD_IRQ) { + open_eth_int_source_write(s, s->regs[INT_SOURCE] | INT_SOURCE_TXB); + } + +} + +static void open_eth_check_start_xmit(OpenEthState *s) +{ + desc *tx = tx_desc(s); + if (GET_REGBIT(s, MODER, TXEN) && s->regs[TX_BD_NUM] > 0 && + (tx->len_flags & TXD_RD) && + GET_FIELD(tx->len_flags, TXD_LEN) > 4) { + open_eth_start_xmit(s, tx); + } +} + +static uint64_t open_eth_reg_read(void *opaque, + hwaddr addr, unsigned int size) +{ + static uint32_t (*reg_read[REG_MAX])(OpenEthState *s) = { + }; + OpenEthState *s = opaque; + unsigned idx = addr / 4; + uint64_t v = 0; + + if (idx < REG_MAX) { + if (reg_read[idx]) { + v = reg_read[idx](s); + } else { + v = s->regs[idx]; + } + } + trace_open_eth_reg_read((uint32_t)addr, (uint32_t)v); + return v; +} + +static void open_eth_notify_can_receive(OpenEthState *s) +{ + NetClientState *nc = qemu_get_queue(s->nic); + + if (open_eth_can_receive(nc)) { + qemu_flush_queued_packets(nc); + } +} + +static void open_eth_ro(OpenEthState *s, uint32_t val) +{ +} + +static void open_eth_moder_host_write(OpenEthState *s, uint32_t val) +{ + uint32_t set = val & ~s->regs[MODER]; + + if (set & MODER_RST) { + open_eth_reset(s); + } + + s->regs[MODER] = val; + + if (set & MODER_RXEN) { + s->rx_desc = s->regs[TX_BD_NUM]; + open_eth_notify_can_receive(s); + } + if (set & MODER_TXEN) { + s->tx_desc = 0; + open_eth_check_start_xmit(s); + } +} + +static void open_eth_int_source_host_write(OpenEthState *s, uint32_t val) +{ + uint32_t old = s->regs[INT_SOURCE]; + + s->regs[INT_SOURCE] &= ~val; + open_eth_update_irq(s, old & s->regs[INT_MASK], + s->regs[INT_SOURCE] & s->regs[INT_MASK]); +} + +static void open_eth_int_mask_host_write(OpenEthState *s, uint32_t val) +{ + uint32_t old = s->regs[INT_MASK]; + + s->regs[INT_MASK] = val; + open_eth_update_irq(s, s->regs[INT_SOURCE] & old, + s->regs[INT_SOURCE] & s->regs[INT_MASK]); +} + +static void open_eth_tx_bd_num_host_write(OpenEthState *s, uint32_t val) +{ + if (val < 0x80) { + bool enable = s->regs[TX_BD_NUM] == 0x80; + + s->regs[TX_BD_NUM] = val; + if (enable) { + open_eth_notify_can_receive(s); + } + } +} + +static void open_eth_mii_command_host_write(OpenEthState *s, uint32_t val) +{ + unsigned fiad = GET_REGFIELD(s, MIIADDRESS, FIAD); + unsigned rgad = GET_REGFIELD(s, MIIADDRESS, RGAD); + + if (val & MIICOMMAND_WCTRLDATA) { + if (fiad == DEFAULT_PHY) { + mii_write_host(&s->mii, rgad, + GET_REGFIELD(s, MIITX_DATA, CTRLDATA)); + } + } + if (val & MIICOMMAND_RSTAT) { + if (fiad == DEFAULT_PHY) { + SET_REGFIELD(s, MIIRX_DATA, PRSD, + mii_read_host(&s->mii, rgad)); + } else { + s->regs[MIIRX_DATA] = 0xffff; + } + SET_REGFIELD(s, MIISTATUS, LINKFAIL, qemu_get_queue(s->nic)->link_down); + } +} + +static void open_eth_mii_tx_host_write(OpenEthState *s, uint32_t val) +{ + SET_REGFIELD(s, MIITX_DATA, CTRLDATA, val); + if (GET_REGFIELD(s, MIIADDRESS, FIAD) == DEFAULT_PHY) { + mii_write_host(&s->mii, GET_REGFIELD(s, MIIADDRESS, RGAD), + GET_REGFIELD(s, MIITX_DATA, CTRLDATA)); + } +} + +static void open_eth_reg_write(void *opaque, + hwaddr addr, uint64_t val, unsigned int size) +{ + static void (*reg_write[REG_MAX])(OpenEthState *s, uint32_t val) = { + [MODER] = open_eth_moder_host_write, + [INT_SOURCE] = open_eth_int_source_host_write, + [INT_MASK] = open_eth_int_mask_host_write, + [TX_BD_NUM] = open_eth_tx_bd_num_host_write, + [MIICOMMAND] = open_eth_mii_command_host_write, + [MIITX_DATA] = open_eth_mii_tx_host_write, + [MIISTATUS] = open_eth_ro, + }; + OpenEthState *s = opaque; + unsigned idx = addr / 4; + + if (idx < REG_MAX) { + trace_open_eth_reg_write((uint32_t)addr, (uint32_t)val); + if (reg_write[idx]) { + reg_write[idx](s, val); + } else { + s->regs[idx] = val; + } + } +} + +static uint64_t open_eth_desc_read(void *opaque, + hwaddr addr, unsigned int size) +{ + OpenEthState *s = opaque; + uint64_t v = 0; + + addr &= 0x3ff; + memcpy(&v, (uint8_t *)s->desc + addr, size); + trace_open_eth_desc_read((uint32_t)addr, (uint32_t)v); + return v; +} + +static void open_eth_desc_write(void *opaque, + hwaddr addr, uint64_t val, unsigned int size) +{ + OpenEthState *s = opaque; + + addr &= 0x3ff; + trace_open_eth_desc_write((uint32_t)addr, (uint32_t)val); + memcpy((uint8_t *)s->desc + addr, &val, size); + open_eth_check_start_xmit(s); +} + + +static const MemoryRegionOps open_eth_reg_ops = { + .read = open_eth_reg_read, + .write = open_eth_reg_write, +}; + +static const MemoryRegionOps open_eth_desc_ops = { + .read = open_eth_desc_read, + .write = open_eth_desc_write, +}; + +static int sysbus_open_eth_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + OpenEthState *s = OPEN_ETH(dev); + + memory_region_init_io(&s->reg_io, OBJECT(dev), &open_eth_reg_ops, s, + "open_eth.regs", 0x54); + sysbus_init_mmio(sbd, &s->reg_io); + + memory_region_init_io(&s->desc_io, OBJECT(dev), &open_eth_desc_ops, s, + "open_eth.desc", 0x400); + sysbus_init_mmio(sbd, &s->desc_io); + + sysbus_init_irq(sbd, &s->irq); + + s->nic = qemu_new_nic(&net_open_eth_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + return 0; +} + +static void qdev_open_eth_reset(DeviceState *dev) +{ + OpenEthState *d = OPEN_ETH(dev); + + open_eth_reset(d); +} + +static Property open_eth_properties[] = { + DEFINE_NIC_PROPERTIES(OpenEthState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void open_eth_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = sysbus_open_eth_init; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "Opencores 10/100 Mbit Ethernet"; + dc->reset = qdev_open_eth_reset; + dc->props = open_eth_properties; +} + +static const TypeInfo open_eth_info = { + .name = TYPE_OPEN_ETH, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(OpenEthState), + .class_init = open_eth_class_init, +}; + +static void open_eth_register_types(void) +{ + type_register_static(&open_eth_info); +} + +type_init(open_eth_register_types) diff --git a/qemu/hw/net/pcnet-pci.c b/qemu/hw/net/pcnet-pci.c new file mode 100644 index 000000000..b4d60b812 --- /dev/null +++ b/qemu/hw/net/pcnet-pci.c @@ -0,0 +1,374 @@ +/* + * QEMU AMD PC-Net II (Am79C970A) PCI emulation + * + * Copyright (c) 2004 Antony T Curtis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This software was written to be compatible with the specification: + * AMD Am79C970A PCnet-PCI II Ethernet Controller Data-Sheet + * AMD Publication# 19436 Rev:E Amendment/0 Issue Date: June 2000 + */ + +#include "hw/pci/pci.h" +#include "net/net.h" +#include "hw/loader.h" +#include "qemu/timer.h" +#include "sysemu/dma.h" +#include "sysemu/sysemu.h" +#include "trace.h" + +#include "pcnet.h" + +//#define PCNET_DEBUG +//#define PCNET_DEBUG_IO +//#define PCNET_DEBUG_BCR +//#define PCNET_DEBUG_CSR +//#define PCNET_DEBUG_RMD +//#define PCNET_DEBUG_TMD +//#define PCNET_DEBUG_MATCH + +#define TYPE_PCI_PCNET "pcnet" + +#define PCI_PCNET(obj) \ + OBJECT_CHECK(PCIPCNetState, (obj), TYPE_PCI_PCNET) + +typedef struct { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + PCNetState state; + MemoryRegion io_bar; +} PCIPCNetState; + +static void pcnet_aprom_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; + + trace_pcnet_aprom_writeb(opaque, addr, val); + if (BCR_APROMWE(s)) { + s->prom[addr & 15] = val; + } +} + +static uint32_t pcnet_aprom_readb(void *opaque, uint32_t addr) +{ + PCNetState *s = opaque; + uint32_t val = s->prom[addr & 15]; + + trace_pcnet_aprom_readb(opaque, addr, val); + return val; +} + +static uint64_t pcnet_ioport_read(void *opaque, hwaddr addr, + unsigned size) +{ + PCNetState *d = opaque; + + trace_pcnet_ioport_read(opaque, addr, size); + if (addr < 0x10) { + if (!BCR_DWIO(d) && size == 1) { + return pcnet_aprom_readb(d, addr); + } else if (!BCR_DWIO(d) && (addr & 1) == 0 && size == 2) { + return pcnet_aprom_readb(d, addr) | + (pcnet_aprom_readb(d, addr + 1) << 8); + } else if (BCR_DWIO(d) && (addr & 3) == 0 && size == 4) { + return pcnet_aprom_readb(d, addr) | + (pcnet_aprom_readb(d, addr + 1) << 8) | + (pcnet_aprom_readb(d, addr + 2) << 16) | + (pcnet_aprom_readb(d, addr + 3) << 24); + } + } else { + if (size == 2) { + return pcnet_ioport_readw(d, addr); + } else if (size == 4) { + return pcnet_ioport_readl(d, addr); + } + } + return ((uint64_t)1 << (size * 8)) - 1; +} + +static void pcnet_ioport_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + PCNetState *d = opaque; + + trace_pcnet_ioport_write(opaque, addr, data, size); + if (addr < 0x10) { + if (!BCR_DWIO(d) && size == 1) { + pcnet_aprom_writeb(d, addr, data); + } else if (!BCR_DWIO(d) && (addr & 1) == 0 && size == 2) { + pcnet_aprom_writeb(d, addr, data & 0xff); + pcnet_aprom_writeb(d, addr + 1, data >> 8); + } else if (BCR_DWIO(d) && (addr & 3) == 0 && size == 4) { + pcnet_aprom_writeb(d, addr, data & 0xff); + pcnet_aprom_writeb(d, addr + 1, (data >> 8) & 0xff); + pcnet_aprom_writeb(d, addr + 2, (data >> 16) & 0xff); + pcnet_aprom_writeb(d, addr + 3, data >> 24); + } + } else { + if (size == 2) { + pcnet_ioport_writew(d, addr, data); + } else if (size == 4) { + pcnet_ioport_writel(d, addr, data); + } + } +} + +static const MemoryRegionOps pcnet_io_ops = { + .read = pcnet_ioport_read, + .write = pcnet_ioport_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void pcnet_mmio_writeb(void *opaque, hwaddr addr, uint32_t val) +{ + PCNetState *d = opaque; + + trace_pcnet_mmio_writeb(opaque, addr, val); + if (!(addr & 0x10)) + pcnet_aprom_writeb(d, addr & 0x0f, val); +} + +static uint32_t pcnet_mmio_readb(void *opaque, hwaddr addr) +{ + PCNetState *d = opaque; + uint32_t val = -1; + + if (!(addr & 0x10)) + val = pcnet_aprom_readb(d, addr & 0x0f); + trace_pcnet_mmio_readb(opaque, addr, val); + return val; +} + +static void pcnet_mmio_writew(void *opaque, hwaddr addr, uint32_t val) +{ + PCNetState *d = opaque; + + trace_pcnet_mmio_writew(opaque, addr, val); + if (addr & 0x10) + pcnet_ioport_writew(d, addr & 0x0f, val); + else { + addr &= 0x0f; + pcnet_aprom_writeb(d, addr, val & 0xff); + pcnet_aprom_writeb(d, addr+1, (val & 0xff00) >> 8); + } +} + +static uint32_t pcnet_mmio_readw(void *opaque, hwaddr addr) +{ + PCNetState *d = opaque; + uint32_t val = -1; + + if (addr & 0x10) + val = pcnet_ioport_readw(d, addr & 0x0f); + else { + addr &= 0x0f; + val = pcnet_aprom_readb(d, addr+1); + val <<= 8; + val |= pcnet_aprom_readb(d, addr); + } + trace_pcnet_mmio_readw(opaque, addr, val); + return val; +} + +static void pcnet_mmio_writel(void *opaque, hwaddr addr, uint32_t val) +{ + PCNetState *d = opaque; + + trace_pcnet_mmio_writel(opaque, addr, val); + if (addr & 0x10) + pcnet_ioport_writel(d, addr & 0x0f, val); + else { + addr &= 0x0f; + pcnet_aprom_writeb(d, addr, val & 0xff); + pcnet_aprom_writeb(d, addr+1, (val & 0xff00) >> 8); + pcnet_aprom_writeb(d, addr+2, (val & 0xff0000) >> 16); + pcnet_aprom_writeb(d, addr+3, (val & 0xff000000) >> 24); + } +} + +static uint32_t pcnet_mmio_readl(void *opaque, hwaddr addr) +{ + PCNetState *d = opaque; + uint32_t val; + + if (addr & 0x10) + val = pcnet_ioport_readl(d, addr & 0x0f); + else { + addr &= 0x0f; + val = pcnet_aprom_readb(d, addr+3); + val <<= 8; + val |= pcnet_aprom_readb(d, addr+2); + val <<= 8; + val |= pcnet_aprom_readb(d, addr+1); + val <<= 8; + val |= pcnet_aprom_readb(d, addr); + } + trace_pcnet_mmio_readl(opaque, addr, val); + return val; +} + +static const VMStateDescription vmstate_pci_pcnet = { + .name = "pcnet", + .version_id = 3, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PCIPCNetState), + VMSTATE_STRUCT(state, PCIPCNetState, 0, vmstate_pcnet, PCNetState), + VMSTATE_END_OF_LIST() + } +}; + +/* PCI interface */ + +static const MemoryRegionOps pcnet_mmio_ops = { + .old_mmio = { + .read = { pcnet_mmio_readb, pcnet_mmio_readw, pcnet_mmio_readl }, + .write = { pcnet_mmio_writeb, pcnet_mmio_writew, pcnet_mmio_writel }, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void pci_physical_memory_write(void *dma_opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap) +{ + pci_dma_write(dma_opaque, addr, buf, len); +} + +static void pci_physical_memory_read(void *dma_opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap) +{ + pci_dma_read(dma_opaque, addr, buf, len); +} + +static void pci_pcnet_uninit(PCIDevice *dev) +{ + PCIPCNetState *d = PCI_PCNET(dev); + + qemu_free_irq(d->state.irq); + timer_del(d->state.poll_timer); + timer_free(d->state.poll_timer); + qemu_del_nic(d->state.nic); +} + +static NetClientInfo net_pci_pcnet_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = pcnet_receive, + .link_status_changed = pcnet_set_link_status, +}; + +static void pci_pcnet_realize(PCIDevice *pci_dev, Error **errp) +{ + PCIPCNetState *d = PCI_PCNET(pci_dev); + PCNetState *s = &d->state; + uint8_t *pci_conf; + +#if 0 + printf("sizeof(RMD)=%d, sizeof(TMD)=%d\n", + sizeof(struct pcnet_RMD), sizeof(struct pcnet_TMD)); +#endif + + pci_conf = pci_dev->config; + + pci_set_word(pci_conf + PCI_STATUS, + PCI_STATUS_FAST_BACK | PCI_STATUS_DEVSEL_MEDIUM); + + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, 0x0); + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0); + + pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + pci_conf[PCI_MIN_GNT] = 0x06; + pci_conf[PCI_MAX_LAT] = 0xff; + + /* Handler for memory-mapped I/O */ + memory_region_init_io(&d->state.mmio, OBJECT(d), &pcnet_mmio_ops, s, + "pcnet-mmio", PCNET_PNPMMIO_SIZE); + + memory_region_init_io(&d->io_bar, OBJECT(d), &pcnet_io_ops, s, "pcnet-io", + PCNET_IOPORT_SIZE); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &d->io_bar); + + pci_register_bar(pci_dev, 1, 0, &s->mmio); + + s->irq = pci_allocate_irq(pci_dev); + s->phys_mem_read = pci_physical_memory_read; + s->phys_mem_write = pci_physical_memory_write; + s->dma_opaque = pci_dev; + + pcnet_common_init(DEVICE(pci_dev), s, &net_pci_pcnet_info); +} + +static void pci_reset(DeviceState *dev) +{ + PCIPCNetState *d = PCI_PCNET(dev); + + pcnet_h_reset(&d->state); +} + +static void pcnet_instance_init(Object *obj) +{ + PCIPCNetState *d = PCI_PCNET(obj); + PCNetState *s = &d->state; + + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static Property pcnet_properties[] = { + DEFINE_NIC_PROPERTIES(PCIPCNetState, state.conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pcnet_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pci_pcnet_realize; + k->exit = pci_pcnet_uninit; + k->romfile = "efi-pcnet.rom", + k->vendor_id = PCI_VENDOR_ID_AMD; + k->device_id = PCI_DEVICE_ID_AMD_LANCE; + k->revision = 0x10; + k->class_id = PCI_CLASS_NETWORK_ETHERNET; + dc->reset = pci_reset; + dc->vmsd = &vmstate_pci_pcnet; + dc->props = pcnet_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static const TypeInfo pcnet_info = { + .name = TYPE_PCI_PCNET, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIPCNetState), + .class_init = pcnet_class_init, + .instance_init = pcnet_instance_init, +}; + +static void pci_pcnet_register_types(void) +{ + type_register_static(&pcnet_info); +} + +type_init(pci_pcnet_register_types) diff --git a/qemu/hw/net/pcnet.c b/qemu/hw/net/pcnet.c new file mode 100644 index 000000000..34373767d --- /dev/null +++ b/qemu/hw/net/pcnet.c @@ -0,0 +1,1753 @@ +/* + * QEMU AMD PC-Net II (Am79C970A) emulation + * + * Copyright (c) 2004 Antony T Curtis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This software was written to be compatible with the specification: + * AMD Am79C970A PCnet-PCI II Ethernet Controller Data-Sheet + * AMD Publication# 19436 Rev:E Amendment/0 Issue Date: June 2000 + */ + +/* + * On Sparc32, this is the Lance (Am7990) part of chip STP2000 (Master I/O), also + * produced as NCR89C100. See + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR89C100.txt + * and + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR92C990.txt + */ + +#include "hw/qdev.h" +#include "net/net.h" +#include "qemu/timer.h" +#include "qemu/sockets.h" +#include "sysemu/sysemu.h" +#include "trace.h" + +#include "pcnet.h" + +//#define PCNET_DEBUG +//#define PCNET_DEBUG_IO +//#define PCNET_DEBUG_BCR +//#define PCNET_DEBUG_CSR +//#define PCNET_DEBUG_RMD +//#define PCNET_DEBUG_TMD +//#define PCNET_DEBUG_MATCH + + +struct qemu_ether_header { + uint8_t ether_dhost[6]; + uint8_t ether_shost[6]; + uint16_t ether_type; +}; + +#define CSR_INIT(S) !!(((S)->csr[0])&0x0001) +#define CSR_STRT(S) !!(((S)->csr[0])&0x0002) +#define CSR_STOP(S) !!(((S)->csr[0])&0x0004) +#define CSR_TDMD(S) !!(((S)->csr[0])&0x0008) +#define CSR_TXON(S) !!(((S)->csr[0])&0x0010) +#define CSR_RXON(S) !!(((S)->csr[0])&0x0020) +#define CSR_INEA(S) !!(((S)->csr[0])&0x0040) +#define CSR_BSWP(S) !!(((S)->csr[3])&0x0004) +#define CSR_LAPPEN(S) !!(((S)->csr[3])&0x0020) +#define CSR_DXSUFLO(S) !!(((S)->csr[3])&0x0040) +#define CSR_ASTRP_RCV(S) !!(((S)->csr[4])&0x0800) +#define CSR_DPOLL(S) !!(((S)->csr[4])&0x1000) +#define CSR_SPND(S) !!(((S)->csr[5])&0x0001) +#define CSR_LTINTEN(S) !!(((S)->csr[5])&0x4000) +#define CSR_TOKINTD(S) !!(((S)->csr[5])&0x8000) +#define CSR_DRX(S) !!(((S)->csr[15])&0x0001) +#define CSR_DTX(S) !!(((S)->csr[15])&0x0002) +#define CSR_LOOP(S) !!(((S)->csr[15])&0x0004) +#define CSR_DXMTFCS(S) !!(((S)->csr[15])&0x0008) +#define CSR_INTL(S) !!(((S)->csr[15])&0x0040) +#define CSR_DRCVPA(S) !!(((S)->csr[15])&0x2000) +#define CSR_DRCVBC(S) !!(((S)->csr[15])&0x4000) +#define CSR_PROM(S) !!(((S)->csr[15])&0x8000) + +#define CSR_CRBC(S) ((S)->csr[40]) +#define CSR_CRST(S) ((S)->csr[41]) +#define CSR_CXBC(S) ((S)->csr[42]) +#define CSR_CXST(S) ((S)->csr[43]) +#define CSR_NRBC(S) ((S)->csr[44]) +#define CSR_NRST(S) ((S)->csr[45]) +#define CSR_POLL(S) ((S)->csr[46]) +#define CSR_PINT(S) ((S)->csr[47]) +#define CSR_RCVRC(S) ((S)->csr[72]) +#define CSR_XMTRC(S) ((S)->csr[74]) +#define CSR_RCVRL(S) ((S)->csr[76]) +#define CSR_XMTRL(S) ((S)->csr[78]) +#define CSR_MISSC(S) ((S)->csr[112]) + +#define CSR_IADR(S) ((S)->csr[ 1] | ((uint32_t)(S)->csr[ 2] << 16)) +#define CSR_CRBA(S) ((S)->csr[18] | ((uint32_t)(S)->csr[19] << 16)) +#define CSR_CXBA(S) ((S)->csr[20] | ((uint32_t)(S)->csr[21] << 16)) +#define CSR_NRBA(S) ((S)->csr[22] | ((uint32_t)(S)->csr[23] << 16)) +#define CSR_BADR(S) ((S)->csr[24] | ((uint32_t)(S)->csr[25] << 16)) +#define CSR_NRDA(S) ((S)->csr[26] | ((uint32_t)(S)->csr[27] << 16)) +#define CSR_CRDA(S) ((S)->csr[28] | ((uint32_t)(S)->csr[29] << 16)) +#define CSR_BADX(S) ((S)->csr[30] | ((uint32_t)(S)->csr[31] << 16)) +#define CSR_NXDA(S) ((S)->csr[32] | ((uint32_t)(S)->csr[33] << 16)) +#define CSR_CXDA(S) ((S)->csr[34] | ((uint32_t)(S)->csr[35] << 16)) +#define CSR_NNRD(S) ((S)->csr[36] | ((uint32_t)(S)->csr[37] << 16)) +#define CSR_NNXD(S) ((S)->csr[38] | ((uint32_t)(S)->csr[39] << 16)) +#define CSR_PXDA(S) ((S)->csr[60] | ((uint32_t)(S)->csr[61] << 16)) +#define CSR_NXBA(S) ((S)->csr[64] | ((uint32_t)(S)->csr[65] << 16)) + +#define PHYSADDR(S,A) \ + (BCR_SSIZE32(S) ? (A) : (A) | ((0xff00 & (uint32_t)(S)->csr[2])<<16)) + +struct pcnet_initblk16 { + uint16_t mode; + uint16_t padr[3]; + uint16_t ladrf[4]; + uint32_t rdra; + uint32_t tdra; +}; + +struct pcnet_initblk32 { + uint16_t mode; + uint8_t rlen; + uint8_t tlen; + uint16_t padr[3]; + uint16_t _res; + uint16_t ladrf[4]; + uint32_t rdra; + uint32_t tdra; +}; + +struct pcnet_TMD { + uint32_t tbadr; + int16_t length; + int16_t status; + uint32_t misc; + uint32_t res; +}; + +#define TMDL_BCNT_MASK 0x0fff +#define TMDL_BCNT_SH 0 +#define TMDL_ONES_MASK 0xf000 +#define TMDL_ONES_SH 12 + +#define TMDS_BPE_MASK 0x0080 +#define TMDS_BPE_SH 7 +#define TMDS_ENP_MASK 0x0100 +#define TMDS_ENP_SH 8 +#define TMDS_STP_MASK 0x0200 +#define TMDS_STP_SH 9 +#define TMDS_DEF_MASK 0x0400 +#define TMDS_DEF_SH 10 +#define TMDS_ONE_MASK 0x0800 +#define TMDS_ONE_SH 11 +#define TMDS_LTINT_MASK 0x1000 +#define TMDS_LTINT_SH 12 +#define TMDS_NOFCS_MASK 0x2000 +#define TMDS_NOFCS_SH 13 +#define TMDS_ADDFCS_MASK TMDS_NOFCS_MASK +#define TMDS_ADDFCS_SH TMDS_NOFCS_SH +#define TMDS_ERR_MASK 0x4000 +#define TMDS_ERR_SH 14 +#define TMDS_OWN_MASK 0x8000 +#define TMDS_OWN_SH 15 + +#define TMDM_TRC_MASK 0x0000000f +#define TMDM_TRC_SH 0 +#define TMDM_TDR_MASK 0x03ff0000 +#define TMDM_TDR_SH 16 +#define TMDM_RTRY_MASK 0x04000000 +#define TMDM_RTRY_SH 26 +#define TMDM_LCAR_MASK 0x08000000 +#define TMDM_LCAR_SH 27 +#define TMDM_LCOL_MASK 0x10000000 +#define TMDM_LCOL_SH 28 +#define TMDM_EXDEF_MASK 0x20000000 +#define TMDM_EXDEF_SH 29 +#define TMDM_UFLO_MASK 0x40000000 +#define TMDM_UFLO_SH 30 +#define TMDM_BUFF_MASK 0x80000000 +#define TMDM_BUFF_SH 31 + +struct pcnet_RMD { + uint32_t rbadr; + int16_t buf_length; + int16_t status; + uint32_t msg_length; + uint32_t res; +}; + +#define RMDL_BCNT_MASK 0x0fff +#define RMDL_BCNT_SH 0 +#define RMDL_ONES_MASK 0xf000 +#define RMDL_ONES_SH 12 + +#define RMDS_BAM_MASK 0x0010 +#define RMDS_BAM_SH 4 +#define RMDS_LFAM_MASK 0x0020 +#define RMDS_LFAM_SH 5 +#define RMDS_PAM_MASK 0x0040 +#define RMDS_PAM_SH 6 +#define RMDS_BPE_MASK 0x0080 +#define RMDS_BPE_SH 7 +#define RMDS_ENP_MASK 0x0100 +#define RMDS_ENP_SH 8 +#define RMDS_STP_MASK 0x0200 +#define RMDS_STP_SH 9 +#define RMDS_BUFF_MASK 0x0400 +#define RMDS_BUFF_SH 10 +#define RMDS_CRC_MASK 0x0800 +#define RMDS_CRC_SH 11 +#define RMDS_OFLO_MASK 0x1000 +#define RMDS_OFLO_SH 12 +#define RMDS_FRAM_MASK 0x2000 +#define RMDS_FRAM_SH 13 +#define RMDS_ERR_MASK 0x4000 +#define RMDS_ERR_SH 14 +#define RMDS_OWN_MASK 0x8000 +#define RMDS_OWN_SH 15 + +#define RMDM_MCNT_MASK 0x00000fff +#define RMDM_MCNT_SH 0 +#define RMDM_ZEROS_MASK 0x0000f000 +#define RMDM_ZEROS_SH 12 +#define RMDM_RPC_MASK 0x00ff0000 +#define RMDM_RPC_SH 16 +#define RMDM_RCC_MASK 0xff000000 +#define RMDM_RCC_SH 24 + +#define SET_FIELD(regp, name, field, value) \ + (*(regp) = (*(regp) & ~(name ## _ ## field ## _MASK)) \ + | ((value) << name ## _ ## field ## _SH)) + +#define GET_FIELD(reg, name, field) \ + (((reg) & name ## _ ## field ## _MASK) >> name ## _ ## field ## _SH) + +#define PRINT_TMD(T) printf( \ + "TMD0 : TBADR=0x%08x\n" \ + "TMD1 : OWN=%d, ERR=%d, FCS=%d, LTI=%d, " \ + "ONE=%d, DEF=%d, STP=%d, ENP=%d,\n" \ + " BPE=%d, BCNT=%d\n" \ + "TMD2 : BUF=%d, UFL=%d, EXD=%d, LCO=%d, " \ + "LCA=%d, RTR=%d,\n" \ + " TDR=%d, TRC=%d\n", \ + (T)->tbadr, \ + GET_FIELD((T)->status, TMDS, OWN), \ + GET_FIELD((T)->status, TMDS, ERR), \ + GET_FIELD((T)->status, TMDS, NOFCS), \ + GET_FIELD((T)->status, TMDS, LTINT), \ + GET_FIELD((T)->status, TMDS, ONE), \ + GET_FIELD((T)->status, TMDS, DEF), \ + GET_FIELD((T)->status, TMDS, STP), \ + GET_FIELD((T)->status, TMDS, ENP), \ + GET_FIELD((T)->status, TMDS, BPE), \ + 4096-GET_FIELD((T)->length, TMDL, BCNT), \ + GET_FIELD((T)->misc, TMDM, BUFF), \ + GET_FIELD((T)->misc, TMDM, UFLO), \ + GET_FIELD((T)->misc, TMDM, EXDEF), \ + GET_FIELD((T)->misc, TMDM, LCOL), \ + GET_FIELD((T)->misc, TMDM, LCAR), \ + GET_FIELD((T)->misc, TMDM, RTRY), \ + GET_FIELD((T)->misc, TMDM, TDR), \ + GET_FIELD((T)->misc, TMDM, TRC)) + +#define PRINT_RMD(R) printf( \ + "RMD0 : RBADR=0x%08x\n" \ + "RMD1 : OWN=%d, ERR=%d, FRAM=%d, OFLO=%d, " \ + "CRC=%d, BUFF=%d, STP=%d, ENP=%d,\n " \ + "BPE=%d, PAM=%d, LAFM=%d, BAM=%d, ONES=%d, BCNT=%d\n" \ + "RMD2 : RCC=%d, RPC=%d, MCNT=%d, ZEROS=%d\n", \ + (R)->rbadr, \ + GET_FIELD((R)->status, RMDS, OWN), \ + GET_FIELD((R)->status, RMDS, ERR), \ + GET_FIELD((R)->status, RMDS, FRAM), \ + GET_FIELD((R)->status, RMDS, OFLO), \ + GET_FIELD((R)->status, RMDS, CRC), \ + GET_FIELD((R)->status, RMDS, BUFF), \ + GET_FIELD((R)->status, RMDS, STP), \ + GET_FIELD((R)->status, RMDS, ENP), \ + GET_FIELD((R)->status, RMDS, BPE), \ + GET_FIELD((R)->status, RMDS, PAM), \ + GET_FIELD((R)->status, RMDS, LFAM), \ + GET_FIELD((R)->status, RMDS, BAM), \ + GET_FIELD((R)->buf_length, RMDL, ONES), \ + 4096-GET_FIELD((R)->buf_length, RMDL, BCNT), \ + GET_FIELD((R)->msg_length, RMDM, RCC), \ + GET_FIELD((R)->msg_length, RMDM, RPC), \ + GET_FIELD((R)->msg_length, RMDM, MCNT), \ + GET_FIELD((R)->msg_length, RMDM, ZEROS)) + +static inline void pcnet_tmd_load(PCNetState *s, struct pcnet_TMD *tmd, + hwaddr addr) +{ + if (!BCR_SSIZE32(s)) { + struct { + uint32_t tbadr; + int16_t length; + int16_t status; + } xda; + s->phys_mem_read(s->dma_opaque, addr, (void *)&xda, sizeof(xda), 0); + tmd->tbadr = le32_to_cpu(xda.tbadr) & 0xffffff; + tmd->length = le16_to_cpu(xda.length); + tmd->status = (le32_to_cpu(xda.tbadr) >> 16) & 0xff00; + tmd->misc = le16_to_cpu(xda.status) << 16; + tmd->res = 0; + } else { + s->phys_mem_read(s->dma_opaque, addr, (void *)tmd, sizeof(*tmd), 0); + le32_to_cpus(&tmd->tbadr); + le16_to_cpus((uint16_t *)&tmd->length); + le16_to_cpus((uint16_t *)&tmd->status); + le32_to_cpus(&tmd->misc); + le32_to_cpus(&tmd->res); + if (BCR_SWSTYLE(s) == 3) { + uint32_t tmp = tmd->tbadr; + tmd->tbadr = tmd->misc; + tmd->misc = tmp; + } + } +} + +static inline void pcnet_tmd_store(PCNetState *s, const struct pcnet_TMD *tmd, + hwaddr addr) +{ + if (!BCR_SSIZE32(s)) { + struct { + uint32_t tbadr; + int16_t length; + int16_t status; + } xda; + xda.tbadr = cpu_to_le32((tmd->tbadr & 0xffffff) | + ((tmd->status & 0xff00) << 16)); + xda.length = cpu_to_le16(tmd->length); + xda.status = cpu_to_le16(tmd->misc >> 16); + s->phys_mem_write(s->dma_opaque, addr, (void *)&xda, sizeof(xda), 0); + } else { + struct { + uint32_t tbadr; + int16_t length; + int16_t status; + uint32_t misc; + uint32_t res; + } xda; + xda.tbadr = cpu_to_le32(tmd->tbadr); + xda.length = cpu_to_le16(tmd->length); + xda.status = cpu_to_le16(tmd->status); + xda.misc = cpu_to_le32(tmd->misc); + xda.res = cpu_to_le32(tmd->res); + if (BCR_SWSTYLE(s) == 3) { + uint32_t tmp = xda.tbadr; + xda.tbadr = xda.misc; + xda.misc = tmp; + } + s->phys_mem_write(s->dma_opaque, addr, (void *)&xda, sizeof(xda), 0); + } +} + +static inline void pcnet_rmd_load(PCNetState *s, struct pcnet_RMD *rmd, + hwaddr addr) +{ + if (!BCR_SSIZE32(s)) { + struct { + uint32_t rbadr; + int16_t buf_length; + int16_t msg_length; + } rda; + s->phys_mem_read(s->dma_opaque, addr, (void *)&rda, sizeof(rda), 0); + rmd->rbadr = le32_to_cpu(rda.rbadr) & 0xffffff; + rmd->buf_length = le16_to_cpu(rda.buf_length); + rmd->status = (le32_to_cpu(rda.rbadr) >> 16) & 0xff00; + rmd->msg_length = le16_to_cpu(rda.msg_length); + rmd->res = 0; + } else { + s->phys_mem_read(s->dma_opaque, addr, (void *)rmd, sizeof(*rmd), 0); + le32_to_cpus(&rmd->rbadr); + le16_to_cpus((uint16_t *)&rmd->buf_length); + le16_to_cpus((uint16_t *)&rmd->status); + le32_to_cpus(&rmd->msg_length); + le32_to_cpus(&rmd->res); + if (BCR_SWSTYLE(s) == 3) { + uint32_t tmp = rmd->rbadr; + rmd->rbadr = rmd->msg_length; + rmd->msg_length = tmp; + } + } +} + +static inline void pcnet_rmd_store(PCNetState *s, struct pcnet_RMD *rmd, + hwaddr addr) +{ + if (!BCR_SSIZE32(s)) { + struct { + uint32_t rbadr; + int16_t buf_length; + int16_t msg_length; + } rda; + rda.rbadr = cpu_to_le32((rmd->rbadr & 0xffffff) | + ((rmd->status & 0xff00) << 16)); + rda.buf_length = cpu_to_le16(rmd->buf_length); + rda.msg_length = cpu_to_le16(rmd->msg_length); + s->phys_mem_write(s->dma_opaque, addr, (void *)&rda, sizeof(rda), 0); + } else { + struct { + uint32_t rbadr; + int16_t buf_length; + int16_t status; + uint32_t msg_length; + uint32_t res; + } rda; + rda.rbadr = cpu_to_le32(rmd->rbadr); + rda.buf_length = cpu_to_le16(rmd->buf_length); + rda.status = cpu_to_le16(rmd->status); + rda.msg_length = cpu_to_le32(rmd->msg_length); + rda.res = cpu_to_le32(rmd->res); + if (BCR_SWSTYLE(s) == 3) { + uint32_t tmp = rda.rbadr; + rda.rbadr = rda.msg_length; + rda.msg_length = tmp; + } + s->phys_mem_write(s->dma_opaque, addr, (void *)&rda, sizeof(rda), 0); + } +} + + +#define TMDLOAD(TMD,ADDR) pcnet_tmd_load(s,TMD,ADDR) + +#define TMDSTORE(TMD,ADDR) pcnet_tmd_store(s,TMD,ADDR) + +#define RMDLOAD(RMD,ADDR) pcnet_rmd_load(s,RMD,ADDR) + +#define RMDSTORE(RMD,ADDR) pcnet_rmd_store(s,RMD,ADDR) + +#if 1 + +#define CHECK_RMD(ADDR,RES) do { \ + struct pcnet_RMD rmd; \ + RMDLOAD(&rmd,(ADDR)); \ + (RES) |= (GET_FIELD(rmd.buf_length, RMDL, ONES) != 15) \ + || (GET_FIELD(rmd.msg_length, RMDM, ZEROS) != 0); \ +} while (0) + +#define CHECK_TMD(ADDR,RES) do { \ + struct pcnet_TMD tmd; \ + TMDLOAD(&tmd,(ADDR)); \ + (RES) |= (GET_FIELD(tmd.length, TMDL, ONES) != 15); \ +} while (0) + +#else + +#define CHECK_RMD(ADDR,RES) do { \ + switch (BCR_SWSTYLE(s)) { \ + case 0x00: \ + do { \ + uint16_t rda[4]; \ + s->phys_mem_read(s->dma_opaque, (ADDR), \ + (void *)&rda[0], sizeof(rda), 0); \ + (RES) |= (rda[2] & 0xf000)!=0xf000; \ + (RES) |= (rda[3] & 0xf000)!=0x0000; \ + } while (0); \ + break; \ + case 0x01: \ + case 0x02: \ + do { \ + uint32_t rda[4]; \ + s->phys_mem_read(s->dma_opaque, (ADDR), \ + (void *)&rda[0], sizeof(rda), 0); \ + (RES) |= (rda[1] & 0x0000f000L)!=0x0000f000L; \ + (RES) |= (rda[2] & 0x0000f000L)!=0x00000000L; \ + } while (0); \ + break; \ + case 0x03: \ + do { \ + uint32_t rda[4]; \ + s->phys_mem_read(s->dma_opaque, (ADDR), \ + (void *)&rda[0], sizeof(rda), 0); \ + (RES) |= (rda[0] & 0x0000f000L)!=0x00000000L; \ + (RES) |= (rda[1] & 0x0000f000L)!=0x0000f000L; \ + } while (0); \ + break; \ + } \ +} while (0) + +#define CHECK_TMD(ADDR,RES) do { \ + switch (BCR_SWSTYLE(s)) { \ + case 0x00: \ + do { \ + uint16_t xda[4]; \ + s->phys_mem_read(s->dma_opaque, (ADDR), \ + (void *)&xda[0], sizeof(xda), 0); \ + (RES) |= (xda[2] & 0xf000)!=0xf000; \ + } while (0); \ + break; \ + case 0x01: \ + case 0x02: \ + case 0x03: \ + do { \ + uint32_t xda[4]; \ + s->phys_mem_read(s->dma_opaque, (ADDR), \ + (void *)&xda[0], sizeof(xda), 0); \ + (RES) |= (xda[1] & 0x0000f000L)!=0x0000f000L; \ + } while (0); \ + break; \ + } \ +} while (0) + +#endif + +#define PRINT_PKTHDR(BUF) do { \ + struct qemu_ether_header *hdr = (void *)(BUF); \ + printf("packet dhost=%02x:%02x:%02x:%02x:%02x:%02x, " \ + "shost=%02x:%02x:%02x:%02x:%02x:%02x, " \ + "type=0x%04x\n", \ + hdr->ether_dhost[0],hdr->ether_dhost[1],hdr->ether_dhost[2], \ + hdr->ether_dhost[3],hdr->ether_dhost[4],hdr->ether_dhost[5], \ + hdr->ether_shost[0],hdr->ether_shost[1],hdr->ether_shost[2], \ + hdr->ether_shost[3],hdr->ether_shost[4],hdr->ether_shost[5], \ + be16_to_cpu(hdr->ether_type)); \ +} while (0) + +#define MULTICAST_FILTER_LEN 8 + +static inline uint32_t lnc_mchash(const uint8_t *ether_addr) +{ +#define LNC_POLYNOMIAL 0xEDB88320UL + uint32_t crc = 0xFFFFFFFF; + int idx, bit; + uint8_t data; + + for (idx = 0; idx < 6; idx++) { + for (data = *ether_addr++, bit = 0; bit < MULTICAST_FILTER_LEN; bit++) { + crc = (crc >> 1) ^ (((crc ^ data) & 1) ? LNC_POLYNOMIAL : 0); + data >>= 1; + } + } + return crc; +#undef LNC_POLYNOMIAL +} + +#define CRC(crc, ch) (crc = (crc >> 8) ^ crctab[(crc ^ (ch)) & 0xff]) + +/* generated using the AUTODIN II polynomial + * x^32 + x^26 + x^23 + x^22 + x^16 + + * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1 + */ +static const uint32_t crctab[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, +}; + +static inline int padr_match(PCNetState *s, const uint8_t *buf, int size) +{ + struct qemu_ether_header *hdr = (void *)buf; + uint8_t padr[6] = { + s->csr[12] & 0xff, s->csr[12] >> 8, + s->csr[13] & 0xff, s->csr[13] >> 8, + s->csr[14] & 0xff, s->csr[14] >> 8 + }; + int result = (!CSR_DRCVPA(s)) && !memcmp(hdr->ether_dhost, padr, 6); +#ifdef PCNET_DEBUG_MATCH + printf("packet dhost=%02x:%02x:%02x:%02x:%02x:%02x, " + "padr=%02x:%02x:%02x:%02x:%02x:%02x\n", + hdr->ether_dhost[0],hdr->ether_dhost[1],hdr->ether_dhost[2], + hdr->ether_dhost[3],hdr->ether_dhost[4],hdr->ether_dhost[5], + padr[0],padr[1],padr[2],padr[3],padr[4],padr[5]); + printf("padr_match result=%d\n", result); +#endif + return result; +} + +static inline int padr_bcast(PCNetState *s, const uint8_t *buf, int size) +{ + static const uint8_t BCAST[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct qemu_ether_header *hdr = (void *)buf; + int result = !CSR_DRCVBC(s) && !memcmp(hdr->ether_dhost, BCAST, 6); +#ifdef PCNET_DEBUG_MATCH + printf("padr_bcast result=%d\n", result); +#endif + return result; +} + +static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) +{ + struct qemu_ether_header *hdr = (void *)buf; + if ((*(hdr->ether_dhost)&0x01) && + ((uint64_t *)&s->csr[8])[0] != 0LL) { + uint8_t ladr[8] = { + s->csr[8] & 0xff, s->csr[8] >> 8, + s->csr[9] & 0xff, s->csr[9] >> 8, + s->csr[10] & 0xff, s->csr[10] >> 8, + s->csr[11] & 0xff, s->csr[11] >> 8 + }; + int index = lnc_mchash(hdr->ether_dhost) >> 26; + return !!(ladr[index >> 3] & (1 << (index & 7))); + } + return 0; +} + +static inline hwaddr pcnet_rdra_addr(PCNetState *s, int idx) +{ + while (idx < 1) idx += CSR_RCVRL(s); + return s->rdra + ((CSR_RCVRL(s) - idx) * (BCR_SWSTYLE(s) ? 16 : 8)); +} + +static inline int64_t pcnet_get_next_poll_time(PCNetState *s, int64_t current_time) +{ + int64_t next_time = current_time + + muldiv64(65536 - (CSR_SPND(s) ? 0 : CSR_POLL(s)), + get_ticks_per_sec(), 33000000L); + if (next_time <= current_time) + next_time = current_time + 1; + return next_time; +} + +static void pcnet_poll(PCNetState *s); +static void pcnet_poll_timer(void *opaque); + +static uint32_t pcnet_csr_readw(PCNetState *s, uint32_t rap); +static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value); +static void pcnet_bcr_writew(PCNetState *s, uint32_t rap, uint32_t val); + +static void pcnet_s_reset(PCNetState *s) +{ + trace_pcnet_s_reset(s); + + s->rdra = 0; + s->tdra = 0; + s->rap = 0; + + s->bcr[BCR_BSBC] &= ~0x0080; + + s->csr[0] = 0x0004; + s->csr[3] = 0x0000; + s->csr[4] = 0x0115; + s->csr[5] = 0x0000; + s->csr[6] = 0x0000; + s->csr[8] = 0; + s->csr[9] = 0; + s->csr[10] = 0; + s->csr[11] = 0; + s->csr[12] = le16_to_cpu(((uint16_t *)&s->prom[0])[0]); + s->csr[13] = le16_to_cpu(((uint16_t *)&s->prom[0])[1]); + s->csr[14] = le16_to_cpu(((uint16_t *)&s->prom[0])[2]); + s->csr[15] &= 0x21c4; + s->csr[72] = 1; + s->csr[74] = 1; + s->csr[76] = 1; + s->csr[78] = 1; + s->csr[80] = 0x1410; + s->csr[88] = 0x1003; + s->csr[89] = 0x0262; + s->csr[94] = 0x0000; + s->csr[100] = 0x0200; + s->csr[103] = 0x0105; + s->csr[112] = 0x0000; + s->csr[114] = 0x0000; + s->csr[122] = 0x0000; + s->csr[124] = 0x0000; + + s->tx_busy = 0; +} + +static void pcnet_update_irq(PCNetState *s) +{ + int isr = 0; + s->csr[0] &= ~0x0080; + +#if 1 + if (((s->csr[0] & ~s->csr[3]) & 0x5f00) || + (((s->csr[4]>>1) & ~s->csr[4]) & 0x0115) || + (((s->csr[5]>>1) & s->csr[5]) & 0x0048)) +#else + if ((!(s->csr[3] & 0x4000) && !!(s->csr[0] & 0x4000)) /* BABL */ || + (!(s->csr[3] & 0x1000) && !!(s->csr[0] & 0x1000)) /* MISS */ || + (!(s->csr[3] & 0x0100) && !!(s->csr[0] & 0x0100)) /* IDON */ || + (!(s->csr[3] & 0x0200) && !!(s->csr[0] & 0x0200)) /* TINT */ || + (!(s->csr[3] & 0x0400) && !!(s->csr[0] & 0x0400)) /* RINT */ || + (!(s->csr[3] & 0x0800) && !!(s->csr[0] & 0x0800)) /* MERR */ || + (!(s->csr[4] & 0x0001) && !!(s->csr[4] & 0x0002)) /* JAB */ || + (!(s->csr[4] & 0x0004) && !!(s->csr[4] & 0x0008)) /* TXSTRT */ || + (!(s->csr[4] & 0x0010) && !!(s->csr[4] & 0x0020)) /* RCVO */ || + (!(s->csr[4] & 0x0100) && !!(s->csr[4] & 0x0200)) /* MFCO */ || + (!!(s->csr[5] & 0x0040) && !!(s->csr[5] & 0x0080)) /* EXDINT */ || + (!!(s->csr[5] & 0x0008) && !!(s->csr[5] & 0x0010)) /* MPINT */) +#endif + { + + isr = CSR_INEA(s); + s->csr[0] |= 0x0080; + } + + if (!!(s->csr[4] & 0x0080) && CSR_INEA(s)) { /* UINT */ + s->csr[4] &= ~0x0080; + s->csr[4] |= 0x0040; + s->csr[0] |= 0x0080; + isr = 1; + trace_pcnet_user_int(s); + } + +#if 1 + if (((s->csr[5]>>1) & s->csr[5]) & 0x0500) +#else + if ((!!(s->csr[5] & 0x0400) && !!(s->csr[5] & 0x0800)) /* SINT */ || + (!!(s->csr[5] & 0x0100) && !!(s->csr[5] & 0x0200)) /* SLPINT */ ) +#endif + { + isr = 1; + s->csr[0] |= 0x0080; + } + + if (isr != s->isr) { + trace_pcnet_isr_change(s, isr, s->isr); + } + qemu_set_irq(s->irq, isr); + s->isr = isr; +} + +static void pcnet_init(PCNetState *s) +{ + int rlen, tlen; + uint16_t padr[3], ladrf[4], mode; + uint32_t rdra, tdra; + + trace_pcnet_init(s, PHYSADDR(s, CSR_IADR(s))); + + if (BCR_SSIZE32(s)) { + struct pcnet_initblk32 initblk; + s->phys_mem_read(s->dma_opaque, PHYSADDR(s,CSR_IADR(s)), + (uint8_t *)&initblk, sizeof(initblk), 0); + mode = le16_to_cpu(initblk.mode); + rlen = initblk.rlen >> 4; + tlen = initblk.tlen >> 4; + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); + rdra = le32_to_cpu(initblk.rdra); + tdra = le32_to_cpu(initblk.tdra); + } else { + struct pcnet_initblk16 initblk; + s->phys_mem_read(s->dma_opaque, PHYSADDR(s,CSR_IADR(s)), + (uint8_t *)&initblk, sizeof(initblk), 0); + mode = le16_to_cpu(initblk.mode); + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); + rdra = le32_to_cpu(initblk.rdra); + tdra = le32_to_cpu(initblk.tdra); + rlen = rdra >> 29; + tlen = tdra >> 29; + rdra &= 0x00ffffff; + tdra &= 0x00ffffff; + } + + trace_pcnet_rlen_tlen(s, rlen, tlen); + + CSR_RCVRL(s) = (rlen < 9) ? (1 << rlen) : 512; + CSR_XMTRL(s) = (tlen < 9) ? (1 << tlen) : 512; + s->csr[ 6] = (tlen << 12) | (rlen << 8); + s->csr[15] = mode; + s->csr[ 8] = ladrf[0]; + s->csr[ 9] = ladrf[1]; + s->csr[10] = ladrf[2]; + s->csr[11] = ladrf[3]; + s->csr[12] = padr[0]; + s->csr[13] = padr[1]; + s->csr[14] = padr[2]; + s->rdra = PHYSADDR(s, rdra); + s->tdra = PHYSADDR(s, tdra); + + CSR_RCVRC(s) = CSR_RCVRL(s); + CSR_XMTRC(s) = CSR_XMTRL(s); + + trace_pcnet_ss32_rdra_tdra(s, BCR_SSIZE32(s), + s->rdra, CSR_RCVRL(s), s->tdra, CSR_XMTRL(s)); + + s->csr[0] |= 0x0101; + s->csr[0] &= ~0x0004; /* clear STOP bit */ + + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + +static void pcnet_start(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_start\n"); +#endif + + if (!CSR_DTX(s)) + s->csr[0] |= 0x0010; /* set TXON */ + + if (!CSR_DRX(s)) + s->csr[0] |= 0x0020; /* set RXON */ + + s->csr[0] &= ~0x0004; /* clear STOP bit */ + s->csr[0] |= 0x0002; + pcnet_poll_timer(s); + + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + +static void pcnet_stop(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_stop\n"); +#endif + s->csr[0] &= ~0xffeb; + s->csr[0] |= 0x0014; + s->csr[4] &= ~0x02c2; + s->csr[5] &= ~0x0011; + pcnet_poll_timer(s); +} + +static void pcnet_rdte_poll(PCNetState *s) +{ + s->csr[28] = s->csr[29] = 0; + if (s->rdra) { + int bad = 0; +#if 1 + hwaddr crda = pcnet_rdra_addr(s, CSR_RCVRC(s)); + hwaddr nrda = pcnet_rdra_addr(s, -1 + CSR_RCVRC(s)); + hwaddr nnrd = pcnet_rdra_addr(s, -2 + CSR_RCVRC(s)); +#else + hwaddr crda = s->rdra + + (CSR_RCVRL(s) - CSR_RCVRC(s)) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + int nrdc = CSR_RCVRC(s)<=1 ? CSR_RCVRL(s) : CSR_RCVRC(s)-1; + hwaddr nrda = s->rdra + + (CSR_RCVRL(s) - nrdc) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + int nnrc = nrdc<=1 ? CSR_RCVRL(s) : nrdc-1; + hwaddr nnrd = s->rdra + + (CSR_RCVRL(s) - nnrc) * + (BCR_SWSTYLE(s) ? 16 : 8 ); +#endif + + CHECK_RMD(crda, bad); + if (!bad) { + CHECK_RMD(nrda, bad); + if (bad || (nrda == crda)) nrda = 0; + CHECK_RMD(nnrd, bad); + if (bad || (nnrd == crda)) nnrd = 0; + + s->csr[28] = crda & 0xffff; + s->csr[29] = crda >> 16; + s->csr[26] = nrda & 0xffff; + s->csr[27] = nrda >> 16; + s->csr[36] = nnrd & 0xffff; + s->csr[37] = nnrd >> 16; +#ifdef PCNET_DEBUG + if (bad) { + printf("pcnet: BAD RMD RECORDS AFTER 0x" TARGET_FMT_plx "\n", + crda); + } + } else { + printf("pcnet: BAD RMD RDA=0x" TARGET_FMT_plx "\n", + crda); +#endif + } + } + + if (CSR_CRDA(s)) { + struct pcnet_RMD rmd; + RMDLOAD(&rmd, PHYSADDR(s,CSR_CRDA(s))); + CSR_CRBC(s) = GET_FIELD(rmd.buf_length, RMDL, BCNT); + CSR_CRST(s) = rmd.status; +#ifdef PCNET_DEBUG_RMD_X + printf("CRDA=0x%08x CRST=0x%04x RCVRC=%d RMDL=0x%04x RMDS=0x%04x RMDM=0x%08x\n", + PHYSADDR(s,CSR_CRDA(s)), CSR_CRST(s), CSR_RCVRC(s), + rmd.buf_length, rmd.status, rmd.msg_length); + PRINT_RMD(&rmd); +#endif + } else { + CSR_CRBC(s) = CSR_CRST(s) = 0; + } + + if (CSR_NRDA(s)) { + struct pcnet_RMD rmd; + RMDLOAD(&rmd, PHYSADDR(s,CSR_NRDA(s))); + CSR_NRBC(s) = GET_FIELD(rmd.buf_length, RMDL, BCNT); + CSR_NRST(s) = rmd.status; + } else { + CSR_NRBC(s) = CSR_NRST(s) = 0; + } + +} + +static int pcnet_tdte_poll(PCNetState *s) +{ + s->csr[34] = s->csr[35] = 0; + if (s->tdra) { + hwaddr cxda = s->tdra + + (CSR_XMTRL(s) - CSR_XMTRC(s)) * + (BCR_SWSTYLE(s) ? 16 : 8); + int bad = 0; + CHECK_TMD(cxda, bad); + if (!bad) { + if (CSR_CXDA(s) != cxda) { + s->csr[60] = s->csr[34]; + s->csr[61] = s->csr[35]; + s->csr[62] = CSR_CXBC(s); + s->csr[63] = CSR_CXST(s); + } + s->csr[34] = cxda & 0xffff; + s->csr[35] = cxda >> 16; +#ifdef PCNET_DEBUG_X + printf("pcnet: BAD TMD XDA=0x%08x\n", cxda); +#endif + } + } + + if (CSR_CXDA(s)) { + struct pcnet_TMD tmd; + + TMDLOAD(&tmd, PHYSADDR(s,CSR_CXDA(s))); + + CSR_CXBC(s) = GET_FIELD(tmd.length, TMDL, BCNT); + CSR_CXST(s) = tmd.status; + } else { + CSR_CXBC(s) = CSR_CXST(s) = 0; + } + + return !!(CSR_CXST(s) & 0x8000); +} + +#define MIN_BUF_SIZE 60 + +ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_) +{ + PCNetState *s = qemu_get_nic_opaque(nc); + int is_padr = 0, is_bcast = 0, is_ladr = 0; + uint8_t buf1[60]; + int remaining; + int crc_err = 0; + int size = size_; + + if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size || + (CSR_LOOP(s) && !s->looptest)) { + return -1; + } +#ifdef PCNET_DEBUG + printf("pcnet_receive size=%d\n", size); +#endif + + /* if too small buffer, then expand it */ + if (size < MIN_BUF_SIZE) { + memcpy(buf1, buf, size); + memset(buf1 + size, 0, MIN_BUF_SIZE - size); + buf = buf1; + size = MIN_BUF_SIZE; + } + + if (CSR_PROM(s) + || (is_padr=padr_match(s, buf, size)) + || (is_bcast=padr_bcast(s, buf, size)) + || (is_ladr=ladr_match(s, buf, size))) { + + pcnet_rdte_poll(s); + + if (!(CSR_CRST(s) & 0x8000) && s->rdra) { + struct pcnet_RMD rmd; + int rcvrc = CSR_RCVRC(s)-1,i; + hwaddr nrda; + for (i = CSR_RCVRL(s)-1; i > 0; i--, rcvrc--) { + if (rcvrc <= 1) + rcvrc = CSR_RCVRL(s); + nrda = s->rdra + + (CSR_RCVRL(s) - rcvrc) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + RMDLOAD(&rmd, nrda); + if (GET_FIELD(rmd.status, RMDS, OWN)) { +#ifdef PCNET_DEBUG_RMD + printf("pcnet - scan buffer: RCVRC=%d PREV_RCVRC=%d\n", + rcvrc, CSR_RCVRC(s)); +#endif + CSR_RCVRC(s) = rcvrc; + pcnet_rdte_poll(s); + break; + } + } + } + + if (!(CSR_CRST(s) & 0x8000)) { +#ifdef PCNET_DEBUG_RMD + printf("pcnet - no buffer: RCVRC=%d\n", CSR_RCVRC(s)); +#endif + s->csr[0] |= 0x1000; /* Set MISS flag */ + CSR_MISSC(s)++; + } else { + uint8_t *src = s->buffer; + hwaddr crda = CSR_CRDA(s); + struct pcnet_RMD rmd; + int pktcount = 0; + + if (!s->looptest) { + memcpy(src, buf, size); + /* no need to compute the CRC */ + src[size] = 0; + src[size + 1] = 0; + src[size + 2] = 0; + src[size + 3] = 0; + size += 4; + } else if (s->looptest == PCNET_LOOPTEST_CRC || + !CSR_DXMTFCS(s) || size < MIN_BUF_SIZE+4) { + uint32_t fcs = ~0; + uint8_t *p = src; + + while (p != &src[size]) + CRC(fcs, *p++); + *(uint32_t *)p = htonl(fcs); + size += 4; + } else { + uint32_t fcs = ~0; + uint8_t *p = src; + + while (p != &src[size-4]) + CRC(fcs, *p++); + crc_err = (*(uint32_t *)p != htonl(fcs)); + } + +#ifdef PCNET_DEBUG_MATCH + PRINT_PKTHDR(buf); +#endif + + RMDLOAD(&rmd, PHYSADDR(s,crda)); + /*if (!CSR_LAPPEN(s))*/ + SET_FIELD(&rmd.status, RMDS, STP, 1); + +#define PCNET_RECV_STORE() do { \ + int count = MIN(4096 - GET_FIELD(rmd.buf_length, RMDL, BCNT),remaining); \ + hwaddr rbadr = PHYSADDR(s, rmd.rbadr); \ + s->phys_mem_write(s->dma_opaque, rbadr, src, count, CSR_BSWP(s)); \ + src += count; remaining -= count; \ + SET_FIELD(&rmd.status, RMDS, OWN, 0); \ + RMDSTORE(&rmd, PHYSADDR(s,crda)); \ + pktcount++; \ +} while (0) + + remaining = size; + PCNET_RECV_STORE(); + if ((remaining > 0) && CSR_NRDA(s)) { + hwaddr nrda = CSR_NRDA(s); +#ifdef PCNET_DEBUG_RMD + PRINT_RMD(&rmd); +#endif + RMDLOAD(&rmd, PHYSADDR(s,nrda)); + if (GET_FIELD(rmd.status, RMDS, OWN)) { + crda = nrda; + PCNET_RECV_STORE(); +#ifdef PCNET_DEBUG_RMD + PRINT_RMD(&rmd); +#endif + if ((remaining > 0) && (nrda=CSR_NNRD(s))) { + RMDLOAD(&rmd, PHYSADDR(s,nrda)); + if (GET_FIELD(rmd.status, RMDS, OWN)) { + crda = nrda; + PCNET_RECV_STORE(); + } + } + } + } + +#undef PCNET_RECV_STORE + + RMDLOAD(&rmd, PHYSADDR(s,crda)); + if (remaining == 0) { + SET_FIELD(&rmd.msg_length, RMDM, MCNT, size); + SET_FIELD(&rmd.status, RMDS, ENP, 1); + SET_FIELD(&rmd.status, RMDS, PAM, !CSR_PROM(s) && is_padr); + SET_FIELD(&rmd.status, RMDS, LFAM, !CSR_PROM(s) && is_ladr); + SET_FIELD(&rmd.status, RMDS, BAM, !CSR_PROM(s) && is_bcast); + if (crc_err) { + SET_FIELD(&rmd.status, RMDS, CRC, 1); + SET_FIELD(&rmd.status, RMDS, ERR, 1); + } + } else { + SET_FIELD(&rmd.status, RMDS, OFLO, 1); + SET_FIELD(&rmd.status, RMDS, BUFF, 1); + SET_FIELD(&rmd.status, RMDS, ERR, 1); + } + RMDSTORE(&rmd, PHYSADDR(s,crda)); + s->csr[0] |= 0x0400; + +#ifdef PCNET_DEBUG + printf("RCVRC=%d CRDA=0x%08x BLKS=%d\n", + CSR_RCVRC(s), PHYSADDR(s,CSR_CRDA(s)), pktcount); +#endif +#ifdef PCNET_DEBUG_RMD + PRINT_RMD(&rmd); +#endif + + while (pktcount--) { + if (CSR_RCVRC(s) <= 1) + CSR_RCVRC(s) = CSR_RCVRL(s); + else + CSR_RCVRC(s)--; + } + + pcnet_rdte_poll(s); + + } + } + + pcnet_poll(s); + pcnet_update_irq(s); + + return size_; +} + +void pcnet_set_link_status(NetClientState *nc) +{ + PCNetState *d = qemu_get_nic_opaque(nc); + + d->lnkst = nc->link_down ? 0 : 0x40; +} + +static void pcnet_transmit(PCNetState *s) +{ + hwaddr xmit_cxda = 0; + int count = CSR_XMTRL(s)-1; + int add_crc = 0; + int bcnt; + s->xmit_pos = -1; + + if (!CSR_TXON(s)) { + s->csr[0] &= ~0x0008; + return; + } + + s->tx_busy = 1; + + txagain: + if (pcnet_tdte_poll(s)) { + struct pcnet_TMD tmd; + + TMDLOAD(&tmd, PHYSADDR(s,CSR_CXDA(s))); + +#ifdef PCNET_DEBUG_TMD + printf(" TMDLOAD 0x%08x\n", PHYSADDR(s,CSR_CXDA(s))); + PRINT_TMD(&tmd); +#endif + if (GET_FIELD(tmd.status, TMDS, STP)) { + s->xmit_pos = 0; + xmit_cxda = PHYSADDR(s,CSR_CXDA(s)); + if (BCR_SWSTYLE(s) != 1) + add_crc = GET_FIELD(tmd.status, TMDS, ADDFCS); + } + if (s->lnkst == 0 && + (!CSR_LOOP(s) || (!CSR_INTL(s) && !BCR_TMAULOOP(s)))) { + SET_FIELD(&tmd.misc, TMDM, LCAR, 1); + SET_FIELD(&tmd.status, TMDS, ERR, 1); + SET_FIELD(&tmd.status, TMDS, OWN, 0); + s->csr[0] |= 0xa000; /* ERR | CERR */ + s->xmit_pos = -1; + goto txdone; + } + + if (s->xmit_pos < 0) { + goto txdone; + } + + bcnt = 4096 - GET_FIELD(tmd.length, TMDL, BCNT); + + /* if multi-tmd packet outsizes s->buffer then skip it silently. + Note: this is not what real hw does */ + if (s->xmit_pos + bcnt > sizeof(s->buffer)) { + s->xmit_pos = -1; + goto txdone; + } + + s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr), + s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s)); + s->xmit_pos += bcnt; + + if (!GET_FIELD(tmd.status, TMDS, ENP)) { + goto txdone; + } + +#ifdef PCNET_DEBUG + printf("pcnet_transmit size=%d\n", s->xmit_pos); +#endif + if (CSR_LOOP(s)) { + if (BCR_SWSTYLE(s) == 1) + add_crc = !GET_FIELD(tmd.status, TMDS, NOFCS); + s->looptest = add_crc ? PCNET_LOOPTEST_CRC : PCNET_LOOPTEST_NOCRC; + pcnet_receive(qemu_get_queue(s->nic), s->buffer, s->xmit_pos); + s->looptest = 0; + } else { + if (s->nic) { + qemu_send_packet(qemu_get_queue(s->nic), s->buffer, + s->xmit_pos); + } + } + + s->csr[0] &= ~0x0008; /* clear TDMD */ + s->csr[4] |= 0x0004; /* set TXSTRT */ + s->xmit_pos = -1; + + txdone: + SET_FIELD(&tmd.status, TMDS, OWN, 0); + TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s))); + if (!CSR_TOKINTD(s) || (CSR_LTINTEN(s) && GET_FIELD(tmd.status, TMDS, LTINT))) + s->csr[0] |= 0x0200; /* set TINT */ + + if (CSR_XMTRC(s)<=1) + CSR_XMTRC(s) = CSR_XMTRL(s); + else + CSR_XMTRC(s)--; + if (count--) + goto txagain; + + } else + if (s->xmit_pos >= 0) { + struct pcnet_TMD tmd; + TMDLOAD(&tmd, xmit_cxda); + SET_FIELD(&tmd.misc, TMDM, BUFF, 1); + SET_FIELD(&tmd.misc, TMDM, UFLO, 1); + SET_FIELD(&tmd.status, TMDS, ERR, 1); + SET_FIELD(&tmd.status, TMDS, OWN, 0); + TMDSTORE(&tmd, xmit_cxda); + s->csr[0] |= 0x0200; /* set TINT */ + if (!CSR_DXSUFLO(s)) { + s->csr[0] &= ~0x0010; + } else + if (count--) + goto txagain; + } + + s->tx_busy = 0; +} + +static void pcnet_poll(PCNetState *s) +{ + if (CSR_RXON(s)) { + pcnet_rdte_poll(s); + } + + if (CSR_TDMD(s) || + (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) + { + /* prevent recursion */ + if (s->tx_busy) + return; + + pcnet_transmit(s); + } +} + +static void pcnet_poll_timer(void *opaque) +{ + PCNetState *s = opaque; + + timer_del(s->poll_timer); + + if (CSR_TDMD(s)) { + pcnet_transmit(s); + } + + pcnet_update_irq(s); + + if (!CSR_STOP(s) && !CSR_SPND(s) && !CSR_DPOLL(s)) { + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) * 33; + if (!s->timer || !now) + s->timer = now; + else { + uint64_t t = now - s->timer + CSR_POLL(s); + if (t > 0xffffLL) { + pcnet_poll(s); + CSR_POLL(s) = CSR_PINT(s); + } else + CSR_POLL(s) = t; + } + timer_mod(s->poll_timer, + pcnet_get_next_poll_time(s,qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL))); + } +} + + +static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) +{ + uint16_t val = new_value; +#ifdef PCNET_DEBUG_CSR + printf("pcnet_csr_writew rap=%d val=0x%04x\n", rap, val); +#endif + switch (rap) { + case 0: + s->csr[0] &= ~(val & 0x7f00); /* Clear any interrupt flags */ + + s->csr[0] = (s->csr[0] & ~0x0040) | (val & 0x0048); + + val = (val & 0x007f) | (s->csr[0] & 0x7f00); + + /* IFF STOP, STRT and INIT are set, clear STRT and INIT */ + if ((val&7) == 7) + val &= ~3; + + if (!CSR_STOP(s) && (val & 4)) + pcnet_stop(s); + + if (!CSR_INIT(s) && (val & 1)) + pcnet_init(s); + + if (!CSR_STRT(s) && (val & 2)) + pcnet_start(s); + + if (CSR_TDMD(s)) + pcnet_transmit(s); + + return; + case 1: + case 2: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 18: /* CRBAL */ + case 19: /* CRBAU */ + case 20: /* CXBAL */ + case 21: /* CXBAU */ + case 22: /* NRBAU */ + case 23: /* NRBAU */ + case 24: + case 25: + case 26: + case 27: + case 28: + case 29: + case 30: + case 31: + case 32: + case 33: + case 34: + case 35: + case 36: + case 37: + case 38: + case 39: + case 40: /* CRBC */ + case 41: + case 42: /* CXBC */ + case 43: + case 44: + case 45: + case 46: /* POLL */ + case 47: /* POLLINT */ + case 72: + case 74: + case 76: /* RCVRL */ + case 78: /* XMTRL */ + case 112: + if (CSR_STOP(s) || CSR_SPND(s)) + break; + return; + case 3: + break; + case 4: + s->csr[4] &= ~(val & 0x026a); + val &= ~0x026a; val |= s->csr[4] & 0x026a; + break; + case 5: + s->csr[5] &= ~(val & 0x0a90); + val &= ~0x0a90; val |= s->csr[5] & 0x0a90; + break; + case 16: + pcnet_csr_writew(s,1,val); + return; + case 17: + pcnet_csr_writew(s,2,val); + return; + case 58: + pcnet_bcr_writew(s,BCR_SWS,val); + break; + default: + return; + } + s->csr[rap] = val; +} + +static uint32_t pcnet_csr_readw(PCNetState *s, uint32_t rap) +{ + uint32_t val; + switch (rap) { + case 0: + pcnet_update_irq(s); + val = s->csr[0]; + val |= (val & 0x7800) ? 0x8000 : 0; + break; + case 16: + return pcnet_csr_readw(s,1); + case 17: + return pcnet_csr_readw(s,2); + case 58: + return pcnet_bcr_readw(s,BCR_SWS); + case 88: + val = s->csr[89]; + val <<= 16; + val |= s->csr[88]; + break; + default: + val = s->csr[rap]; + } +#ifdef PCNET_DEBUG_CSR + printf("pcnet_csr_readw rap=%d val=0x%04x\n", rap, val); +#endif + return val; +} + +static void pcnet_bcr_writew(PCNetState *s, uint32_t rap, uint32_t val) +{ + rap &= 127; +#ifdef PCNET_DEBUG_BCR + printf("pcnet_bcr_writew rap=%d val=0x%04x\n", rap, val); +#endif + switch (rap) { + case BCR_SWS: + if (!(CSR_STOP(s) || CSR_SPND(s))) + return; + val &= ~0x0300; + switch (val & 0x00ff) { + case 0: + val |= 0x0200; + break; + case 1: + val |= 0x0100; + break; + case 2: + case 3: + val |= 0x0300; + break; + default: + printf("Bad SWSTYLE=0x%02x\n", val & 0xff); + val = 0x0200; + break; + } +#ifdef PCNET_DEBUG + printf("BCR_SWS=0x%04x\n", val); +#endif + /* fall through */ + case BCR_LNKST: + case BCR_LED1: + case BCR_LED2: + case BCR_LED3: + case BCR_MC: + case BCR_FDC: + case BCR_BSBC: + case BCR_EECAS: + case BCR_PLAT: + s->bcr[rap] = val; + break; + default: + break; + } +} + +uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap) +{ + uint32_t val; + rap &= 127; + switch (rap) { + case BCR_LNKST: + case BCR_LED1: + case BCR_LED2: + case BCR_LED3: + val = s->bcr[rap] & ~0x8000; + val |= (val & 0x017f & s->lnkst) ? 0x8000 : 0; + break; + default: + val = rap < 32 ? s->bcr[rap] : 0; + break; + } +#ifdef PCNET_DEBUG_BCR + printf("pcnet_bcr_readw rap=%d val=0x%04x\n", rap, val); +#endif + return val; +} + +void pcnet_h_reset(void *opaque) +{ + PCNetState *s = opaque; + + s->bcr[BCR_MSRDA] = 0x0005; + s->bcr[BCR_MSWRA] = 0x0005; + s->bcr[BCR_MC ] = 0x0002; + s->bcr[BCR_LNKST] = 0x00c0; + s->bcr[BCR_LED1 ] = 0x0084; + s->bcr[BCR_LED2 ] = 0x0088; + s->bcr[BCR_LED3 ] = 0x0090; + s->bcr[BCR_FDC ] = 0x0000; + s->bcr[BCR_BSBC ] = 0x9001; + s->bcr[BCR_EECAS] = 0x0002; + s->bcr[BCR_SWS ] = 0x0200; + s->bcr[BCR_PLAT ] = 0xff06; + + pcnet_s_reset(s); + pcnet_update_irq(s); + pcnet_poll_timer(s); +} + +void pcnet_ioport_writew(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; + pcnet_poll_timer(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_writew addr=0x%08x val=0x%04x\n", addr, val); +#endif + if (!BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + pcnet_csr_writew(s, s->rap, val); + break; + case 0x02: + s->rap = val & 0x7f; + break; + case 0x06: + pcnet_bcr_writew(s, s->rap, val); + break; + } + } + pcnet_update_irq(s); +} + +uint32_t pcnet_ioport_readw(void *opaque, uint32_t addr) +{ + PCNetState *s = opaque; + uint32_t val = -1; + pcnet_poll_timer(s); + if (!BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + val = pcnet_csr_readw(s, s->rap); + break; + case 0x02: + val = s->rap; + break; + case 0x04: + pcnet_s_reset(s); + val = 0; + break; + case 0x06: + val = pcnet_bcr_readw(s, s->rap); + break; + } + } + pcnet_update_irq(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_readw addr=0x%08x val=0x%04x\n", addr, val & 0xffff); +#endif + return val; +} + +void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; + pcnet_poll_timer(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_writel addr=0x%08x val=0x%08x\n", addr, val); +#endif + if (BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + pcnet_csr_writew(s, s->rap, val & 0xffff); + break; + case 0x04: + s->rap = val & 0x7f; + break; + case 0x0c: + pcnet_bcr_writew(s, s->rap, val & 0xffff); + break; + } + } else + if ((addr & 0x0f) == 0) { + /* switch device to dword i/o mode */ + pcnet_bcr_writew(s, BCR_BSBC, pcnet_bcr_readw(s, BCR_BSBC) | 0x0080); +#ifdef PCNET_DEBUG_IO + printf("device switched into dword i/o mode\n"); +#endif + } + pcnet_update_irq(s); +} + +uint32_t pcnet_ioport_readl(void *opaque, uint32_t addr) +{ + PCNetState *s = opaque; + uint32_t val = -1; + pcnet_poll_timer(s); + if (BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + val = pcnet_csr_readw(s, s->rap); + break; + case 0x04: + val = s->rap; + break; + case 0x08: + pcnet_s_reset(s); + val = 0; + break; + case 0x0c: + val = pcnet_bcr_readw(s, s->rap); + break; + } + } + pcnet_update_irq(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_readl addr=0x%08x val=0x%08x\n", addr, val); +#endif + return val; +} + +static bool is_version_2(void *opaque, int version_id) +{ + return version_id == 2; +} + +const VMStateDescription vmstate_pcnet = { + .name = "pcnet", + .version_id = 3, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_INT32(rap, PCNetState), + VMSTATE_INT32(isr, PCNetState), + VMSTATE_INT32(lnkst, PCNetState), + VMSTATE_UINT32(rdra, PCNetState), + VMSTATE_UINT32(tdra, PCNetState), + VMSTATE_BUFFER(prom, PCNetState), + VMSTATE_UINT16_ARRAY(csr, PCNetState, 128), + VMSTATE_UINT16_ARRAY(bcr, PCNetState, 32), + VMSTATE_UINT64(timer, PCNetState), + VMSTATE_INT32(xmit_pos, PCNetState), + VMSTATE_BUFFER(buffer, PCNetState), + VMSTATE_UNUSED_TEST(is_version_2, 4), + VMSTATE_INT32(tx_busy, PCNetState), + VMSTATE_TIMER_PTR(poll_timer, PCNetState), + VMSTATE_END_OF_LIST() + } +}; + +void pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info) +{ + int i; + uint16_t checksum; + + s->poll_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pcnet_poll_timer, s); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + /* Initialize the PROM */ + + /* + Datasheet: http://pdfdata.datasheetsite.com/web/24528/AM79C970A.pdf + page 95 + */ + memcpy(s->prom, s->conf.macaddr.a, 6); + /* Reserved Location: must be 00h */ + s->prom[6] = s->prom[7] = 0x00; + /* Reserved Location: must be 00h */ + s->prom[8] = 0x00; + /* Hardware ID: must be 11h if compatibility to AMD drivers is desired */ + s->prom[9] = 0x11; + /* User programmable space, init with 0 */ + s->prom[10] = s->prom[11] = 0x00; + /* LSByte of two-byte checksum, which is the sum of bytes 00h-0Bh + and bytes 0Eh and 0Fh, must therefore be initialized with 0! */ + s->prom[12] = s->prom[13] = 0x00; + /* Must be ASCII W (57h) if compatibility to AMD + driver software is desired */ + s->prom[14] = s->prom[15] = 0x57; + + for (i = 0, checksum = 0; i < 16; i++) { + checksum += s->prom[i]; + } + *(uint16_t *)&s->prom[12] = cpu_to_le16(checksum); + + s->lnkst = 0x40; /* initial link state: up */ +} diff --git a/qemu/hw/net/pcnet.h b/qemu/hw/net/pcnet.h new file mode 100644 index 000000000..dec8de834 --- /dev/null +++ b/qemu/hw/net/pcnet.h @@ -0,0 +1,67 @@ +#ifndef HW_PCNET_H +#define HW_PCNET_H 1 + +#define PCNET_IOPORT_SIZE 0x20 +#define PCNET_PNPMMIO_SIZE 0x20 + +#define PCNET_LOOPTEST_CRC 1 +#define PCNET_LOOPTEST_NOCRC 2 + +#include "exec/memory.h" + +/* BUS CONFIGURATION REGISTERS */ +#define BCR_MSRDA 0 +#define BCR_MSWRA 1 +#define BCR_MC 2 +#define BCR_LNKST 4 +#define BCR_LED1 5 +#define BCR_LED2 6 +#define BCR_LED3 7 +#define BCR_FDC 9 +#define BCR_BSBC 18 +#define BCR_EECAS 19 +#define BCR_SWS 20 +#define BCR_PLAT 22 + +#define BCR_TMAULOOP(S) !!((S)->bcr[BCR_MC ] & 0x4000) +#define BCR_APROMWE(S) !!((S)->bcr[BCR_MC ] & 0x0100) +#define BCR_DWIO(S) !!((S)->bcr[BCR_BSBC] & 0x0080) +#define BCR_SSIZE32(S) !!((S)->bcr[BCR_SWS ] & 0x0100) +#define BCR_SWSTYLE(S) ((S)->bcr[BCR_SWS ] & 0x00FF) + +typedef struct PCNetState_st PCNetState; + +struct PCNetState_st { + NICState *nic; + NICConf conf; + QEMUTimer *poll_timer; + int rap, isr, lnkst; + uint32_t rdra, tdra; + uint8_t prom[16]; + uint16_t csr[128]; + uint16_t bcr[32]; + int xmit_pos; + uint64_t timer; + MemoryRegion mmio; + uint8_t buffer[4096]; + qemu_irq irq; + void (*phys_mem_read)(void *dma_opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap); + void (*phys_mem_write)(void *dma_opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap); + void *dma_opaque; + int tx_busy; + int looptest; +}; + +void pcnet_h_reset(void *opaque); +void pcnet_ioport_writew(void *opaque, uint32_t addr, uint32_t val); +uint32_t pcnet_ioport_readw(void *opaque, uint32_t addr); +void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val); +uint32_t pcnet_ioport_readl(void *opaque, uint32_t addr); +uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap); +ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_); +void pcnet_set_link_status(NetClientState *nc); +void pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info); +extern const VMStateDescription vmstate_pcnet; +#endif diff --git a/qemu/hw/net/rocker/qmp-norocker.c b/qemu/hw/net/rocker/qmp-norocker.c new file mode 100644 index 000000000..49b498b64 --- /dev/null +++ b/qemu/hw/net/rocker/qmp-norocker.c @@ -0,0 +1,50 @@ +/* + * QMP Target options - Commands handled based on a target config + * versus a host config + * + * Copyright (c) 2015 David Ahern <dsahern@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "qemu-common.h" +#include "qmp-commands.h" +#include "qapi/qmp/qerror.h" + +RockerSwitch *qmp_query_rocker(const char *name, Error **errp) +{ + error_setg(errp, QERR_FEATURE_DISABLED, "rocker"); + return NULL; +}; + +RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp) +{ + error_setg(errp, QERR_FEATURE_DISABLED, "rocker"); + return NULL; +}; + +RockerOfDpaFlowList *qmp_query_rocker_of_dpa_flows(const char *name, + bool has_tbl_id, + uint32_t tbl_id, + Error **errp) +{ + error_setg(errp, QERR_FEATURE_DISABLED, "rocker"); + return NULL; +}; + +RockerOfDpaGroupList *qmp_query_rocker_of_dpa_groups(const char *name, + bool has_type, + uint8_t type, + Error **errp) +{ + error_setg(errp, QERR_FEATURE_DISABLED, "rocker"); + return NULL; +}; diff --git a/qemu/hw/net/rocker/rocker.c b/qemu/hw/net/rocker/rocker.c new file mode 100644 index 000000000..47d080fd3 --- /dev/null +++ b/qemu/hw/net/rocker/rocker.c @@ -0,0 +1,1553 @@ +/* + * QEMU rocker switch emulation - PCI device + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/msix.h" +#include "net/net.h" +#include "net/eth.h" +#include "qemu/iov.h" +#include "qemu/bitops.h" +#include "qmp-commands.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_desc.h" +#include "rocker_tlv.h" +#include "rocker_world.h" +#include "rocker_of_dpa.h" + +struct rocker { + /* private */ + PCIDevice parent_obj; + /* public */ + + MemoryRegion mmio; + MemoryRegion msix_bar; + + /* switch configuration */ + char *name; /* switch name */ + uint32_t fp_ports; /* front-panel port count */ + NICPeers *fp_ports_peers; + MACAddr fp_start_macaddr; /* front-panel port 0 mac addr */ + uint64_t switch_id; /* switch id */ + + /* front-panel ports */ + FpPort *fp_port[ROCKER_FP_PORTS_MAX]; + + /* register backings */ + uint32_t test_reg; + uint64_t test_reg64; + dma_addr_t test_dma_addr; + uint32_t test_dma_size; + uint64_t lower32; /* lower 32-bit val in 2-part 64-bit access */ + + /* desc rings */ + DescRing **rings; + + /* switch worlds */ + World *worlds[ROCKER_WORLD_TYPE_MAX]; + World *world_dflt; + + QLIST_ENTRY(rocker) next; +}; + +#define ROCKER "rocker" + +#define to_rocker(obj) \ + OBJECT_CHECK(Rocker, (obj), ROCKER) + +static QLIST_HEAD(, rocker) rockers; + +Rocker *rocker_find(const char *name) +{ + Rocker *r; + + QLIST_FOREACH(r, &rockers, next) + if (strcmp(r->name, name) == 0) { + return r; + } + + return NULL; +} + +World *rocker_get_world(Rocker *r, enum rocker_world_type type) +{ + if (type < ROCKER_WORLD_TYPE_MAX) { + return r->worlds[type]; + } + return NULL; +} + +RockerSwitch *qmp_query_rocker(const char *name, Error **errp) +{ + RockerSwitch *rocker; + Rocker *r; + + r = rocker_find(name); + if (!r) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s not found", name); + return NULL; + } + + rocker = g_new0(RockerSwitch, 1); + rocker->name = g_strdup(r->name); + rocker->id = r->switch_id; + rocker->ports = r->fp_ports; + + return rocker; +} + +RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp) +{ + RockerPortList *list = NULL; + Rocker *r; + int i; + + r = rocker_find(name); + if (!r) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s not found", name); + return NULL; + } + + for (i = r->fp_ports - 1; i >= 0; i--) { + RockerPortList *info = g_malloc0(sizeof(*info)); + info->value = g_malloc0(sizeof(*info->value)); + struct fp_port *port = r->fp_port[i]; + + fp_port_get_info(port, info); + info->next = list; + list = info; + } + + return list; +} + +uint32_t rocker_fp_ports(Rocker *r) +{ + return r->fp_ports; +} + +static uint32_t rocker_get_pport_by_tx_ring(Rocker *r, + DescRing *ring) +{ + return (desc_ring_index(ring) - 2) / 2 + 1; +} + +static int tx_consume(Rocker *r, DescInfo *info) +{ + PCIDevice *dev = PCI_DEVICE(r); + char *buf = desc_get_buf(info, true); + RockerTlv *tlv_frag; + RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1]; + struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, }; + uint32_t pport; + uint32_t port; + uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE; + uint16_t tx_l3_csum_off = 0; + uint16_t tx_tso_mss = 0; + uint16_t tx_tso_hdr_len = 0; + int iovcnt = 0; + int err = ROCKER_OK; + int rem; + int i; + + if (!buf) { + return -ROCKER_ENXIO; + } + + rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_TX_FRAGS]) { + return -ROCKER_EINVAL; + } + + pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info)); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + + if (tlvs[ROCKER_TLV_TX_OFFLOAD]) { + tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]); + } + + switch (tx_offload) { + case ROCKER_TX_OFFLOAD_L3_CSUM: + if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { + return -ROCKER_EINVAL; + } + break; + case ROCKER_TX_OFFLOAD_TSO: + if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || + !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { + return -ROCKER_EINVAL; + } + break; + } + + if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { + tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]); + } + + if (tlvs[ROCKER_TLV_TX_TSO_MSS]) { + tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]); + } + + if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { + tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]); + } + + rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) { + hwaddr frag_addr; + uint16_t frag_len; + + if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) { + err = -ROCKER_EINVAL; + goto err_bad_attr; + } + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag); + + if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] || + !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) { + err = -ROCKER_EINVAL; + goto err_bad_attr; + } + + frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); + frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); + + iov[iovcnt].iov_len = frag_len; + iov[iovcnt].iov_base = g_malloc(frag_len); + if (!iov[iovcnt].iov_base) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base, + iov[iovcnt].iov_len)) { + err = -ROCKER_ENXIO; + goto err_bad_io; + } + + if (++iovcnt > ROCKER_TX_FRAGS_MAX) { + goto err_too_many_frags; + } + } + + if (iovcnt) { + /* XXX perform Tx offloads */ + /* XXX silence compiler for now */ + tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0; + } + + err = fp_port_eg(r->fp_port[port], iov, iovcnt); + +err_too_many_frags: +err_bad_io: +err_no_mem: +err_bad_attr: + for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) { + if (iov[i].iov_base) { + g_free(iov[i].iov_base); + } + } + + return err; +} + +static int cmd_get_port_settings(Rocker *r, + DescInfo *info, char *buf, + RockerTlv *cmd_info_tlv) +{ + RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + RockerTlv *nest; + FpPort *fp_port; + uint32_t pport; + uint32_t port; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + char *phys_name; + MACAddr macaddr; + enum rocker_world_type mode; + size_t tlv_size; + int pos; + int err; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + cmd_info_tlv); + + if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { + return -ROCKER_EINVAL; + } + + pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + + err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg); + if (err) { + return err; + } + + fp_port_get_macaddr(fp_port, &macaddr); + mode = world_type(fp_port_get_world(fp_port)); + learning = fp_port_get_learning(fp_port); + phys_name = fp_port_get_name(fp_port); + + tlv_size = rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* speed */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* duplex */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* autoneg */ + rocker_tlv_total_size(sizeof(macaddr.a)) + /* macaddr */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* mode */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* learning */ + rocker_tlv_total_size(strlen(phys_name)); + + if (tlv_size > desc_buf_size(info)) { + return -ROCKER_EMSGSIZE; + } + + pos = 0; + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg); + rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, + sizeof(macaddr.a), macaddr.a); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, + learning); + rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, + strlen(phys_name), phys_name); + rocker_tlv_nest_end(buf, &pos, nest); + + return desc_set_buf(info, tlv_size); +} + +static int cmd_set_port_settings(Rocker *r, + RockerTlv *cmd_info_tlv) +{ + RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + FpPort *fp_port; + uint32_t pport; + uint32_t port; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + MACAddr macaddr; + enum rocker_world_type mode; + int err; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + cmd_info_tlv); + + if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { + return -ROCKER_EINVAL; + } + + pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] && + tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] && + tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) { + + speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]); + duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]); + autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]); + + err = fp_port_set_settings(fp_port, speed, duplex, autoneg); + if (err) { + return err; + } + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) { + if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) != + sizeof(macaddr.a)) { + return -ROCKER_EINVAL; + } + memcpy(macaddr.a, + rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]), + sizeof(macaddr.a)); + fp_port_set_macaddr(fp_port, &macaddr); + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) { + mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]); + fp_port_set_world(fp_port, r->worlds[mode]); + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) { + learning = + rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]); + fp_port_set_learning(fp_port, learning); + } + + return ROCKER_OK; +} + +static int cmd_consume(Rocker *r, DescInfo *info) +{ + char *buf = desc_get_buf(info, false); + RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1]; + RockerTlv *info_tlv; + World *world; + uint16_t cmd; + int err; + + if (!buf) { + return -ROCKER_ENXIO; + } + + rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) { + return -ROCKER_EINVAL; + } + + cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]); + info_tlv = tlvs[ROCKER_TLV_CMD_INFO]; + + /* This might be reworked to something like this: + * Every world will have an array of command handlers from + * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is + * up to each world to implement whatever command it want. + * It can reference "generic" commands as cmd_set_port_settings or + * cmd_get_port_settings + */ + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; + err = world_do_cmd(world, info, buf, cmd, info_tlv); + break; + case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS: + err = cmd_get_port_settings(r, info, buf, info_tlv); + break; + case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS: + err = cmd_set_port_settings(r, info_tlv); + break; + default: + err = -ROCKER_EINVAL; + break; + } + + return err; +} + +static void rocker_msix_irq(Rocker *r, unsigned vector) +{ + PCIDevice *dev = PCI_DEVICE(r); + + DPRINTF("MSI-X notify request for vector %d\n", vector); + if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) { + DPRINTF("incorrect vector %d\n", vector); + return; + } + msix_notify(dev, vector); +} + +int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up) +{ + DescRing *ring = r->rings[ROCKER_RING_EVENT]; + DescInfo *info = desc_ring_fetch_desc(ring); + RockerTlv *nest; + char *buf; + size_t tlv_size; + int pos; + int err; + + if (!info) { + return -ROCKER_ENOBUFS; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ + rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(sizeof(uint8_t)); /* link up */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto err_too_big; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED); + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, + link_up ? 1 : 0); + rocker_tlv_nest_end(buf, &pos, nest); + + err = desc_set_buf(info, tlv_size); + +err_too_big: +err_no_mem: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); + } + + return err; +} + +int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, + uint16_t vlan_id) +{ + DescRing *ring = r->rings[ROCKER_RING_EVENT]; + DescInfo *info; + FpPort *fp_port; + uint32_t port; + RockerTlv *nest; + char *buf; + size_t tlv_size; + int pos; + int err; + + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + if (!fp_port_get_learning(fp_port)) { + return ROCKER_OK; + } + + info = desc_ring_fetch_desc(ring); + if (!info) { + return -ROCKER_ENOBUFS; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ + rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(ETH_ALEN) + /* mac addr */ + rocker_tlv_total_size(sizeof(uint16_t)); /* vlan_id */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto err_too_big; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN); + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport); + rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr); + rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id); + rocker_tlv_nest_end(buf, &pos, nest); + + err = desc_set_buf(info, tlv_size); + +err_too_big: +err_no_mem: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); + } + + return err; +} + +static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, + uint32_t pport) +{ + return r->rings[(pport - 1) * 2 + 3]; +} + +int rx_produce(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) +{ + Rocker *r = world_rocker(world); + PCIDevice *dev = (PCIDevice *)r; + DescRing *ring = rocker_get_rx_ring_by_pport(r, pport); + DescInfo *info = desc_ring_fetch_desc(ring); + char *data; + size_t data_size = iov_size(iov, iovcnt); + char *buf; + uint16_t rx_flags = 0; + uint16_t rx_csum = 0; + size_t tlv_size; + RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1]; + hwaddr frag_addr; + uint16_t frag_max_len; + int pos; + int err; + + if (!info) { + return -ROCKER_ENOBUFS; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENXIO; + goto out; + } + rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] || + !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) { + err = -ROCKER_EINVAL; + goto out; + } + + frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]); + frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]); + + if (data_size > frag_max_len) { + err = -ROCKER_EMSGSIZE; + goto out; + } + + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + + /* XXX calc rx flags/csum */ + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ + rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */ + rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */ + rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */ + rocker_tlv_total_size(sizeof(uint16_t)); /* frag len */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto out; + } + + /* TODO: + * iov dma write can be optimized in similar way e1000 does it in + * e1000_receive_iov. But maybe if would make sense to introduce + * generic helper iov_dma_write. + */ + + data = g_malloc(data_size); + if (!data) { + err = -ROCKER_ENOMEM; + goto out; + } + iov_to_buf(iov, iovcnt, 0, data, data_size); + pci_dma_write(dev, frag_addr, data, data_size); + g_free(data); + + pos = 0; + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size); + + err = desc_set_buf(info, tlv_size); + +out: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1)); + } + + return err; +} + +int rocker_port_eg(Rocker *r, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + FpPort *fp_port; + uint32_t port; + + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + + fp_port = r->fp_port[port]; + + return fp_port_eg(fp_port, iov, iovcnt); +} + +static void rocker_test_dma_ctrl(Rocker *r, uint32_t val) +{ + PCIDevice *dev = PCI_DEVICE(r); + char *buf; + int i; + + buf = g_malloc(r->test_dma_size); + + if (!buf) { + DPRINTF("test dma buffer alloc failed"); + return; + } + + switch (val) { + case ROCKER_TEST_DMA_CTRL_CLEAR: + memset(buf, 0, r->test_dma_size); + break; + case ROCKER_TEST_DMA_CTRL_FILL: + memset(buf, 0x96, r->test_dma_size); + break; + case ROCKER_TEST_DMA_CTRL_INVERT: + pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size); + for (i = 0; i < r->test_dma_size; i++) { + buf[i] = ~buf[i]; + } + break; + default: + DPRINTF("not test dma control val=0x%08x\n", val); + goto err_out; + } + pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size); + + rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST); + +err_out: + g_free(buf); +} + +static void rocker_reset(DeviceState *dev); + +static void rocker_control(Rocker *r, uint32_t val) +{ + if (val & ROCKER_CONTROL_RESET) { + rocker_reset(DEVICE(r)); + } +} + +static int rocker_pci_ring_count(Rocker *r) +{ + /* There are: + * - command ring + * - event ring + * - tx and rx ring per each port + */ + return 2 + (2 * r->fp_ports); +} + +static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr) +{ + hwaddr start = ROCKER_DMA_DESC_BASE; + hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r)); + + return addr >= start && addr < end; +} + +static void rocker_port_phys_enable_write(Rocker *r, uint64_t new) +{ + int i; + bool old_enabled; + bool new_enabled; + FpPort *fp_port; + + for (i = 0; i < r->fp_ports; i++) { + fp_port = r->fp_port[i]; + old_enabled = fp_port_enabled(fp_port); + new_enabled = (new >> (i + 1)) & 0x1; + if (new_enabled == old_enabled) { + continue; + } + if (new_enabled) { + fp_port_enable(r->fp_port[i]); + } else { + fp_port_disable(r->fp_port[i]); + } + } +} + +static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + r->lower32 = (uint64_t)val; + break; + case ROCKER_DMA_DESC_ADDR_OFFSET + 4: + desc_ring_set_base_addr(r->rings[index], + ((uint64_t)val) << 32 | r->lower32); + r->lower32 = 0; + break; + case ROCKER_DMA_DESC_SIZE_OFFSET: + desc_ring_set_size(r->rings[index], val); + break; + case ROCKER_DMA_DESC_HEAD_OFFSET: + if (desc_ring_set_head(r->rings[index], val)) { + rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); + } + break; + case ROCKER_DMA_DESC_CTRL_OFFSET: + desc_ring_set_ctrl(r->rings[index], val); + break; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + if (desc_ring_ret_credits(r->rings[index], val)) { + rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); + } + break; + default: + DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx + " val=0x%08x (ring %d, addr=0x%02x)\n", + addr, val, index, offset); + break; + } + return; + } + + switch (addr) { + case ROCKER_TEST_REG: + r->test_reg = val; + break; + case ROCKER_TEST_REG64: + case ROCKER_TEST_DMA_ADDR: + case ROCKER_PORT_PHYS_ENABLE: + r->lower32 = (uint64_t)val; + break; + case ROCKER_TEST_REG64 + 4: + r->test_reg64 = ((uint64_t)val) << 32 | r->lower32; + r->lower32 = 0; + break; + case ROCKER_TEST_IRQ: + rocker_msix_irq(r, val); + break; + case ROCKER_TEST_DMA_SIZE: + r->test_dma_size = val; + break; + case ROCKER_TEST_DMA_ADDR + 4: + r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32; + r->lower32 = 0; + break; + case ROCKER_TEST_DMA_CTRL: + rocker_test_dma_ctrl(r, val); + break; + case ROCKER_CONTROL: + rocker_control(r, val); + break; + case ROCKER_PORT_PHYS_ENABLE + 4: + rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32); + r->lower32 = 0; + break; + default: + DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx + " val=0x%08x\n", addr, val); + break; + } +} + +static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + desc_ring_set_base_addr(r->rings[index], val); + break; + default: + DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx + " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n", + addr, val, index, offset); + break; + } + return; + } + + switch (addr) { + case ROCKER_TEST_REG64: + r->test_reg64 = val; + break; + case ROCKER_TEST_DMA_ADDR: + r->test_dma_addr = val; + break; + case ROCKER_PORT_PHYS_ENABLE: + rocker_port_phys_enable_write(r, val); + break; + default: + DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx + " val=0x" TARGET_FMT_plx "\n", addr, val); + break; + } +} + +#ifdef DEBUG_ROCKER +#define regname(reg) case (reg): return #reg +static const char *rocker_reg_name(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + static char buf[100]; + char ring_name[10]; + + switch (index) { + case 0: + sprintf(ring_name, "cmd"); + break; + case 1: + sprintf(ring_name, "event"); + break; + default: + sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx", + (index - 2) / 2); + } + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + sprintf(buf, "Ring[%s] ADDR", ring_name); + return buf; + case ROCKER_DMA_DESC_ADDR_OFFSET+4: + sprintf(buf, "Ring[%s] ADDR+4", ring_name); + return buf; + case ROCKER_DMA_DESC_SIZE_OFFSET: + sprintf(buf, "Ring[%s] SIZE", ring_name); + return buf; + case ROCKER_DMA_DESC_HEAD_OFFSET: + sprintf(buf, "Ring[%s] HEAD", ring_name); + return buf; + case ROCKER_DMA_DESC_TAIL_OFFSET: + sprintf(buf, "Ring[%s] TAIL", ring_name); + return buf; + case ROCKER_DMA_DESC_CTRL_OFFSET: + sprintf(buf, "Ring[%s] CTRL", ring_name); + return buf; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + sprintf(buf, "Ring[%s] CREDITS", ring_name); + return buf; + default: + sprintf(buf, "Ring[%s] ???", ring_name); + return buf; + } + } else { + switch (addr) { + regname(ROCKER_BOGUS_REG0); + regname(ROCKER_BOGUS_REG1); + regname(ROCKER_BOGUS_REG2); + regname(ROCKER_BOGUS_REG3); + regname(ROCKER_TEST_REG); + regname(ROCKER_TEST_REG64); + regname(ROCKER_TEST_REG64+4); + regname(ROCKER_TEST_IRQ); + regname(ROCKER_TEST_DMA_ADDR); + regname(ROCKER_TEST_DMA_ADDR+4); + regname(ROCKER_TEST_DMA_SIZE); + regname(ROCKER_TEST_DMA_CTRL); + regname(ROCKER_CONTROL); + regname(ROCKER_PORT_PHYS_COUNT); + regname(ROCKER_PORT_PHYS_LINK_STATUS); + regname(ROCKER_PORT_PHYS_LINK_STATUS+4); + regname(ROCKER_PORT_PHYS_ENABLE); + regname(ROCKER_PORT_PHYS_ENABLE+4); + regname(ROCKER_SWITCH_ID); + regname(ROCKER_SWITCH_ID+4); + } + } + return "???"; +} +#else +static const char *rocker_reg_name(void *opaque, hwaddr addr) +{ + return NULL; +} +#endif + +static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + DPRINTF("Write %s addr " TARGET_FMT_plx + ", size %u, val " TARGET_FMT_plx "\n", + rocker_reg_name(opaque, addr), addr, size, val); + + switch (size) { + case 4: + rocker_io_writel(opaque, addr, val); + break; + case 8: + rocker_io_writeq(opaque, addr, val); + break; + } +} + +static uint64_t rocker_port_phys_link_status(Rocker *r) +{ + int i; + uint64_t status = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_get_link_up(port)) { + status |= 1 << (i + 1); + } + } + return status; +} + +static uint64_t rocker_port_phys_enable_read(Rocker *r) +{ + int i; + uint64_t ret = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_enabled(port)) { + ret |= 1 << (i + 1); + } + } + return ret; +} + +static uint32_t rocker_io_readl(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint32_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]); + break; + case ROCKER_DMA_DESC_ADDR_OFFSET + 4: + ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32); + break; + case ROCKER_DMA_DESC_SIZE_OFFSET: + ret = desc_ring_get_size(r->rings[index]); + break; + case ROCKER_DMA_DESC_HEAD_OFFSET: + ret = desc_ring_get_head(r->rings[index]); + break; + case ROCKER_DMA_DESC_TAIL_OFFSET: + ret = desc_ring_get_tail(r->rings[index]); + break; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + ret = desc_ring_get_credits(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx + " (ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case ROCKER_BOGUS_REG1: + case ROCKER_BOGUS_REG2: + case ROCKER_BOGUS_REG3: + ret = 0xDEADBABE; + break; + case ROCKER_TEST_REG: + ret = r->test_reg * 2; + break; + case ROCKER_TEST_REG64: + ret = (uint32_t)(r->test_reg64 * 2); + break; + case ROCKER_TEST_REG64 + 4: + ret = (uint32_t)((r->test_reg64 * 2) >> 32); + break; + case ROCKER_TEST_DMA_SIZE: + ret = r->test_dma_size; + break; + case ROCKER_TEST_DMA_ADDR: + ret = (uint32_t)r->test_dma_addr; + break; + case ROCKER_TEST_DMA_ADDR + 4: + ret = (uint32_t)(r->test_dma_addr >> 32); + break; + case ROCKER_PORT_PHYS_COUNT: + ret = r->fp_ports; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = (uint32_t)rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_LINK_STATUS + 4: + ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = (uint32_t)rocker_port_phys_enable_read(r); + break; + case ROCKER_PORT_PHYS_ENABLE + 4: + ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32); + break; + case ROCKER_SWITCH_ID: + ret = (uint32_t)r->switch_id; + break; + case ROCKER_SWITCH_ID + 4: + ret = (uint32_t)(r->switch_id >> 32); + break; + default: + DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_io_readq(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint64_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (addr & ROCKER_DMA_DESC_MASK) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = desc_ring_get_base_addr(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx + " (ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case ROCKER_BOGUS_REG2: + ret = 0xDEADBABEDEADBABEULL; + break; + case ROCKER_TEST_REG64: + ret = r->test_reg64 * 2; + break; + case ROCKER_TEST_DMA_ADDR: + ret = r->test_dma_addr; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = rocker_port_phys_enable_read(r); + break; + case ROCKER_SWITCH_ID: + ret = r->switch_id; + break; + default: + DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n", + rocker_reg_name(opaque, addr), addr, size); + + switch (size) { + case 4: + return rocker_io_readl(opaque, addr); + case 8: + return rocker_io_readq(opaque, addr); + } + + return -1; +} + +static const MemoryRegionOps rocker_mmio_ops = { + .read = rocker_mmio_read, + .write = rocker_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static void rocker_msix_vectors_unuse(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int i; + + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(dev, i); + } +} + +static int rocker_msix_vectors_use(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + int i; + + for (i = 0; i < num_vectors; i++) { + err = msix_vector_use(dev, i); + if (err) { + goto rollback; + } + } + return 0; + +rollback: + rocker_msix_vectors_unuse(r, i); + return err; +} + +static int rocker_msix_init(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + + err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports), + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET, + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET, + 0); + if (err) { + return err; + } + + err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); + if (err) { + goto err_msix_vectors_use; + } + + return 0; + +err_msix_vectors_use: + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + return err; +} + +static void rocker_msix_uninit(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); +} + +static int pci_rocker_init(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; + const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } }; + static int sw_index; + int i, err = 0; + + /* allocate worlds */ + + r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r); + r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (!r->worlds[i]) { + goto err_world_alloc; + } + } + + /* set up memory-mapped region at BAR0 */ + + memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r, + "rocker-mmio", ROCKER_PCI_BAR0_SIZE); + pci_register_bar(dev, ROCKER_PCI_BAR0_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio); + + /* set up memory-mapped region for MSI-X */ + + memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar", + ROCKER_PCI_MSIX_BAR_SIZE); + pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar); + + /* MSI-X init */ + + err = rocker_msix_init(r); + if (err) { + goto err_msix_init; + } + + /* validate switch properties */ + + if (!r->name) { + r->name = g_strdup(ROCKER); + } + + if (rocker_find(r->name)) { + err = -EEXIST; + goto err_duplicate; + } + + /* Rocker name is passed in port name requests to OS with the intention + * that the name is used in interface names. Limit the length of the + * rocker name to avoid naming problems in the OS. Also, adding the + * port number as p# and unganged breakout b#, where # is at most 2 + * digits, so leave room for it too (-1 for string terminator, -3 for + * p# and -3 for b#) + */ +#define ROCKER_IFNAMSIZ 16 +#define MAX_ROCKER_NAME_LEN (ROCKER_IFNAMSIZ - 1 - 3 - 3) + if (strlen(r->name) > MAX_ROCKER_NAME_LEN) { + fprintf(stderr, + "rocker: name too long; please shorten to at most %d chars\n", + MAX_ROCKER_NAME_LEN); + return -EINVAL; + } + + if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) { + memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt)); + r->fp_start_macaddr.a[4] += (sw_index++); + } + + if (!r->switch_id) { + memcpy(&r->switch_id, &r->fp_start_macaddr, + sizeof(r->fp_start_macaddr)); + } + + if (r->fp_ports > ROCKER_FP_PORTS_MAX) { + r->fp_ports = ROCKER_FP_PORTS_MAX; + } + + r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r)); + if (!r->rings) { + goto err_rings_alloc; + } + + /* Rings are ordered like this: + * - command ring + * - event ring + * - port0 tx ring + * - port0 rx ring + * - port1 tx ring + * - port1 rx ring + * ..... + */ + + err = -ENOMEM; + for (i = 0; i < rocker_pci_ring_count(r); i++) { + DescRing *ring = desc_ring_alloc(r, i); + + if (!ring) { + goto err_ring_alloc; + } + + if (i == ROCKER_RING_CMD) { + desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD); + } else if (i == ROCKER_RING_EVENT) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT); + } else if (i % 2 == 0) { + desc_ring_set_consume(ring, tx_consume, + ROCKER_MSIX_VEC_TX((i - 2) / 2)); + } else if (i % 2 == 1) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2)); + } + + r->rings[i] = ring; + } + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = + fp_port_alloc(r, r->name, &r->fp_start_macaddr, + i, &r->fp_ports_peers[i]); + + if (!port) { + goto err_port_alloc; + } + + r->fp_port[i] = port; + fp_port_set_world(port, r->world_dflt); + } + + QLIST_INSERT_HEAD(&rockers, r, next); + + return 0; + +err_port_alloc: + for (--i; i >= 0; i--) { + FpPort *port = r->fp_port[i]; + fp_port_free(port); + } + i = rocker_pci_ring_count(r); +err_ring_alloc: + for (--i; i >= 0; i--) { + desc_ring_free(r->rings[i]); + } + g_free(r->rings); +err_rings_alloc: +err_duplicate: + rocker_msix_uninit(r); +err_msix_init: + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); +err_world_alloc: + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + return err; +} + +static void pci_rocker_uninit(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + QLIST_REMOVE(r, next); + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + fp_port_free(port); + r->fp_port[i] = NULL; + } + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + if (r->rings[i]) { + desc_ring_free(r->rings[i]); + } + } + g_free(r->rings); + + rocker_msix_uninit(r); + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + g_free(r->fp_ports_peers); +} + +static void rocker_reset(DeviceState *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_reset(r->worlds[i]); + } + } + for (i = 0; i < r->fp_ports; i++) { + fp_port_reset(r->fp_port[i]); + fp_port_set_world(r->fp_port[i], r->world_dflt); + } + + r->test_reg = 0; + r->test_reg64 = 0; + r->test_dma_addr = 0; + r->test_dma_size = 0; + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + desc_ring_reset(r->rings[i]); + } + + DPRINTF("Reset done\n"); +} + +static Property rocker_properties[] = { + DEFINE_PROP_STRING("name", Rocker, name), + DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker, + fp_start_macaddr), + DEFINE_PROP_UINT64("switch_id", Rocker, + switch_id, 0), + DEFINE_PROP_ARRAY("ports", Rocker, fp_ports, + fp_ports_peers, qdev_prop_netdev, NICPeers), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription rocker_vmsd = { + .name = ROCKER, + .unmigratable = 1, +}; + +static void rocker_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = pci_rocker_init; + k->exit = pci_rocker_uninit; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER; + k->revision = ROCKER_PCI_REVISION; + k->class_id = PCI_CLASS_NETWORK_OTHER; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "Rocker Switch"; + dc->reset = rocker_reset; + dc->props = rocker_properties; + dc->vmsd = &rocker_vmsd; +} + +static const TypeInfo rocker_info = { + .name = ROCKER, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(Rocker), + .class_init = rocker_class_init, +}; + +static void rocker_register_types(void) +{ + type_register_static(&rocker_info); +} + +type_init(rocker_register_types) diff --git a/qemu/hw/net/rocker/rocker.h b/qemu/hw/net/rocker/rocker.h new file mode 100644 index 000000000..f9c80f801 --- /dev/null +++ b/qemu/hw/net/rocker/rocker.h @@ -0,0 +1,84 @@ +/* + * QEMU rocker switch emulation + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014 Neil Horman <nhorman@tuxdriver.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_H_ +#define _ROCKER_H_ + +#include "qemu/sockets.h" + +#if defined(DEBUG_ROCKER) +# define DPRINTF(fmt, ...) \ + do { \ + struct timeval tv; \ + char timestr[64]; \ + time_t now; \ + gettimeofday(&tv, NULL); \ + now = tv.tv_sec; \ + strftime(timestr, sizeof(timestr), "%T", localtime(&now)); \ + fprintf(stderr, "%s.%06ld ", timestr, tv.tv_usec); \ + fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); \ + } while (0) +#else +static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) +{ + return 0; +} +#endif + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +#define __be16 uint16_t +#define __be32 uint32_t +#define __be64 uint64_t + +static inline bool ipv4_addr_is_multicast(__be32 addr) +{ + return (addr & htonl(0xf0000000)) == htonl(0xe0000000); +} + +typedef struct ipv6_addr { + union { + uint8_t addr8[16]; + __be16 addr16[8]; + __be32 addr32[4]; + }; +} Ipv6Addr; + +static inline bool ipv6_addr_is_multicast(const Ipv6Addr *addr) +{ + return (addr->addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); +} + +typedef struct rocker Rocker; +typedef struct world World; +typedef struct desc_info DescInfo; +typedef struct desc_ring DescRing; + +Rocker *rocker_find(const char *name); +uint32_t rocker_fp_ports(Rocker *r); +int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); +int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, + uint16_t vlan_id); +int rx_produce(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); +int rocker_port_eg(Rocker *r, uint32_t pport, + const struct iovec *iov, int iovcnt); + +#endif /* _ROCKER_H_ */ diff --git a/qemu/hw/net/rocker/rocker_desc.c b/qemu/hw/net/rocker/rocker_desc.c new file mode 100644 index 000000000..9d896fe47 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_desc.c @@ -0,0 +1,377 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/net.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_desc.h" + +struct desc_ring { + hwaddr base_addr; + uint32_t size; + uint32_t head; + uint32_t tail; + uint32_t ctrl; + uint32_t credits; + Rocker *r; + DescInfo *info; + int index; + desc_ring_consume *consume; + unsigned msix_vector; +}; + +struct desc_info { + DescRing *ring; + RockerDesc desc; + char *buf; + size_t buf_size; +}; + +uint16_t desc_buf_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.buf_size); +} + +uint16_t desc_tlv_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.tlv_size); +} + +char *desc_get_buf(DescInfo *info, bool read_only) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + size_t size = read_only ? le16_to_cpu(info->desc.tlv_size) : + le16_to_cpu(info->desc.buf_size); + + if (size > info->buf_size) { + info->buf = g_realloc(info->buf, size); + info->buf_size = size; + } + + if (!info->buf) { + return NULL; + } + + if (pci_dma_read(dev, le64_to_cpu(info->desc.buf_addr), info->buf, size)) { + return NULL; + } + + return info->buf; +} + +int desc_set_buf(DescInfo *info, size_t tlv_size) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + + if (tlv_size > info->buf_size) { + DPRINTF("ERROR: trying to write more to desc buf than it " + "can hold buf_size %zu tlv_size %zu\n", + info->buf_size, tlv_size); + return -ROCKER_EMSGSIZE; + } + + info->desc.tlv_size = cpu_to_le16(tlv_size); + pci_dma_write(dev, le64_to_cpu(info->desc.buf_addr), info->buf, tlv_size); + + return ROCKER_OK; +} + +DescRing *desc_get_ring(DescInfo *info) +{ + return info->ring; +} + +int desc_ring_index(DescRing *ring) +{ + return ring->index; +} + +static bool desc_ring_empty(DescRing *ring) +{ + return ring->head == ring->tail; +} + +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr) +{ + if (base_addr & 0x7) { + DPRINTF("ERROR: ring[%d] desc base addr (0x" TARGET_FMT_plx + ") not 8-byte aligned\n", ring->index, base_addr); + return false; + } + + ring->base_addr = base_addr; + + return true; +} + +uint64_t desc_ring_get_base_addr(DescRing *ring) +{ + return ring->base_addr; +} + +bool desc_ring_set_size(DescRing *ring, uint32_t size) +{ + int i; + + if (size < 2 || size > 0x10000 || (size & (size - 1))) { + DPRINTF("ERROR: ring[%d] size (%d) not a power of 2 " + "or in range [2, 64K]\n", ring->index, size); + return false; + } + + for (i = 0; i < ring->size; i++) { + if (ring->info[i].buf) { + g_free(ring->info[i].buf); + } + } + + ring->size = size; + ring->head = ring->tail = 0; + + ring->info = g_realloc(ring->info, size * sizeof(DescInfo)); + if (!ring->info) { + return false; + } + + memset(ring->info, 0, size * sizeof(DescInfo)); + + for (i = 0; i < size; i++) { + ring->info[i].ring = ring; + } + + return true; +} + +uint32_t desc_ring_get_size(DescRing *ring) +{ + return ring->size; +} + +static DescInfo *desc_read(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_read(dev, addr, &info->desc, sizeof(info->desc)); + + return info; +} + +static void desc_write(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_write(dev, addr, &info->desc, sizeof(info->desc)); +} + +static bool desc_ring_base_addr_check(DescRing *ring) +{ + if (!ring->base_addr) { + DPRINTF("ERROR: ring[%d] not-initialized desc base address!\n", + ring->index); + return false; + } + return true; +} + +static DescInfo *__desc_ring_fetch_desc(DescRing *ring) +{ + return desc_read(ring, ring->tail); +} + +DescInfo *desc_ring_fetch_desc(DescRing *ring) +{ + if (desc_ring_empty(ring) || !desc_ring_base_addr_check(ring)) { + return NULL; + } + + return desc_read(ring, ring->tail); +} + +static bool __desc_ring_post_desc(DescRing *ring, int err) +{ + uint16_t comp_err = 0x8000 | (uint16_t)-err; + DescInfo *info = &ring->info[ring->tail]; + + info->desc.comp_err = cpu_to_le16(comp_err); + desc_write(ring, ring->tail); + ring->tail = (ring->tail + 1) % ring->size; + + /* return true if starting credit count */ + + return ring->credits++ == 0; +} + +bool desc_ring_post_desc(DescRing *ring, int err) +{ + if (desc_ring_empty(ring)) { + DPRINTF("ERROR: ring[%d] trying to post desc to empty ring\n", + ring->index); + return false; + } + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + return __desc_ring_post_desc(ring, err); +} + +static bool ring_pump(DescRing *ring) +{ + DescInfo *info; + bool primed = false; + int err; + + /* If the ring has a consumer, call consumer for each + * desc starting at tail and stopping when tail reaches + * head (the empty ring condition). + */ + + if (ring->consume) { + while (ring->head != ring->tail) { + info = __desc_ring_fetch_desc(ring); + err = ring->consume(ring->r, info); + if (__desc_ring_post_desc(ring, err)) { + primed = true; + } + } + } + + return primed; +} + +bool desc_ring_set_head(DescRing *ring, uint32_t new) +{ + uint32_t tail = ring->tail; + uint32_t head = ring->head; + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + if (new >= ring->size) { + DPRINTF("ERROR: trying to set head (%d) past ring[%d] size (%d)\n", + new, ring->index, ring->size); + return false; + } + + if (((head < tail) && ((new >= tail) || (new < head))) || + ((head > tail) && ((new >= tail) && (new < head)))) { + DPRINTF("ERROR: trying to wrap ring[%d] " + "(head %d, tail %d, new head %d)\n", + ring->index, head, tail, new); + return false; + } + + if (new == ring->head) { + DPRINTF("WARNING: setting head (%d) to current head position\n", new); + } + + ring->head = new; + + return ring_pump(ring); +} + +uint32_t desc_ring_get_head(DescRing *ring) +{ + return ring->head; +} + +uint32_t desc_ring_get_tail(DescRing *ring) +{ + return ring->tail; +} + +void desc_ring_set_ctrl(DescRing *ring, uint32_t val) +{ + if (val & ROCKER_DMA_DESC_CTRL_RESET) { + DPRINTF("ring[%d] resetting\n", ring->index); + desc_ring_reset(ring); + } +} + +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits) +{ + if (credits > ring->credits) { + DPRINTF("ERROR: trying to return more credits (%d) " + "than are outstanding (%d)\n", credits, ring->credits); + ring->credits = 0; + return false; + } + + ring->credits -= credits; + + /* return true if credits are still outstanding */ + + return ring->credits > 0; +} + +uint32_t desc_ring_get_credits(DescRing *ring) +{ + return ring->credits; +} + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector) +{ + ring->consume = consume; + ring->msix_vector = vector; +} + +unsigned desc_ring_get_msix_vector(DescRing *ring) +{ + return ring->msix_vector; +} + +DescRing *desc_ring_alloc(Rocker *r, int index) +{ + DescRing *ring; + + ring = g_malloc0(sizeof(DescRing)); + if (!ring) { + return NULL; + } + + ring->r = r; + ring->index = index; + + return ring; +} + +void desc_ring_free(DescRing *ring) +{ + if (ring->info) { + g_free(ring->info); + } + g_free(ring); +} + +void desc_ring_reset(DescRing *ring) +{ + ring->base_addr = 0; + ring->size = 0; + ring->head = 0; + ring->tail = 0; + ring->ctrl = 0; + ring->credits = 0; +} diff --git a/qemu/hw/net/rocker/rocker_desc.h b/qemu/hw/net/rocker/rocker_desc.h new file mode 100644 index 000000000..d4041f5c4 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_desc.h @@ -0,0 +1,53 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#ifndef _ROCKER_DESC_H_ +#define _ROCKER_DESC_H_ + +#include "rocker_hw.h" + +typedef int (desc_ring_consume)(Rocker *r, DescInfo *info); + +uint16_t desc_buf_size(DescInfo *info); +uint16_t desc_tlv_size(DescInfo *info); +char *desc_get_buf(DescInfo *info, bool read_only); +int desc_set_buf(DescInfo *info, size_t tlv_size); +DescRing *desc_get_ring(DescInfo *info); + +int desc_ring_index(DescRing *ring); +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr); +uint64_t desc_ring_get_base_addr(DescRing *ring); +bool desc_ring_set_size(DescRing *ring, uint32_t size); +uint32_t desc_ring_get_size(DescRing *ring); +bool desc_ring_set_head(DescRing *ring, uint32_t new); +uint32_t desc_ring_get_head(DescRing *ring); +uint32_t desc_ring_get_tail(DescRing *ring); +void desc_ring_set_ctrl(DescRing *ring, uint32_t val); +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits); +uint32_t desc_ring_get_credits(DescRing *ring); + +DescInfo *desc_ring_fetch_desc(DescRing *ring); +bool desc_ring_post_desc(DescRing *ring, int status); + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector); +unsigned desc_ring_get_msix_vector(DescRing *ring); +DescRing *desc_ring_alloc(Rocker *r, int index); +void desc_ring_free(DescRing *ring); +void desc_ring_reset(DescRing *ring); + +#endif diff --git a/qemu/hw/net/rocker/rocker_fp.c b/qemu/hw/net/rocker/rocker_fp.c new file mode 100644 index 000000000..c693ae508 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_fp.c @@ -0,0 +1,263 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/clients.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_world.h" + +enum duplex { + DUPLEX_HALF = 0, + DUPLEX_FULL +}; + +struct fp_port { + Rocker *r; + World *world; + unsigned int index; + char *name; + uint32_t pport; + bool enabled; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + NICState *nic; + NICConf conf; +}; + +char *fp_port_get_name(FpPort *port) +{ + return port->name; +} + +bool fp_port_get_link_up(FpPort *port) +{ + return !qemu_get_queue(port->nic)->link_down; +} + +void fp_port_get_info(FpPort *port, RockerPortList *info) +{ + info->value->name = g_strdup(port->name); + info->value->enabled = port->enabled; + info->value->link_up = fp_port_get_link_up(port); + info->value->speed = port->speed; + info->value->duplex = port->duplex; + info->value->autoneg = port->autoneg; +} + +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr) +{ + memcpy(macaddr->a, port->conf.macaddr.a, sizeof(macaddr->a)); +} + +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr) +{ +/* XXX TODO implement and test setting mac addr + * XXX memcpy(port->conf.macaddr.a, macaddr.a, sizeof(port->conf.macaddr.a)); + */ +} + +uint8_t fp_port_get_learning(FpPort *port) +{ + return port->learning; +} + +void fp_port_set_learning(FpPort *port, uint8_t learning) +{ + port->learning = learning; +} + +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg) +{ + *speed = port->speed; + *duplex = port->duplex; + *autoneg = port->autoneg; + + return ROCKER_OK; +} + +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg) +{ + /* XXX validate inputs */ + + port->speed = speed; + port->duplex = duplex; + port->autoneg = autoneg; + + return ROCKER_OK; +} + +bool fp_port_from_pport(uint32_t pport, uint32_t *port) +{ + if (pport < 1 || pport > ROCKER_FP_PORTS_MAX) { + return false; + } + *port = pport - 1; + return true; +} + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) +{ + NetClientState *nc = qemu_get_queue(port->nic); + + if (port->enabled) { + qemu_sendv_packet(nc, iov, iovcnt); + } + + return ROCKER_OK; +} + +static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, + int iovcnt) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + + return world_ingress(port->world, port->pport, iov, iovcnt); +} + +static ssize_t fp_port_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return fp_port_receive_iov(nc, &iov, 1); +} + +static void fp_port_cleanup(NetClientState *nc) +{ +} + +static void fp_port_set_link_status(NetClientState *nc) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + rocker_event_link_changed(port->r, port->pport, !nc->link_down); +} + +static NetClientInfo fp_port_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = fp_port_receive, + .receive_iov = fp_port_receive_iov, + .cleanup = fp_port_cleanup, + .link_status_changed = fp_port_set_link_status, +}; + +World *fp_port_get_world(FpPort *port) +{ + return port->world; +} + +void fp_port_set_world(FpPort *port, World *world) +{ + DPRINTF("port %d setting world \"%s\"\n", port->index, world_name(world)); + port->world = world; +} + +bool fp_port_enabled(FpPort *port) +{ + return port->enabled; +} + +static void fp_port_set_link(FpPort *port, bool up) +{ + NetClientState *nc = qemu_get_queue(port->nic); + + if (up == nc->link_down) { + nc->link_down = !up; + nc->info->link_status_changed(nc); + } +} + +void fp_port_enable(FpPort *port) +{ + fp_port_set_link(port, true); + port->enabled = true; + DPRINTF("port %d enabled\n", port->index); +} + +void fp_port_disable(FpPort *port) +{ + port->enabled = false; + fp_port_set_link(port, false); + DPRINTF("port %d disabled\n", port->index); +} + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, unsigned int index, + NICPeers *peers) +{ + FpPort *port = g_malloc0(sizeof(FpPort)); + + if (!port) { + return NULL; + } + + port->r = r; + port->index = index; + port->pport = index + 1; + + /* front-panel switch port names are 1-based */ + + port->name = g_strdup_printf("%sp%d", sw_name, port->pport); + + memcpy(port->conf.macaddr.a, start_mac, sizeof(port->conf.macaddr.a)); + port->conf.macaddr.a[5] += index; + port->conf.bootindex = -1; + port->conf.peers = *peers; + + port->nic = qemu_new_nic(&fp_port_info, &port->conf, + sw_name, NULL, port); + qemu_format_nic_info_str(qemu_get_queue(port->nic), + port->conf.macaddr.a); + + fp_port_reset(port); + + return port; +} + +void fp_port_free(FpPort *port) +{ + qemu_del_nic(port->nic); + g_free(port->name); + g_free(port); +} + +void fp_port_reset(FpPort *port) +{ + fp_port_disable(port); + port->speed = 10000; /* 10Gbps */ + port->duplex = DUPLEX_FULL; + port->autoneg = 0; +} diff --git a/qemu/hw/net/rocker/rocker_fp.h b/qemu/hw/net/rocker/rocker_fp.h new file mode 100644 index 000000000..ab80fd833 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_fp.h @@ -0,0 +1,53 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_FP_H_ +#define _ROCKER_FP_H_ + +#include "net/net.h" +#include "qemu/iov.h" + +#define ROCKER_FP_PORTS_MAX 62 + +typedef struct fp_port FpPort; + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt); + +char *fp_port_get_name(FpPort *port); +bool fp_port_get_link_up(FpPort *port); +void fp_port_get_info(FpPort *port, RockerPortList *info); +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr); +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr); +uint8_t fp_port_get_learning(FpPort *port); +void fp_port_set_learning(FpPort *port, uint8_t learning); +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg); +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg); +bool fp_port_from_pport(uint32_t pport, uint32_t *port); +World *fp_port_get_world(FpPort *port); +void fp_port_set_world(FpPort *port, World *world); +bool fp_port_enabled(FpPort *port); +void fp_port_enable(FpPort *port); +void fp_port_disable(FpPort *port); + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, unsigned int index, + NICPeers *peers); +void fp_port_free(FpPort *port); +void fp_port_reset(FpPort *port); + +#endif /* _ROCKER_FP_H_ */ diff --git a/qemu/hw/net/rocker/rocker_hw.h b/qemu/hw/net/rocker/rocker_hw.h new file mode 100644 index 000000000..8c5083032 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_hw.h @@ -0,0 +1,493 @@ +/* + * Rocker switch hardware register and descriptor definitions. + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * + */ + +#ifndef _ROCKER_HW_ +#define _ROCKER_HW_ + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +/* + * Return codes + */ + +enum { + ROCKER_OK = 0, + ROCKER_ENOENT = 2, + ROCKER_ENXIO = 6, + ROCKER_ENOMEM = 12, + ROCKER_EEXIST = 17, + ROCKER_EINVAL = 22, + ROCKER_EMSGSIZE = 90, + ROCKER_ENOTSUP = 95, + ROCKER_ENOBUFS = 105, +}; + +/* + * PCI configuration space + */ + +#define ROCKER_PCI_REVISION 0x1 +#define ROCKER_PCI_BAR0_IDX 0 +#define ROCKER_PCI_BAR0_SIZE 0x2000 +#define ROCKER_PCI_MSIX_BAR_IDX 1 +#define ROCKER_PCI_MSIX_BAR_SIZE 0x2000 +#define ROCKER_PCI_MSIX_TABLE_OFFSET 0x0000 +#define ROCKER_PCI_MSIX_PBA_OFFSET 0x1000 + +/* + * MSI-X vectors + */ + +enum { + ROCKER_MSIX_VEC_CMD, + ROCKER_MSIX_VEC_EVENT, + ROCKER_MSIX_VEC_TEST, + ROCKER_MSIX_VEC_RESERVED0, + __ROCKER_MSIX_VEC_TX, + __ROCKER_MSIX_VEC_RX, +#define ROCKER_MSIX_VEC_TX(port) \ + (__ROCKER_MSIX_VEC_TX + ((port) * 2)) +#define ROCKER_MSIX_VEC_RX(port) \ + (__ROCKER_MSIX_VEC_RX + ((port) * 2)) +#define ROCKER_MSIX_VEC_COUNT(portcnt) \ + (ROCKER_MSIX_VEC_RX((portcnt) - 1) + 1) +}; + +/* + * Rocker bogus registers + */ +#define ROCKER_BOGUS_REG0 0x0000 +#define ROCKER_BOGUS_REG1 0x0004 +#define ROCKER_BOGUS_REG2 0x0008 +#define ROCKER_BOGUS_REG3 0x000c + +/* + * Rocker test registers + */ +#define ROCKER_TEST_REG 0x0010 +#define ROCKER_TEST_REG64 0x0018 /* 8-byte */ +#define ROCKER_TEST_IRQ 0x0020 +#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */ +#define ROCKER_TEST_DMA_SIZE 0x0030 +#define ROCKER_TEST_DMA_CTRL 0x0034 + +/* + * Rocker test register ctrl + */ +#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0) +#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1) +#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2) + +/* + * Rocker DMA ring register offsets + */ +#define ROCKER_DMA_DESC_BASE 0x1000 +#define ROCKER_DMA_DESC_SIZE 32 +#define ROCKER_DMA_DESC_MASK 0x1F +#define ROCKER_DMA_DESC_TOTAL_SIZE \ + (ROCKER_DMA_DESC_SIZE * 64) /* 62 ports + event + cmd */ +#define ROCKER_DMA_DESC_ADDR_OFFSET 0x00 /* 8-byte */ +#define ROCKER_DMA_DESC_SIZE_OFFSET 0x08 +#define ROCKER_DMA_DESC_HEAD_OFFSET 0x0c +#define ROCKER_DMA_DESC_TAIL_OFFSET 0x10 +#define ROCKER_DMA_DESC_CTRL_OFFSET 0x14 +#define ROCKER_DMA_DESC_CREDITS_OFFSET 0x18 +#define ROCKER_DMA_DESC_RSVD_OFFSET 0x1c + +/* + * Rocker dma ctrl register bits + */ +#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0) + +/* + * Rocker ring indices + */ +#define ROCKER_RING_CMD 0 +#define ROCKER_RING_EVENT 1 + +/* + * Helper macro to do convert a dma ring register + * to its index. Based on the fact that the register + * group stride is 32 bytes. + */ +#define ROCKER_RING_INDEX(reg) ((reg >> 5) & 0x7F) + +/* + * Rocker DMA Descriptor + */ + +typedef struct rocker_desc { + __le64 buf_addr; + uint64_t cookie; + __le16 buf_size; + __le16 tlv_size; + __le16 rsvd[5]; /* pad to 32 bytes */ + __le16 comp_err; +} __attribute__((packed, aligned(8))) RockerDesc; + +/* + * Rocker TLV type fields + */ + +typedef struct rocker_tlv { + __le32 type; + __le16 len; + __le16 rsvd; +} __attribute__((packed, aligned(8))) RockerTlv; + +/* cmd msg */ +enum { + ROCKER_TLV_CMD_UNSPEC, + ROCKER_TLV_CMD_TYPE, /* u16 */ + ROCKER_TLV_CMD_INFO, /* nest */ + + __ROCKER_TLV_CMD_MAX, + ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_TYPE_UNSPEC, + ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS, + + __ROCKER_TLV_CMD_TYPE_MAX, + ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1, +}; + +/* cmd info nested for set/get port settings */ +enum { + ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC, + ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */ + + __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + ROCKER_TLV_CMD_PORT_SETTINGS_MAX = __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1, +}; + +enum { + ROCKER_PORT_MODE_OF_DPA, +}; + +/* event msg */ +enum { + ROCKER_TLV_EVENT_UNSPEC, + ROCKER_TLV_EVENT_TYPE, /* u16 */ + ROCKER_TLV_EVENT_INFO, /* nest */ + + __ROCKER_TLV_EVENT_MAX, + ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_TYPE_UNSPEC, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN, + + __ROCKER_TLV_EVENT_TYPE_MAX, + ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1, +}; + +/* event info nested for link changed */ +enum { + ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC, + ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */ + ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */ + + __ROCKER_TLV_EVENT_LINK_CHANGED_MAX, + ROCKER_TLV_EVENT_LINK_CHANGED_MAX = __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1, +}; + +/* event info nested for MAC/VLAN */ +enum { + ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC, + ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */ + ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */ + ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */ + + __ROCKER_TLV_EVENT_MAC_VLAN_MAX, + ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1, +}; + +/* Rx msg */ +enum { + ROCKER_TLV_RX_UNSPEC, + ROCKER_TLV_RX_FLAGS, /* u16, see RX_FLAGS_ */ + ROCKER_TLV_RX_CSUM, /* u16 */ + ROCKER_TLV_RX_FRAG_ADDR, /* u64 */ + ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */ + ROCKER_TLV_RX_FRAG_LEN, /* u16 */ + + __ROCKER_TLV_RX_MAX, + ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, +}; + +#define ROCKER_RX_FLAGS_IPV4 (1 << 0) +#define ROCKER_RX_FLAGS_IPV6 (1 << 1) +#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3) +#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4) +#define ROCKER_RX_FLAGS_TCP (1 << 5) +#define ROCKER_RX_FLAGS_UDP (1 << 6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) + +/* Tx msg */ +enum { + ROCKER_TLV_TX_UNSPEC, + ROCKER_TLV_TX_OFFLOAD, /* u8, see TX_OFFLOAD_ */ + ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */ + ROCKER_TLV_TX_TSO_MSS, /* u16 */ + ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */ + ROCKER_TLV_TX_FRAGS, /* array */ + + __ROCKER_TLV_TX_MAX, + ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1, +}; + +#define ROCKER_TX_OFFLOAD_NONE 0 +#define ROCKER_TX_OFFLOAD_IP_CSUM 1 +#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2 +#define ROCKER_TX_OFFLOAD_L3_CSUM 3 +#define ROCKER_TX_OFFLOAD_TSO 4 + +#define ROCKER_TX_FRAGS_MAX 16 + +enum { + ROCKER_TLV_TX_FRAG_UNSPEC, + ROCKER_TLV_TX_FRAG, /* nest */ + + __ROCKER_TLV_TX_FRAG_MAX, + ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1, +}; + +enum { + ROCKER_TLV_TX_FRAG_ATTR_UNSPEC, + ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */ + ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */ + + __ROCKER_TLV_TX_FRAG_ATTR_MAX, + ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1, +}; + +/* + * cmd info nested for OF-DPA msgs + */ + +enum { + ROCKER_TLV_OF_DPA_UNSPEC, + ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */ + ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */ + ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */ + ROCKER_TLV_OF_DPA_COOKIE, /* u64 */ + ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */ + ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */ + ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */ + ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */ + ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */ + ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */ + ROCKER_TLV_OF_DPA_DST_MAC, /* binary */ + ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */ + ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */ + ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */ + ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */ + ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */ + ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */ + + __ROCKER_TLV_OF_DPA_MAX, + ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1, +}; + +/* + * OF-DPA table IDs + */ + +enum rocker_of_dpa_table_id { + ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0, + ROCKER_OF_DPA_TABLE_ID_VLAN = 10, + ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20, + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30, + ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40, + ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50, + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60, +}; + +/* + * OF-DPA flow stats + */ + +enum { + ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC, + ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */ + + __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX, + ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1, +}; + +/* + * OF-DPA group types + */ + +enum rocker_of_dpa_group_type { + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0, + ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE, + ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD, + ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE, + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP, + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY, +}; + +/* + * OF-DPA group L2 overlay types + */ + +enum rocker_of_dpa_overlay_type { + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0, + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST, +}; + +/* + * OF-DPA group ID encoding + */ + +#define ROCKER_GROUP_TYPE_SHIFT 28 +#define ROCKER_GROUP_TYPE_MASK 0xf0000000 +#define ROCKER_GROUP_VLAN_ID_SHIFT 16 +#define ROCKER_GROUP_VLAN_ID_MASK 0x0fff0000 +#define ROCKER_GROUP_PORT_SHIFT 0 +#define ROCKER_GROUP_PORT_MASK 0x0000ffff +#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12 +#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000 +#define ROCKER_GROUP_SUBTYPE_SHIFT 10 +#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00 +#define ROCKER_GROUP_INDEX_SHIFT 0 +#define ROCKER_GROUP_INDEX_MASK 0x0000ffff +#define ROCKER_GROUP_INDEX_LONG_SHIFT 0 +#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff + +#define ROCKER_GROUP_TYPE_GET(group_id) \ + (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT) +#define ROCKER_GROUP_TYPE_SET(type) \ + (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK) +#define ROCKER_GROUP_VLAN_GET(group_id) \ + (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT) +#define ROCKER_GROUP_VLAN_SET(vlan_id) \ + (((vlan_id) << ROCKER_GROUP_VLAN_ID_SHIFT) & ROCKER_GROUP_VLAN_ID_MASK) +#define ROCKER_GROUP_PORT_GET(group_id) \ + (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT) +#define ROCKER_GROUP_PORT_SET(port) \ + (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK) +#define ROCKER_GROUP_INDEX_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT) +#define ROCKER_GROUP_INDEX_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK) +#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \ + ROCKER_GROUP_INDEX_LONG_SHIFT) +#define ROCKER_GROUP_INDEX_LONG_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \ + ROCKER_GROUP_INDEX_LONG_MASK) + +#define ROCKER_GROUP_NONE 0 +#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port)) +#define ROCKER_GROUP_L2_REWRITE(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) +#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L3_UNICAST(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) + +/* + * Rocker general purpose registers + */ +#define ROCKER_CONTROL 0x0300 +#define ROCKER_PORT_PHYS_COUNT 0x0304 +#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */ +#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */ +#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ + +/* + * Rocker control bits + */ +#define ROCKER_CONTROL_RESET (1 << 0) + +#endif /* _ROCKER_HW_ */ diff --git a/qemu/hw/net/rocker/rocker_of_dpa.c b/qemu/hw/net/rocker/rocker_of_dpa.c new file mode 100644 index 000000000..874fb01d6 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_of_dpa.c @@ -0,0 +1,2630 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/eth.h" +#include "qemu/iov.h" +#include "qemu/timer.h" +#include "qmp-commands.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_tlv.h" +#include "rocker_world.h" +#include "rocker_desc.h" +#include "rocker_of_dpa.h" + +static const MACAddr zero_mac = { .a = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; +static const MACAddr ff_mac = { .a = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }; + +typedef struct of_dpa { + World *world; + GHashTable *flow_tbl; + GHashTable *group_tbl; + unsigned int flow_tbl_max_size; + unsigned int group_tbl_max_size; +} OfDpa; + +/* flow_key stolen mostly from OVS + * + * Note: fields that compare with network packet header fields + * are stored in network order (BE) to avoid per-packet field + * byte-swaps. + */ + +typedef struct of_dpa_flow_key { + uint32_t in_pport; /* ingress port */ + uint32_t tunnel_id; /* overlay tunnel id */ + uint32_t tbl_id; /* table id */ + struct { + __be16 vlan_id; /* 0 if no VLAN */ + MACAddr src; /* ethernet source address */ + MACAddr dst; /* ethernet destination address */ + __be16 type; /* ethernet frame type */ + } eth; + struct { + uint8_t proto; /* IP protocol or ARP opcode */ + uint8_t tos; /* IP ToS */ + uint8_t ttl; /* IP TTL/hop limit */ + uint8_t frag; /* one of FRAG_TYPE_* */ + } ip; + union { + struct { + struct { + __be32 src; /* IP source address */ + __be32 dst; /* IP destination address */ + } addr; + union { + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + MACAddr sha; /* ARP source hardware address */ + MACAddr tha; /* ARP target hardware address */ + } arp; + }; + } ipv4; + struct { + struct { + Ipv6Addr src; /* IPv6 source address */ + Ipv6Addr dst; /* IPv6 destination address */ + } addr; + __be32 label; /* IPv6 flow label */ + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + Ipv6Addr target; /* ND target address */ + MACAddr sll; /* ND source link layer address */ + MACAddr tll; /* ND target link layer address */ + } nd; + } ipv6; + }; + int width; /* how many uint64_t's in key? */ +} OfDpaFlowKey; + +/* Width of key which includes field 'f' in u64s, rounded up */ +#define FLOW_KEY_WIDTH(f) \ + ((offsetof(OfDpaFlowKey, f) + \ + sizeof(((OfDpaFlowKey *)0)->f) + \ + sizeof(uint64_t) - 1) / sizeof(uint64_t)) + +typedef struct of_dpa_flow_action { + uint32_t goto_tbl; + struct { + uint32_t group_id; + uint32_t tun_log_lport; + __be16 vlan_id; + } write; + struct { + __be16 new_vlan_id; + uint32_t out_pport; + uint8_t copy_to_cpu; + __be16 vlan_id; + } apply; +} OfDpaFlowAction; + +typedef struct of_dpa_flow { + uint32_t lpm; + uint32_t priority; + uint32_t hardtime; + uint32_t idletime; + uint64_t cookie; + OfDpaFlowKey key; + OfDpaFlowKey mask; + OfDpaFlowAction action; + struct { + uint64_t hits; + int64_t install_time; + int64_t refresh_time; + uint64_t rx_pkts; + uint64_t tx_pkts; + } stats; +} OfDpaFlow; + +typedef struct of_dpa_flow_pkt_fields { + uint32_t tunnel_id; + struct eth_header *ethhdr; + __be16 *h_proto; + struct vlan_header *vlanhdr; + struct ip_header *ipv4hdr; + struct ip6_header *ipv6hdr; + Ipv6Addr *ipv6_src_addr; + Ipv6Addr *ipv6_dst_addr; +} OfDpaFlowPktFields; + +typedef struct of_dpa_flow_context { + uint32_t in_pport; + uint32_t tunnel_id; + struct iovec *iov; + int iovcnt; + struct eth_header ethhdr_rewrite; + struct vlan_header vlanhdr_rewrite; + struct vlan_header vlanhdr; + OfDpa *of_dpa; + OfDpaFlowPktFields fields; + OfDpaFlowAction action_set; +} OfDpaFlowContext; + +typedef struct of_dpa_flow_match { + OfDpaFlowKey value; + OfDpaFlow *best; +} OfDpaFlowMatch; + +typedef struct of_dpa_group { + uint32_t id; + union { + struct { + uint32_t out_pport; + uint8_t pop_vlan; + } l2_interface; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + } l2_rewrite; + struct { + uint16_t group_count; + uint32_t *group_ids; + } l2_flood; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + uint8_t ttl_check; + } l3_unicast; + }; +} OfDpaGroup; + +static int of_dpa_mask2prefix(__be32 mask) +{ + int i; + int count = 32; + + for (i = 0; i < 32; i++) { + if (!(ntohl(mask) & ((2 << i) - 1))) { + count--; + } + } + + return count; +} + +#if defined(DEBUG_ROCKER) +static void of_dpa_flow_key_dump(OfDpaFlowKey *key, OfDpaFlowKey *mask) +{ + char buf[512], *b = buf, *mac; + + b += sprintf(b, " tbl %2d", key->tbl_id); + + if (key->in_pport || (mask && mask->in_pport)) { + b += sprintf(b, " in_pport %2d", key->in_pport); + if (mask && mask->in_pport != 0xffffffff) { + b += sprintf(b, "/0x%08x", key->in_pport); + } + } + + if (key->tunnel_id || (mask && mask->tunnel_id)) { + b += sprintf(b, " tun %8d", key->tunnel_id); + if (mask && mask->tunnel_id != 0xffffffff) { + b += sprintf(b, "/0x%08x", key->tunnel_id); + } + } + + if (key->eth.vlan_id || (mask && mask->eth.vlan_id)) { + b += sprintf(b, " vlan %4d", ntohs(key->eth.vlan_id)); + if (mask && mask->eth.vlan_id != 0xffff) { + b += sprintf(b, "/0x%04x", ntohs(key->eth.vlan_id)); + } + } + + if (memcmp(key->eth.src.a, zero_mac.a, ETH_ALEN) || + (mask && memcmp(mask->eth.src.a, zero_mac.a, ETH_ALEN))) { + mac = qemu_mac_strdup_printf(key->eth.src.a); + b += sprintf(b, " src %s", mac); + g_free(mac); + if (mask && memcmp(mask->eth.src.a, ff_mac.a, ETH_ALEN)) { + mac = qemu_mac_strdup_printf(mask->eth.src.a); + b += sprintf(b, "/%s", mac); + g_free(mac); + } + } + + if (memcmp(key->eth.dst.a, zero_mac.a, ETH_ALEN) || + (mask && memcmp(mask->eth.dst.a, zero_mac.a, ETH_ALEN))) { + mac = qemu_mac_strdup_printf(key->eth.dst.a); + b += sprintf(b, " dst %s", mac); + g_free(mac); + if (mask && memcmp(mask->eth.dst.a, ff_mac.a, ETH_ALEN)) { + mac = qemu_mac_strdup_printf(mask->eth.dst.a); + b += sprintf(b, "/%s", mac); + g_free(mac); + } + } + + if (key->eth.type || (mask && mask->eth.type)) { + b += sprintf(b, " type 0x%04x", ntohs(key->eth.type)); + if (mask && mask->eth.type != 0xffff) { + b += sprintf(b, "/0x%04x", ntohs(mask->eth.type)); + } + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + if (key->ip.proto || (mask && mask->ip.proto)) { + b += sprintf(b, " ip proto %2d", key->ip.proto); + if (mask && mask->ip.proto != 0xff) { + b += sprintf(b, "/0x%02x", mask->ip.proto); + } + } + if (key->ip.tos || (mask && mask->ip.tos)) { + b += sprintf(b, " ip tos %2d", key->ip.tos); + if (mask && mask->ip.tos != 0xff) { + b += sprintf(b, "/0x%02x", mask->ip.tos); + } + } + break; + } + switch (ntohs(key->eth.type)) { + case 0x0800: + if (key->ipv4.addr.dst || (mask && mask->ipv4.addr.dst)) { + b += sprintf(b, " dst %s", + inet_ntoa(*(struct in_addr *)&key->ipv4.addr.dst)); + if (mask) { + b += sprintf(b, "/%d", + of_dpa_mask2prefix(mask->ipv4.addr.dst)); + } + } + break; + } + } + + DPRINTF("%s\n", buf); +} +#else +#define of_dpa_flow_key_dump(k, m) +#endif + +static void _of_dpa_flow_match(void *key, void *value, void *user_data) +{ + OfDpaFlow *flow = value; + OfDpaFlowMatch *match = user_data; + uint64_t *k = (uint64_t *)&flow->key; + uint64_t *m = (uint64_t *)&flow->mask; + uint64_t *v = (uint64_t *)&match->value; + int i; + + if (flow->key.tbl_id == match->value.tbl_id) { + of_dpa_flow_key_dump(&flow->key, &flow->mask); + } + + if (flow->key.width > match->value.width) { + return; + } + + for (i = 0; i < flow->key.width; i++, k++, m++, v++) { + if ((~*k & *m & *v) | (*k & *m & ~*v)) { + return; + } + } + + DPRINTF("match\n"); + + if (!match->best || + flow->priority > match->best->priority || + flow->lpm > match->best->lpm) { + match->best = flow; + } +} + +static OfDpaFlow *of_dpa_flow_match(OfDpa *of_dpa, OfDpaFlowMatch *match) +{ + DPRINTF("\nnew search\n"); + of_dpa_flow_key_dump(&match->value, NULL); + + g_hash_table_foreach(of_dpa->flow_tbl, _of_dpa_flow_match, match); + + return match->best; +} + +static OfDpaFlow *of_dpa_flow_find(OfDpa *of_dpa, uint64_t cookie) +{ + return g_hash_table_lookup(of_dpa->flow_tbl, &cookie); +} + +static int of_dpa_flow_add(OfDpa *of_dpa, OfDpaFlow *flow) +{ + g_hash_table_insert(of_dpa->flow_tbl, &flow->cookie, flow); + + return ROCKER_OK; +} + +static void of_dpa_flow_del(OfDpa *of_dpa, OfDpaFlow *flow) +{ + g_hash_table_remove(of_dpa->flow_tbl, &flow->cookie); +} + +static OfDpaFlow *of_dpa_flow_alloc(uint64_t cookie) +{ + OfDpaFlow *flow; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + + flow = g_malloc0(sizeof(OfDpaFlow)); + if (!flow) { + return NULL; + } + + flow->cookie = cookie; + flow->mask.tbl_id = 0xffffffff; + + flow->stats.install_time = flow->stats.refresh_time = now; + + return flow; +} + +static void of_dpa_flow_pkt_hdr_reset(OfDpaFlowContext *fc) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + fc->iov[0].iov_base = fields->ethhdr; + fc->iov[0].iov_len = sizeof(struct eth_header); + fc->iov[1].iov_base = fields->vlanhdr; + fc->iov[1].iov_len = fields->vlanhdr ? sizeof(struct vlan_header) : 0; +} + +static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, + const struct iovec *iov, int iovcnt) +{ + OfDpaFlowPktFields *fields = &fc->fields; + size_t sofar = 0; + int i; + + sofar += sizeof(struct eth_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on eth_header\n"); + return; + } + + fields->ethhdr = iov->iov_base; + fields->h_proto = &fields->ethhdr->h_proto; + + if (ntohs(*fields->h_proto) == ETH_P_VLAN) { + sofar += sizeof(struct vlan_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on vlan_header\n"); + return; + } + fields->vlanhdr = (struct vlan_header *)(fields->ethhdr + 1); + fields->h_proto = &fields->vlanhdr->h_proto; + } + + switch (ntohs(*fields->h_proto)) { + case ETH_P_IP: + sofar += sizeof(struct ip_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on ip_header\n"); + return; + } + fields->ipv4hdr = (struct ip_header *)(fields->h_proto + 1); + break; + case ETH_P_IPV6: + sofar += sizeof(struct ip6_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on ip6_header\n"); + return; + } + fields->ipv6hdr = (struct ip6_header *)(fields->h_proto + 1); + break; + } + + /* To facilitate (potential) VLAN tag insertion, Make a + * copy of the iov and insert two new vectors at the + * beginning for eth hdr and vlan hdr. No data is copied, + * just the vectors. + */ + + of_dpa_flow_pkt_hdr_reset(fc); + + fc->iov[2].iov_base = fields->h_proto + 1; + fc->iov[2].iov_len = iov->iov_len - fc->iov[0].iov_len - fc->iov[1].iov_len; + + for (i = 1; i < iovcnt; i++) { + fc->iov[i+2] = iov[i]; + } + + fc->iovcnt = iovcnt + 2; +} + +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + uint16_t h_proto = fields->ethhdr->h_proto; + + if (fields->vlanhdr) { + DPRINTF("flow_pkt_insert_vlan packet already has vlan\n"); + return; + } + + fields->ethhdr->h_proto = htons(ETH_P_VLAN); + fields->vlanhdr = &fc->vlanhdr; + fields->vlanhdr->h_tci = vlan_id; + fields->vlanhdr->h_proto = h_proto; + fields->h_proto = &fields->vlanhdr->h_proto; + + fc->iov[1].iov_base = fields->vlanhdr; + fc->iov[1].iov_len = sizeof(struct vlan_header); +} + +static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (!fields->vlanhdr) { + return; + } + + fc->iov[0].iov_len -= sizeof(fields->ethhdr->h_proto); + fc->iov[1].iov_base = fields->h_proto; + fc->iov[1].iov_len = sizeof(fields->ethhdr->h_proto); +} + +static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, + uint8_t *src_mac, uint8_t *dst_mac, + __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (src_mac || dst_mac) { + memcpy(&fc->ethhdr_rewrite, fields->ethhdr, sizeof(struct eth_header)); + if (src_mac && memcmp(src_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_source, src_mac, ETH_ALEN); + } + if (dst_mac && memcmp(dst_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_dest, dst_mac, ETH_ALEN); + } + fc->iov[0].iov_base = &fc->ethhdr_rewrite; + } + + if (vlan_id && fields->vlanhdr) { + fc->vlanhdr_rewrite = fc->vlanhdr; + fc->vlanhdr_rewrite.h_tci = vlan_id; + fc->iov[1].iov_base = &fc->vlanhdr_rewrite; + } +} + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id); + +static void of_dpa_ig_port_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + match->value.in_pport = fc->in_pport; + match->value.width = FLOW_KEY_WIDTH(tbl_id); +} + +static void of_dpa_ig_port_miss(OfDpaFlowContext *fc) +{ + uint32_t port; + + /* The default on miss is for packets from physical ports + * to go to the VLAN Flow Table. There is no default rule + * for packets from logical ports, which are dropped on miss. + */ + + if (fp_port_from_pport(fc->in_pport, &port)) { + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_VLAN); + } +} + +static void of_dpa_vlan_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + match->value.in_pport = fc->in_pport; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } + match->value.width = FLOW_KEY_WIDTH(eth.vlan_id); +} + +static void of_dpa_vlan_insert(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.apply.new_vlan_id) { + of_dpa_flow_pkt_insert_vlan(fc, flow->action.apply.new_vlan_id); + } +} + +static void of_dpa_term_mac_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + match->value.in_pport = fc->in_pport; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.type); +} + +static void of_dpa_term_mac_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_BRIDGING); +} + +static void of_dpa_apply_actions(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + fc->action_set.apply.copy_to_cpu = flow->action.apply.copy_to_cpu; + fc->action_set.apply.vlan_id = flow->key.eth.vlan_id; +} + +static void of_dpa_bridging_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } else if (fc->tunnel_id) { + match->value.tunnel_id = fc->tunnel_id; + } + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.dst); +} + +static void of_dpa_bridging_learn(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + uint8_t *addr; + uint16_t vlan_id; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + int64_t refresh_delay = 1; + + /* Do a lookup in bridge table by src_mac/vlan */ + + addr = fc->fields.ethhdr->h_source; + vlan_id = fc->fields.vlanhdr->h_tci; + + match.value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + match.value.eth.vlan_id = vlan_id; + memcpy(match.value.eth.dst.a, addr, sizeof(match.value.eth.dst.a)); + match.value.width = FLOW_KEY_WIDTH(eth.dst); + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (flow) { + if (!memcmp(flow->mask.eth.dst.a, ff_mac.a, + sizeof(flow->mask.eth.dst.a))) { + /* src_mac/vlan already learned; if in_port and out_port + * don't match, the end station has moved and the port + * needs updating */ + /* XXX implement the in_port/out_port check */ + if (now - flow->stats.refresh_time < refresh_delay) { + return; + } + flow->stats.refresh_time = now; + } + } + + /* Let driver know about mac/vlan. This may be a new mac/vlan + * or a refresh of existing mac/vlan that's been hit after the + * refresh_delay. + */ + + rocker_event_mac_vlan_seen(world_rocker(fc->of_dpa->world), + fc->in_pport, addr, vlan_id); +} + +static void of_dpa_bridging_miss(OfDpaFlowContext *fc) +{ + of_dpa_bridging_learn(fc, NULL); + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_bridging_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.tun_log_lport = flow->action.write.tun_log_lport; +} + +static void of_dpa_unicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_unicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_unicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void +of_dpa_multicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.src = fc->fields.ipv4hdr->ip_src; + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_src_addr) { + memcpy(&match->value.ipv6.addr.src, fc->fields.ipv6_src_addr, + sizeof(match->value.ipv6.addr.src)); + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_multicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void +of_dpa_multicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.vlan_id = flow->action.write.vlan_id; +} + +static void of_dpa_acl_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + match->value.in_pport = fc->in_pport; + memcpy(match->value.eth.src.a, fc->fields.ethhdr->h_source, + sizeof(match->value.eth.src.a)); + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + match->value.width = FLOW_KEY_WIDTH(eth.type); + if (fc->fields.ipv4hdr) { + match->value.ip.proto = fc->fields.ipv4hdr->ip_p; + match->value.ip.tos = fc->fields.ipv4hdr->ip_tos; + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } else if (fc->fields.ipv6hdr) { + match->value.ip.proto = + fc->fields.ipv6hdr->ip6_ctlun.ip6_un1.ip6_un1_nxt; + match->value.ip.tos = 0; /* XXX what goes here? */ + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } +} + +static void of_dpa_eg(OfDpaFlowContext *fc); +static void of_dpa_acl_hit(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + of_dpa_eg(fc); +} + +static void of_dpa_acl_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void of_dpa_drop(OfDpaFlowContext *fc) +{ + /* drop packet */ +} + +static OfDpaGroup *of_dpa_group_find(OfDpa *of_dpa, + uint32_t group_id) +{ + return g_hash_table_lookup(of_dpa->group_tbl, &group_id); +} + +static int of_dpa_group_add(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_insert(of_dpa->group_tbl, &group->id, group); + + return 0; +} + +#if 0 +static int of_dpa_group_mod(OfDpa *of_dpa, OfDpaGroup *group) +{ + OfDpaGroup *old_group = of_dpa_group_find(of_dpa, group->id); + + if (!old_group) { + return -ENOENT; + } + + /* XXX */ + + return 0; +} +#endif + +static int of_dpa_group_del(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_remove(of_dpa->group_tbl, &group->id); + + return 0; +} + +#if 0 +static int of_dpa_group_get_stats(OfDpa *of_dpa, uint32_t id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, id); + + if (!group) { + return -ENOENT; + } + + /* XXX get/return stats */ + + return 0; +} +#endif + +static OfDpaGroup *of_dpa_group_alloc(uint32_t id) +{ + OfDpaGroup *group = g_malloc0(sizeof(OfDpaGroup)); + + if (!group) { + return NULL; + } + + group->id = id; + + return group; +} + +static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + + if (group->l2_interface.pop_vlan) { + of_dpa_flow_pkt_strip_vlan(fc); + } + + /* Note: By default, and as per the OpenFlow 1.3.1 + * specification, a packet cannot be forwarded back + * to the IN_PORT from which it came in. An action + * bucket that specifies the particular packet's + * egress port is not evaluated. + */ + + if (group->l2_interface.out_pport == 0) { + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); + } else if (group->l2_interface.out_pport != fc->in_pport) { + rocker_port_eg(world_rocker(fc->of_dpa->world), + group->l2_interface.out_pport, + fc->iov, fc->iovcnt); + } +} + +static void of_dpa_output_l2_rewrite(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l2_rewrite.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l2_rewrite.src_mac.a, + group->l2_rewrite.dst_mac.a, + group->l2_rewrite.vlan_id); + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_output_l2_flood(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group; + int i; + + for (i = 0; i < group->l2_flood.group_count; i++) { + of_dpa_flow_pkt_hdr_reset(fc); + l2_group = of_dpa_group_find(fc->of_dpa, group->l2_flood.group_ids[i]); + if (!l2_group) { + continue; + } + switch (ROCKER_GROUP_TYPE_GET(l2_group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, l2_group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, l2_group); + break; + } + } +} + +static void of_dpa_output_l3_unicast(OfDpaFlowContext *fc, OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l3_unicast.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l3_unicast.src_mac.a, + group->l3_unicast.dst_mac.a, + group->l3_unicast.vlan_id); + /* XXX need ttl_check */ + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_eg(OfDpaFlowContext *fc) +{ + OfDpaFlowAction *set = &fc->action_set; + OfDpaGroup *group; + uint32_t group_id; + + /* send a copy of pkt to CPU (controller)? */ + + if (set->apply.copy_to_cpu) { + group_id = ROCKER_GROUP_L2_INTERFACE(set->apply.vlan_id, 0); + group = of_dpa_group_find(fc->of_dpa, group_id); + if (group) { + of_dpa_output_l2_interface(fc, group); + of_dpa_flow_pkt_hdr_reset(fc); + } + } + + /* process group write actions */ + + if (!set->write.group_id) { + return; + } + + group = of_dpa_group_find(fc->of_dpa, set->write.group_id); + if (!group) { + return; + } + + switch (ROCKER_GROUP_TYPE_GET(group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + of_dpa_output_l2_flood(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + of_dpa_output_l3_unicast(fc, group); + break; + } +} + +typedef struct of_dpa_flow_tbl_ops { + void (*build_match)(OfDpaFlowContext *fc, OfDpaFlowMatch *match); + void (*hit)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*miss)(OfDpaFlowContext *fc); + void (*hit_no_goto)(OfDpaFlowContext *fc); + void (*action_apply)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*action_write)(OfDpaFlowContext *fc, OfDpaFlow *flow); +} OfDpaFlowTblOps; + +static OfDpaFlowTblOps of_dpa_tbl_ops[] = { + [ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT] = { + .build_match = of_dpa_ig_port_build_match, + .miss = of_dpa_ig_port_miss, + .hit_no_goto = of_dpa_drop, + }, + [ROCKER_OF_DPA_TABLE_ID_VLAN] = { + .build_match = of_dpa_vlan_build_match, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_vlan_insert, + }, + [ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC] = { + .build_match = of_dpa_term_mac_build_match, + .miss = of_dpa_term_mac_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + }, + [ROCKER_OF_DPA_TABLE_ID_BRIDGING] = { + .build_match = of_dpa_bridging_build_match, + .hit = of_dpa_bridging_learn, + .miss = of_dpa_bridging_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_bridging_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING] = { + .build_match = of_dpa_unicast_routing_build_match, + .miss = of_dpa_unicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_unicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING] = { + .build_match = of_dpa_multicast_routing_build_match, + .miss = of_dpa_multicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_multicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_ACL_POLICY] = { + .build_match = of_dpa_acl_build_match, + .hit = of_dpa_acl_hit, + .miss = of_dpa_eg, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_acl_action_write, + }, +}; + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id) +{ + OfDpaFlowTblOps *ops = &of_dpa_tbl_ops[tbl_id]; + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + + if (ops->build_match) { + ops->build_match(fc, &match); + } else { + return; + } + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (!flow) { + if (ops->miss) { + ops->miss(fc); + } + return; + } + + flow->stats.hits++; + + if (ops->action_apply) { + ops->action_apply(fc, flow); + } + + if (ops->action_write) { + ops->action_write(fc, flow); + } + + if (ops->hit) { + ops->hit(fc, flow); + } + + if (flow->action.goto_tbl) { + of_dpa_flow_ig_tbl(fc, flow->action.goto_tbl); + } else if (ops->hit_no_goto) { + ops->hit_no_goto(fc); + } + + /* drop packet */ +} + +static ssize_t of_dpa_ig(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + struct iovec iov_copy[iovcnt + 2]; + OfDpaFlowContext fc = { + .of_dpa = world_private(world), + .in_pport = pport, + .iov = iov_copy, + .iovcnt = iovcnt + 2, + }; + + of_dpa_flow_pkt_parse(&fc, iov, iovcnt); + of_dpa_flow_ig_tbl(&fc, ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT); + + return iov_size(iov, iovcnt); +} + +#define ROCKER_TUNNEL_LPORT 0x00010000 + +static int of_dpa_cmd_add_ig_port(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool overlay_tunnel; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + key->width = FLOW_KEY_WIDTH(tbl_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + overlay_tunnel = !!(key->in_pport & ROCKER_TUNNEL_LPORT); + + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (!overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_VLAN) { + return -ROCKER_EINVAL; + } + + if (overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_BRIDGING) { + return -ROCKER_EINVAL; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_vlan(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + uint32_t port; + bool untagged; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + DPRINTF("Must give in_pport and vlan_id to install VLAN tbl entry\n"); + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + key->width = FLOW_KEY_WIDTH(eth.vlan_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + DPRINTF("in_pport (%d) not a front-panel port\n", key->in_pport); + return -ROCKER_EINVAL; + } + mask->in_pport = 0xffffffff; + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + if (key->eth.vlan_id) { + untagged = false; /* filtering */ + } else { + untagged = true; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + DPRINTF("Goto tbl (%d) must be TERM_MAC\n", action->goto_tbl); + return -ROCKER_EINVAL; + } + } + + if (untagged) { + if (!flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]) { + DPRINTF("Must specify new vlan_id if untagged\n"); + return -ROCKER_EINVAL; + } + action->apply.new_vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]); + if (1 > ntohs(action->apply.new_vlan_id) || + ntohs(action->apply.new_vlan_id) > 4095) { + DPRINTF("New vlan_id (%d) must be between 1 and 4095\n", + ntohs(action->apply.new_vlan_id)); + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_term_mac(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + const MACAddr ipv4_mcast = { .a = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 } }; + const MACAddr ipv4_mask = { .a = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 } }; + const MACAddr ipv6_mcast = { .a = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 } }; + const MACAddr ipv6_mask = { .a = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } }; + uint32_t port; + bool unicast = false; + bool multicast = false; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + return -ROCKER_EINVAL; + } + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type != htons(0x0800) && key->eth.type != htons(0x86dd)) { + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + + if ((key->eth.dst.a[0] & 0x01) == 0x00) { + unicast = true; + } + + /* only two wildcard rules are acceptable for IPv4 and IPv6 multicast */ + if (memcmp(key->eth.dst.a, ipv4_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv4_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + if (memcmp(key->eth.dst.a, ipv6_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv6_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + + if (!unicast && !multicast) { + return -ROCKER_EINVAL; + } + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (unicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (multicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_bridging(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool unicast = false; + bool dst_mac = false; + bool dst_mac_mask = false; + enum { + BRIDGING_MODE_UNKNOWN, + BRIDGING_MODE_VLAN_UCAST, + BRIDGING_MODE_VLAN_MCAST, + BRIDGING_MODE_VLAN_DFLT, + BRIDGING_MODE_TUNNEL_UCAST, + BRIDGING_MODE_TUNNEL_MCAST, + BRIDGING_MODE_TUNNEL_DFLT, + } mode = BRIDGING_MODE_UNKNOWN; + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + key->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + mask->eth.vlan_id = 0xffff; + key->width = FLOW_KEY_WIDTH(eth.vlan_id); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) { + key->tunnel_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]); + mask->tunnel_id = 0xffffffff; + key->width = FLOW_KEY_WIDTH(tunnel_id); + } + + /* can't do VLAN bridging and tunnel bridging at same time */ + if (key->eth.vlan_id && key->tunnel_id) { + DPRINTF("can't do VLAN bridging and tunnel bridging at same time\n"); + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + key->width = FLOW_KEY_WIDTH(eth.dst); + dst_mac = true; + unicast = (key->eth.dst.a[0] & 0x01) == 0x00; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) { + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + key->width = FLOW_KEY_WIDTH(eth.dst); + dst_mac_mask = true; + } else if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(mask->eth.dst.a, ff_mac.a, sizeof(mask->eth.dst.a)); + } + + if (key->eth.vlan_id) { + if (dst_mac && !dst_mac_mask) { + mode = unicast ? BRIDGING_MODE_VLAN_UCAST : + BRIDGING_MODE_VLAN_MCAST; + } else if ((dst_mac && dst_mac_mask) || !dst_mac) { + mode = BRIDGING_MODE_VLAN_DFLT; + } + } else if (key->tunnel_id) { + if (dst_mac && !dst_mac_mask) { + mode = unicast ? BRIDGING_MODE_TUNNEL_UCAST : + BRIDGING_MODE_TUNNEL_MCAST; + } else if ((dst_mac && dst_mac_mask) || !dst_mac) { + mode = BRIDGING_MODE_TUNNEL_DFLT; + } + } + + if (mode == BRIDGING_MODE_UNKNOWN) { + DPRINTF("Unknown bridging mode\n"); + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + DPRINTF("Briding goto tbl must be ACL policy\n"); + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + switch (mode) { + case BRIDGING_MODE_VLAN_UCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) { + DPRINTF("Bridging mode vlan ucast needs L2 " + "interface group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_VLAN_MCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) { + DPRINTF("Bridging mode vlan mcast needs L2 " + "mcast group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_VLAN_DFLT: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) { + DPRINTF("Bridging mode vlan dflt needs L2 " + "flood group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_TUNNEL_MCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) { + DPRINTF("Bridging mode tunnel mcast needs L2 " + "overlay group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_TUNNEL_DFLT: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) { + DPRINTF("Bridging mode tunnel dflt needs L2 " + "overlay group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + default: + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]) { + action->write.tun_log_lport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]); + if (mode != BRIDGING_MODE_TUNNEL_UCAST) { + DPRINTF("Have tunnel logical port but not " + "in bridging tunnel mode\n"); + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_unicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + UNICAST_ROUTING_MODE_UNKNOWN, + UNICAST_ROUTING_MODE_IPV4, + UNICAST_ROUTING_MODE_IPV6, + } mode = UNICAST_ROUTING_MODE_UNKNOWN; + uint8_t type; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + key->width = FLOW_KEY_WIDTH(ipv6.addr.dst); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + switch (ntohs(key->eth.type)) { + case 0x0800: + mode = UNICAST_ROUTING_MODE_IPV4; + break; + case 0x86dd: + mode = UNICAST_ROUTING_MODE_IPV6; + break; + default: + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + switch (mode) { + case UNICAST_ROUTING_MODE_IPV4: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) { + return -ROCKER_EINVAL; + } + key->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]); + if (ipv4_addr_is_multicast(key->ipv4.addr.dst)) { + return -ROCKER_EINVAL; + } + flow->lpm = of_dpa_mask2prefix(htonl(0xffffffff)); + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]) { + mask->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]); + flow->lpm = of_dpa_mask2prefix(mask->ipv4.addr.dst); + } + break; + case UNICAST_ROUTING_MODE_IPV6: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) { + return -ROCKER_EINVAL; + } + memcpy(&key->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]), + sizeof(key->ipv6.addr.dst)); + if (ipv6_addr_is_multicast(&key->ipv6.addr.dst)) { + return -ROCKER_EINVAL; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]) { + memcpy(&mask->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]), + sizeof(mask->ipv6.addr.dst)); + } + break; + default: + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + type = ROCKER_GROUP_TYPE_GET(action->write.group_id); + if (type != ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP) { + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_multicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + MULTICAST_ROUTING_MODE_UNKNOWN, + MULTICAST_ROUTING_MODE_IPV4, + MULTICAST_ROUTING_MODE_IPV6, + } mode = MULTICAST_ROUTING_MODE_UNKNOWN; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + key->width = FLOW_KEY_WIDTH(ipv6.addr.dst); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + switch (ntohs(key->eth.type)) { + case 0x0800: + mode = MULTICAST_ROUTING_MODE_IPV4; + break; + case 0x86dd: + mode = MULTICAST_ROUTING_MODE_IPV6; + break; + default: + return -ROCKER_EINVAL; + } + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + + switch (mode) { + case MULTICAST_ROUTING_MODE_IPV4: + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) { + key->ipv4.addr.src = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]) { + mask->ipv4.addr.src = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]); + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) { + if (mask->ipv4.addr.src != 0) { + return -ROCKER_EINVAL; + } + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) { + return -ROCKER_EINVAL; + } + + key->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]); + if (!ipv4_addr_is_multicast(key->ipv4.addr.dst)) { + return -ROCKER_EINVAL; + } + + break; + + case MULTICAST_ROUTING_MODE_IPV6: + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) { + memcpy(&key->ipv6.addr.src, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]), + sizeof(key->ipv6.addr.src)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]) { + memcpy(&mask->ipv6.addr.src, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]), + sizeof(mask->ipv6.addr.src)); + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) { + if (mask->ipv6.addr.src.addr32[0] != 0 && + mask->ipv6.addr.src.addr32[1] != 0 && + mask->ipv6.addr.src.addr32[2] != 0 && + mask->ipv6.addr.src.addr32[3] != 0) { + return -ROCKER_EINVAL; + } + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) { + return -ROCKER_EINVAL; + } + + memcpy(&key->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]), + sizeof(key->ipv6.addr.dst)); + if (!ipv6_addr_is_multicast(&key->ipv6.addr.dst)) { + return -ROCKER_EINVAL; + } + + break; + + default: + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST) { + return -ROCKER_EINVAL; + } + action->write.vlan_id = key->eth.vlan_id; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_acl_ip(OfDpaFlowKey *key, OfDpaFlowKey *mask, + RockerTlv **flow_tlvs) +{ + key->width = FLOW_KEY_WIDTH(ip.tos); + + key->ip.proto = 0; + key->ip.tos = 0; + mask->ip.proto = 0; + mask->ip.tos = 0; + + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]) { + key->ip.proto = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]) { + mask->ip.proto = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]) { + key->ip.tos = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]) { + mask->ip.tos = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) { + key->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) << 6; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) { + mask->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) << 6; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_acl(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + ACL_MODE_UNKNOWN, + ACL_MODE_IPV4_VLAN, + ACL_MODE_IPV6_VLAN, + ACL_MODE_IPV4_TENANT, + ACL_MODE_IPV6_TENANT, + ACL_MODE_NON_IP_VLAN, + ACL_MODE_NON_IP_TENANT, + ACL_MODE_ANY_VLAN, + ACL_MODE_ANY_TENANT, + } mode = ACL_MODE_UNKNOWN; + int err = ROCKER_OK; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] && + flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(key->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(key->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]) { + memcpy(mask->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]), + sizeof(mask->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) { + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + } + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type) { + mask->eth.type = 0xffff; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + key->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + switch (ntohs(key->eth.type)) { + case 0x0000: + mode = (key->eth.vlan_id) ? ACL_MODE_ANY_VLAN : ACL_MODE_ANY_TENANT; + break; + case 0x0800: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV4_VLAN : ACL_MODE_IPV4_TENANT; + break; + case 0x86dd: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV6_VLAN : ACL_MODE_IPV6_TENANT; + break; + default: + mode = (key->eth.vlan_id) ? ACL_MODE_NON_IP_VLAN : + ACL_MODE_NON_IP_TENANT; + break; + } + + /* XXX only supporting VLAN modes for now */ + if (mode != ACL_MODE_IPV4_VLAN && + mode != ACL_MODE_IPV6_VLAN && + mode != ACL_MODE_NON_IP_VLAN && + mode != ACL_MODE_ANY_VLAN) { + return -ROCKER_EINVAL; + } + + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + err = of_dpa_cmd_add_acl_ip(key, mask, flow_tlvs); + break; + } + + if (err) { + return err; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_add_mod(OfDpa *of_dpa, OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + enum rocker_of_dpa_table_id tbl; + int err = ROCKER_OK; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY] || + !flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]) { + return -ROCKER_EINVAL; + } + + tbl = rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID]); + flow->priority = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY]); + flow->hardtime = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]) { + if (tbl == ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT || + tbl == ROCKER_OF_DPA_TABLE_ID_VLAN || + tbl == ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + return -ROCKER_EINVAL; + } + flow->idletime = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]); + } + + switch (tbl) { + case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT: + err = of_dpa_cmd_add_ig_port(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_VLAN: + err = of_dpa_cmd_add_vlan(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC: + err = of_dpa_cmd_add_term_mac(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_BRIDGING: + err = of_dpa_cmd_add_bridging(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING: + err = of_dpa_cmd_add_unicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING: + err = of_dpa_cmd_add_multicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY: + err = of_dpa_cmd_add_acl(flow, flow_tlvs); + break; + } + + return err; +} + +static int of_dpa_cmd_flow_add(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + int err = ROCKER_OK; + + if (flow) { + return -ROCKER_EEXIST; + } + + flow = of_dpa_flow_alloc(cookie); + if (!flow) { + return -ROCKER_ENOMEM; + } + + err = of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs); + if (err) { + g_free(flow); + return err; + } + + return of_dpa_flow_add(of_dpa, flow); +} + +static int of_dpa_cmd_flow_mod(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + return of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs); +} + +static int of_dpa_cmd_flow_del(OfDpa *of_dpa, uint64_t cookie) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + of_dpa_flow_del(of_dpa, flow); + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_get_stats(OfDpa *of_dpa, uint64_t cookie, + struct desc_info *info, char *buf) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + size_t tlv_size; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + int pos; + + if (!flow) { + return -ROCKER_ENOENT; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint32_t)) + /* duration */ + rocker_tlv_total_size(sizeof(uint64_t)) + /* rx_pkts */ + rocker_tlv_total_size(sizeof(uint64_t)); /* tx_ptks */ + + if (tlv_size > desc_buf_size(info)) { + return -ROCKER_EMSGSIZE; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, + (int32_t)(now - flow->stats.install_time)); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, + flow->stats.rx_pkts); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, + flow->stats.tx_pkts); + + return desc_set_buf(info, tlv_size); +} + +static int of_dpa_flow_cmd(OfDpa *of_dpa, struct desc_info *info, + char *buf, uint16_t cmd, + RockerTlv **flow_tlvs) +{ + uint64_t cookie; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]) { + return -ROCKER_EINVAL; + } + + cookie = rocker_tlv_get_le64(flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + return of_dpa_cmd_flow_add(of_dpa, cookie, flow_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + return of_dpa_cmd_flow_mod(of_dpa, cookie, flow_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + return of_dpa_cmd_flow_del(of_dpa, cookie); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + return of_dpa_cmd_flow_get_stats(of_dpa, cookie, info, buf); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd_add_l2_interface(OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + if (!group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT] || + !group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]) { + return -ROCKER_EINVAL; + } + + group->l2_interface.out_pport = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT]); + group->l2_interface.pop_vlan = + rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]); + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_l2_rewrite(OfDpa *of_dpa, OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + OfDpaGroup *l2_interface_group; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) { + return -ROCKER_EINVAL; + } + + group->l2_rewrite.group_id = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]); + + l2_interface_group = of_dpa_group_find(of_dpa, group->l2_rewrite.group_id); + if (!l2_interface_group || + ROCKER_GROUP_TYPE_GET(l2_interface_group->id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) { + DPRINTF("l2 rewrite group needs a valid l2 interface group\n"); + return -ROCKER_EINVAL; + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(group->l2_rewrite.src_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(group->l2_rewrite.src_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(group->l2_rewrite.dst_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(group->l2_rewrite.dst_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + group->l2_rewrite.vlan_id = + rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + if (ROCKER_GROUP_VLAN_GET(l2_interface_group->id) != + (ntohs(group->l2_rewrite.vlan_id) & VLAN_VID_MASK)) { + DPRINTF("Set VLAN ID must be same as L2 interface group\n"); + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + OfDpaGroup *l2_group; + RockerTlv **tlvs; + int err; + int i; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT] || + !group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]) { + return -ROCKER_EINVAL; + } + + group->l2_flood.group_count = + rocker_tlv_get_le16(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT]); + + tlvs = g_malloc0((group->l2_flood.group_count + 1) * + sizeof(RockerTlv *)); + if (!tlvs) { + return -ROCKER_ENOMEM; + } + + g_free(group->l2_flood.group_ids); + group->l2_flood.group_ids = + g_malloc0(group->l2_flood.group_count * sizeof(uint32_t)); + if (!group->l2_flood.group_ids) { + err = -ROCKER_ENOMEM; + goto err_out; + } + + rocker_tlv_parse_nested(tlvs, group->l2_flood.group_count, + group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]); + + for (i = 0; i < group->l2_flood.group_count; i++) { + group->l2_flood.group_ids[i] = rocker_tlv_get_le32(tlvs[i + 1]); + } + + /* All of the L2 interface groups referenced by the L2 flood + * must have same VLAN + */ + + for (i = 0; i < group->l2_flood.group_count; i++) { + l2_group = of_dpa_group_find(of_dpa, group->l2_flood.group_ids[i]); + if (!l2_group) { + continue; + } + if ((ROCKER_GROUP_TYPE_GET(l2_group->id) == + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) && + (ROCKER_GROUP_VLAN_GET(l2_group->id) != + ROCKER_GROUP_VLAN_GET(group->id))) { + DPRINTF("l2 interface group 0x%08x VLAN doesn't match l2 " + "flood group 0x%08x\n", + group->l2_flood.group_ids[i], group->id); + err = -ROCKER_EINVAL; + goto err_out; + } + } + + g_free(tlvs); + return ROCKER_OK; + +err_out: + group->l2_flood.group_count = 0; + g_free(group->l2_flood.group_ids); + g_free(tlvs); + + return err; +} + +static int of_dpa_cmd_add_l3_unicast(OfDpaGroup *group, RockerTlv **group_tlvs) +{ + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) { + return -ROCKER_EINVAL; + } + + group->l3_unicast.group_id = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]); + + if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(group->l3_unicast.src_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(group->l3_unicast.src_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(group->l3_unicast.dst_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(group->l3_unicast.dst_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + group->l3_unicast.vlan_id = + rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]) { + group->l3_unicast.ttl_check = + rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_group_do(OfDpa *of_dpa, uint32_t group_id, + OfDpaGroup *group, RockerTlv **group_tlvs) +{ + uint8_t type = ROCKER_GROUP_TYPE_GET(group_id); + + switch (type) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + return of_dpa_cmd_add_l2_interface(group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + return of_dpa_cmd_add_l2_rewrite(of_dpa, group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + /* Treat L2 multicast group same as a L2 flood group */ + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + return of_dpa_cmd_add_l2_flood(of_dpa, group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + return of_dpa_cmd_add_l3_unicast(group, group_tlvs); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd_group_add(OfDpa *of_dpa, uint32_t group_id, + RockerTlv **group_tlvs) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + int err; + + if (group) { + return -ROCKER_EEXIST; + } + + group = of_dpa_group_alloc(group_id); + if (!group) { + return -ROCKER_ENOMEM; + } + + err = of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs); + if (err) { + goto err_cmd_add; + } + + err = of_dpa_group_add(of_dpa, group); + if (err) { + goto err_cmd_add; + } + + return ROCKER_OK; + +err_cmd_add: + g_free(group); + return err; +} + +static int of_dpa_cmd_group_mod(OfDpa *of_dpa, uint32_t group_id, + RockerTlv **group_tlvs) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs); +} + +static int of_dpa_cmd_group_del(OfDpa *of_dpa, uint32_t group_id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_group_del(of_dpa, group); +} + +static int of_dpa_cmd_group_get_stats(OfDpa *of_dpa, uint32_t group_id, + struct desc_info *info, char *buf) +{ + return -ROCKER_ENOTSUP; +} + +static int of_dpa_group_cmd(OfDpa *of_dpa, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv **group_tlvs) +{ + uint32_t group_id; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + return -ROCKER_EINVAL; + } + + group_id = rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + return of_dpa_cmd_group_add(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + return of_dpa_cmd_group_mod(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + return of_dpa_cmd_group_del(of_dpa, group_id); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_cmd_group_get_stats(of_dpa, group_id, info, buf); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd(World *world, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv) +{ + OfDpa *of_dpa = world_private(world); + RockerTlv *tlvs[ROCKER_TLV_OF_DPA_MAX + 1]; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_OF_DPA_MAX, cmd_info_tlv); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + return of_dpa_flow_cmd(of_dpa, info, buf, cmd, tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_group_cmd(of_dpa, info, buf, cmd, tlvs); + } + + return -ROCKER_ENOTSUP; +} + +static gboolean rocker_int64_equal(gconstpointer v1, gconstpointer v2) +{ + return *((const uint64_t *)v1) == *((const uint64_t *)v2); +} + +static guint rocker_int64_hash(gconstpointer v) +{ + return (guint)*(const uint64_t *)v; +} + +static int of_dpa_init(World *world) +{ + OfDpa *of_dpa = world_private(world); + + of_dpa->world = world; + + of_dpa->flow_tbl = g_hash_table_new_full(rocker_int64_hash, + rocker_int64_equal, + NULL, g_free); + if (!of_dpa->flow_tbl) { + return -ENOMEM; + } + + of_dpa->group_tbl = g_hash_table_new_full(g_int_hash, g_int_equal, + NULL, g_free); + if (!of_dpa->group_tbl) { + goto err_group_tbl; + } + + /* XXX hardcode some artificial table max values */ + of_dpa->flow_tbl_max_size = 100; + of_dpa->group_tbl_max_size = 100; + + return 0; + +err_group_tbl: + g_hash_table_destroy(of_dpa->flow_tbl); + return -ENOMEM; +} + +static void of_dpa_uninit(World *world) +{ + OfDpa *of_dpa = world_private(world); + + g_hash_table_destroy(of_dpa->group_tbl); + g_hash_table_destroy(of_dpa->flow_tbl); +} + +struct of_dpa_flow_fill_context { + RockerOfDpaFlowList *list; + uint32_t tbl_id; +}; + +static void of_dpa_flow_fill(void *cookie, void *value, void *user_data) +{ + struct of_dpa_flow *flow = value; + struct of_dpa_flow_key *key = &flow->key; + struct of_dpa_flow_key *mask = &flow->mask; + struct of_dpa_flow_fill_context *flow_context = user_data; + RockerOfDpaFlowList *new; + RockerOfDpaFlow *nflow; + RockerOfDpaFlowKey *nkey; + RockerOfDpaFlowMask *nmask; + RockerOfDpaFlowAction *naction; + + if (flow_context->tbl_id != -1 && + flow_context->tbl_id != key->tbl_id) { + return; + } + + new = g_malloc0(sizeof(*new)); + nflow = new->value = g_malloc0(sizeof(*nflow)); + nkey = nflow->key = g_malloc0(sizeof(*nkey)); + nmask = nflow->mask = g_malloc0(sizeof(*nmask)); + naction = nflow->action = g_malloc0(sizeof(*naction)); + + nflow->cookie = flow->cookie; + nflow->hits = flow->stats.hits; + nkey->priority = flow->priority; + nkey->tbl_id = key->tbl_id; + + if (key->in_pport || mask->in_pport) { + nkey->has_in_pport = true; + nkey->in_pport = key->in_pport; + } + + if (nkey->has_in_pport && mask->in_pport != 0xffffffff) { + nmask->has_in_pport = true; + nmask->in_pport = mask->in_pport; + } + + if (key->eth.vlan_id || mask->eth.vlan_id) { + nkey->has_vlan_id = true; + nkey->vlan_id = ntohs(key->eth.vlan_id); + } + + if (nkey->has_vlan_id && mask->eth.vlan_id != 0xffff) { + nmask->has_vlan_id = true; + nmask->vlan_id = ntohs(mask->eth.vlan_id); + } + + if (key->tunnel_id || mask->tunnel_id) { + nkey->has_tunnel_id = true; + nkey->tunnel_id = key->tunnel_id; + } + + if (nkey->has_tunnel_id && mask->tunnel_id != 0xffffffff) { + nmask->has_tunnel_id = true; + nmask->tunnel_id = mask->tunnel_id; + } + + if (memcmp(key->eth.src.a, zero_mac.a, ETH_ALEN) || + memcmp(mask->eth.src.a, zero_mac.a, ETH_ALEN)) { + nkey->has_eth_src = true; + nkey->eth_src = qemu_mac_strdup_printf(key->eth.src.a); + } + + if (nkey->has_eth_src && memcmp(mask->eth.src.a, ff_mac.a, ETH_ALEN)) { + nmask->has_eth_src = true; + nmask->eth_src = qemu_mac_strdup_printf(mask->eth.src.a); + } + + if (memcmp(key->eth.dst.a, zero_mac.a, ETH_ALEN) || + memcmp(mask->eth.dst.a, zero_mac.a, ETH_ALEN)) { + nkey->has_eth_dst = true; + nkey->eth_dst = qemu_mac_strdup_printf(key->eth.dst.a); + } + + if (nkey->has_eth_dst && memcmp(mask->eth.dst.a, ff_mac.a, ETH_ALEN)) { + nmask->has_eth_dst = true; + nmask->eth_dst = qemu_mac_strdup_printf(mask->eth.dst.a); + } + + if (key->eth.type) { + + nkey->has_eth_type = true; + nkey->eth_type = ntohs(key->eth.type); + + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + if (key->ip.proto || mask->ip.proto) { + nkey->has_ip_proto = true; + nkey->ip_proto = key->ip.proto; + } + if (nkey->has_ip_proto && mask->ip.proto != 0xff) { + nmask->has_ip_proto = true; + nmask->ip_proto = mask->ip.proto; + } + if (key->ip.tos || mask->ip.tos) { + nkey->has_ip_tos = true; + nkey->ip_tos = key->ip.tos; + } + if (nkey->has_ip_tos && mask->ip.tos != 0xff) { + nmask->has_ip_tos = true; + nmask->ip_tos = mask->ip.tos; + } + break; + } + + switch (ntohs(key->eth.type)) { + case 0x0800: + if (key->ipv4.addr.dst || mask->ipv4.addr.dst) { + char *dst = inet_ntoa(*(struct in_addr *)&key->ipv4.addr.dst); + int dst_len = of_dpa_mask2prefix(mask->ipv4.addr.dst); + nkey->has_ip_dst = true; + nkey->ip_dst = g_strdup_printf("%s/%d", dst, dst_len); + } + break; + } + } + + if (flow->action.goto_tbl) { + naction->has_goto_tbl = true; + naction->goto_tbl = flow->action.goto_tbl; + } + + if (flow->action.write.group_id) { + naction->has_group_id = true; + naction->group_id = flow->action.write.group_id; + } + + if (flow->action.apply.new_vlan_id) { + naction->has_new_vlan_id = true; + naction->new_vlan_id = flow->action.apply.new_vlan_id; + } + + new->next = flow_context->list; + flow_context->list = new; +} + +RockerOfDpaFlowList *qmp_query_rocker_of_dpa_flows(const char *name, + bool has_tbl_id, + uint32_t tbl_id, + Error **errp) +{ + struct rocker *r; + struct world *w; + struct of_dpa *of_dpa; + struct of_dpa_flow_fill_context fill_context = { + .list = NULL, + .tbl_id = tbl_id, + }; + + r = rocker_find(name); + if (!r) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s not found", name); + return NULL; + } + + w = rocker_get_world(r, ROCKER_WORLD_TYPE_OF_DPA); + if (!w) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s doesn't have OF-DPA world", name); + return NULL; + } + + of_dpa = world_private(w); + + g_hash_table_foreach(of_dpa->flow_tbl, of_dpa_flow_fill, &fill_context); + + return fill_context.list; +} + +struct of_dpa_group_fill_context { + RockerOfDpaGroupList *list; + uint8_t type; +}; + +static void of_dpa_group_fill(void *key, void *value, void *user_data) +{ + struct of_dpa_group *group = value; + struct of_dpa_group_fill_context *flow_context = user_data; + RockerOfDpaGroupList *new; + RockerOfDpaGroup *ngroup; + struct uint32List *id; + int i; + + if (flow_context->type != 9 && + flow_context->type != ROCKER_GROUP_TYPE_GET(group->id)) { + return; + } + + new = g_malloc0(sizeof(*new)); + ngroup = new->value = g_malloc0(sizeof(*ngroup)); + + ngroup->id = group->id; + + ngroup->type = ROCKER_GROUP_TYPE_GET(group->id); + + switch (ngroup->type) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + ngroup->has_vlan_id = true; + ngroup->vlan_id = ROCKER_GROUP_VLAN_GET(group->id); + ngroup->has_pport = true; + ngroup->pport = ROCKER_GROUP_PORT_GET(group->id); + ngroup->has_out_pport = true; + ngroup->out_pport = group->l2_interface.out_pport; + ngroup->has_pop_vlan = true; + ngroup->pop_vlan = group->l2_interface.pop_vlan; + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + ngroup->has_index = true; + ngroup->index = ROCKER_GROUP_INDEX_LONG_GET(group->id); + ngroup->has_group_id = true; + ngroup->group_id = group->l2_rewrite.group_id; + if (group->l2_rewrite.vlan_id) { + ngroup->has_set_vlan_id = true; + ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); + } + if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { + ngroup->has_set_eth_src = true; + ngroup->set_eth_src = + qemu_mac_strdup_printf(group->l2_rewrite.src_mac.a); + } + if (memcmp(group->l2_rewrite.dst_mac.a, zero_mac.a, ETH_ALEN)) { + ngroup->has_set_eth_dst = true; + ngroup->set_eth_dst = + qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); + } + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + ngroup->has_vlan_id = true; + ngroup->vlan_id = ROCKER_GROUP_VLAN_GET(group->id); + ngroup->has_index = true; + ngroup->index = ROCKER_GROUP_INDEX_GET(group->id); + for (i = 0; i < group->l2_flood.group_count; i++) { + ngroup->has_group_ids = true; + id = g_malloc0(sizeof(*id)); + id->value = group->l2_flood.group_ids[i]; + id->next = ngroup->group_ids; + ngroup->group_ids = id; + } + break; + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + ngroup->has_index = true; + ngroup->index = ROCKER_GROUP_INDEX_LONG_GET(group->id); + ngroup->has_group_id = true; + ngroup->group_id = group->l3_unicast.group_id; + if (group->l3_unicast.vlan_id) { + ngroup->has_set_vlan_id = true; + ngroup->set_vlan_id = ntohs(group->l3_unicast.vlan_id); + } + if (memcmp(group->l3_unicast.src_mac.a, zero_mac.a, ETH_ALEN)) { + ngroup->has_set_eth_src = true; + ngroup->set_eth_src = + qemu_mac_strdup_printf(group->l3_unicast.src_mac.a); + } + if (memcmp(group->l3_unicast.dst_mac.a, zero_mac.a, ETH_ALEN)) { + ngroup->has_set_eth_dst = true; + ngroup->set_eth_dst = + qemu_mac_strdup_printf(group->l3_unicast.dst_mac.a); + } + if (group->l3_unicast.ttl_check) { + ngroup->has_ttl_check = true; + ngroup->ttl_check = group->l3_unicast.ttl_check; + } + break; + } + + new->next = flow_context->list; + flow_context->list = new; +} + +RockerOfDpaGroupList *qmp_query_rocker_of_dpa_groups(const char *name, + bool has_type, + uint8_t type, + Error **errp) +{ + struct rocker *r; + struct world *w; + struct of_dpa *of_dpa; + struct of_dpa_group_fill_context fill_context = { + .list = NULL, + .type = type, + }; + + r = rocker_find(name); + if (!r) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s not found", name); + return NULL; + } + + w = rocker_get_world(r, ROCKER_WORLD_TYPE_OF_DPA); + if (!w) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "rocker %s doesn't have OF-DPA world", name); + return NULL; + } + + of_dpa = world_private(w); + + g_hash_table_foreach(of_dpa->group_tbl, of_dpa_group_fill, &fill_context); + + return fill_context.list; +} + +static WorldOps of_dpa_ops = { + .init = of_dpa_init, + .uninit = of_dpa_uninit, + .ig = of_dpa_ig, + .cmd = of_dpa_cmd, +}; + +World *of_dpa_world_alloc(Rocker *r) +{ + return world_alloc(r, sizeof(OfDpa), ROCKER_WORLD_TYPE_OF_DPA, &of_dpa_ops); +} diff --git a/qemu/hw/net/rocker/rocker_of_dpa.h b/qemu/hw/net/rocker/rocker_of_dpa.h new file mode 100644 index 000000000..f3f6d7780 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_of_dpa.h @@ -0,0 +1,22 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_OF_DPA_H_ +#define _ROCKER_OF_DPA_H_ + +World *of_dpa_world_alloc(Rocker *r); + +#endif /* _ROCKER_OF_DPA_H_ */ diff --git a/qemu/hw/net/rocker/rocker_tlv.h b/qemu/hw/net/rocker/rocker_tlv.h new file mode 100644 index 000000000..e3c4ab679 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_tlv.h @@ -0,0 +1,244 @@ +/* + * QEMU rocker switch emulation - TLV parsing and composing + * + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_TLV_H_ +#define _ROCKER_TLV_H_ + +#define ROCKER_TLV_ALIGNTO 8U +#define ROCKER_TLV_ALIGN(len) \ + (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1)) +#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(RockerTlv)) + +/* + * <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) ---> + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (RockerTlv) | ing | | ing | + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * <--------------------------- tlv->len --------------------------> + */ + +static inline RockerTlv *rocker_tlv_next(const RockerTlv *tlv, int *remaining) +{ + int totlen = ROCKER_TLV_ALIGN(le16_to_cpu(tlv->len)); + + *remaining -= totlen; + return (RockerTlv *) ((char *) tlv + totlen); +} + +static inline int rocker_tlv_ok(const RockerTlv *tlv, int remaining) +{ + return remaining >= (int) ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) >= ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) <= remaining; +} + +#define rocker_tlv_for_each(pos, head, len, rem) \ + for (pos = head, rem = len; \ + rocker_tlv_ok(pos, rem); \ + pos = rocker_tlv_next(pos, &(rem))) + +#define rocker_tlv_for_each_nested(pos, tlv, rem) \ + rocker_tlv_for_each(pos, rocker_tlv_data(tlv), rocker_tlv_len(tlv), rem) + +static inline int rocker_tlv_size(int payload) +{ + return ROCKER_TLV_HDRLEN + payload; +} + +static inline int rocker_tlv_total_size(int payload) +{ + return ROCKER_TLV_ALIGN(rocker_tlv_size(payload)); +} + +static inline int rocker_tlv_padlen(int payload) +{ + return rocker_tlv_total_size(payload) - rocker_tlv_size(payload); +} + +static inline int rocker_tlv_type(const RockerTlv *tlv) +{ + return le32_to_cpu(tlv->type); +} + +static inline void *rocker_tlv_data(const RockerTlv *tlv) +{ + return (char *) tlv + ROCKER_TLV_HDRLEN; +} + +static inline int rocker_tlv_len(const RockerTlv *tlv) +{ + return le16_to_cpu(tlv->len) - ROCKER_TLV_HDRLEN; +} + +static inline uint8_t rocker_tlv_get_u8(const RockerTlv *tlv) +{ + return *(uint8_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_u16(const RockerTlv *tlv) +{ + return *(uint16_t *) rocker_tlv_data(tlv); +} + +static inline uint32_t rocker_tlv_get_u32(const RockerTlv *tlv) +{ + return *(uint32_t *) rocker_tlv_data(tlv); +} + +static inline uint64_t rocker_tlv_get_u64(const RockerTlv *tlv) +{ + return *(uint64_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_le16(const RockerTlv *tlv) +{ + return le16_to_cpup((uint16_t *) rocker_tlv_data(tlv)); +} + +static inline uint32_t rocker_tlv_get_le32(const RockerTlv *tlv) +{ + return le32_to_cpup((uint32_t *) rocker_tlv_data(tlv)); +} + +static inline uint64_t rocker_tlv_get_le64(const RockerTlv *tlv) +{ + return le64_to_cpup((uint64_t *) rocker_tlv_data(tlv)); +} + +static inline void rocker_tlv_parse(RockerTlv **tb, int maxtype, + const char *buf, int buf_len) +{ + const RockerTlv *tlv; + const RockerTlv *head = (const RockerTlv *) buf; + int rem; + + memset(tb, 0, sizeof(RockerTlv *) * (maxtype + 1)); + + rocker_tlv_for_each(tlv, head, buf_len, rem) { + uint32_t type = rocker_tlv_type(tlv); + + if (type > 0 && type <= maxtype) { + tb[type] = (RockerTlv *) tlv; + } + } +} + +static inline void rocker_tlv_parse_nested(RockerTlv **tb, int maxtype, + const RockerTlv *tlv) +{ + rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), rocker_tlv_len(tlv)); +} + +static inline RockerTlv *rocker_tlv_start(char *buf, int buf_pos) +{ + return (RockerTlv *) (buf + buf_pos); +} + +static inline void rocker_tlv_put_iov(char *buf, int *buf_pos, + int type, const struct iovec *iov, + const unsigned int iovcnt) +{ + size_t len = iov_size(iov, iovcnt); + int total_size = rocker_tlv_total_size(len); + RockerTlv *tlv; + + tlv = rocker_tlv_start(buf, *buf_pos); + *buf_pos += total_size; + tlv->type = cpu_to_le32(type); + tlv->len = cpu_to_le16(rocker_tlv_size(len)); + iov_to_buf(iov, iovcnt, 0, rocker_tlv_data(tlv), len); + memset((char *) tlv + le16_to_cpu(tlv->len), 0, rocker_tlv_padlen(len)); +} + +static inline void rocker_tlv_put(char *buf, int *buf_pos, + int type, int len, void *data) +{ + struct iovec iov = { + .iov_base = data, + .iov_len = len, + }; + + rocker_tlv_put_iov(buf, buf_pos, type, &iov, 1); +} + +static inline void rocker_tlv_put_u8(char *buf, int *buf_pos, + int type, uint8_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint8_t), &value); +} + +static inline void rocker_tlv_put_u16(char *buf, int *buf_pos, + int type, uint16_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value); +} + +static inline void rocker_tlv_put_u32(char *buf, int *buf_pos, + int type, uint32_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value); +} + +static inline void rocker_tlv_put_u64(char *buf, int *buf_pos, + int type, uint64_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value); +} + +static inline void rocker_tlv_put_le16(char *buf, int *buf_pos, + int type, uint16_t value) +{ + value = cpu_to_le16(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value); +} + +static inline void rocker_tlv_put_le32(char *buf, int *buf_pos, + int type, uint32_t value) +{ + value = cpu_to_le32(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value); +} + +static inline void rocker_tlv_put_le64(char *buf, int *buf_pos, + int type, uint64_t value) +{ + value = cpu_to_le64(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value); +} + +static inline RockerTlv *rocker_tlv_nest_start(char *buf, int *buf_pos, + int type) +{ + RockerTlv *start = rocker_tlv_start(buf, *buf_pos); + + rocker_tlv_put(buf, buf_pos, type, 0, NULL); + return start; +} + +static inline void rocker_tlv_nest_end(char *buf, int *buf_pos, + RockerTlv *start) +{ + start->len = (char *) rocker_tlv_start(buf, *buf_pos) - (char *) start; +} + +static inline void rocker_tlv_nest_cancel(char *buf, int *buf_pos, + RockerTlv *start) +{ + *buf_pos = (char *) start - buf; +} + +#endif diff --git a/qemu/hw/net/rocker/rocker_world.c b/qemu/hw/net/rocker/rocker_world.c new file mode 100644 index 000000000..a6b18f175 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_world.c @@ -0,0 +1,106 @@ +/* + * QEMU rocker switch emulation - switch worlds + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "qemu/iov.h" + +#include "rocker.h" +#include "rocker_world.h" + +struct world { + Rocker *r; + enum rocker_world_type type; + WorldOps *ops; +}; + +ssize_t world_ingress(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + if (world->ops->ig) { + return world->ops->ig(world, pport, iov, iovcnt); + } + + return -1; +} + +int world_do_cmd(World *world, DescInfo *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv) +{ + if (world->ops->cmd) { + return world->ops->cmd(world, info, buf, cmd, cmd_info_tlv); + } + + return -ROCKER_ENOTSUP; +} + +World *world_alloc(Rocker *r, size_t sizeof_private, + enum rocker_world_type type, WorldOps *ops) +{ + World *w = g_malloc0(sizeof(World) + sizeof_private); + + if (w) { + w->r = r; + w->type = type; + w->ops = ops; + if (w->ops->init) { + w->ops->init(w); + } + } + + return w; +} + +void world_free(World *world) +{ + if (world->ops->uninit) { + world->ops->uninit(world); + } + g_free(world); +} + +void world_reset(World *world) +{ + if (world->ops->uninit) { + world->ops->uninit(world); + } + if (world->ops->init) { + world->ops->init(world); + } +} + +void *world_private(World *world) +{ + return world + 1; +} + +Rocker *world_rocker(World *world) +{ + return world->r; +} + +enum rocker_world_type world_type(World *world) +{ + return world->type; +} + +const char *world_name(World *world) +{ + switch (world->type) { + case ROCKER_WORLD_TYPE_OF_DPA: + return "OF_DPA"; + default: + return "unknown"; + } +} diff --git a/qemu/hw/net/rocker/rocker_world.h b/qemu/hw/net/rocker/rocker_world.h new file mode 100644 index 000000000..18d277b92 --- /dev/null +++ b/qemu/hw/net/rocker/rocker_world.h @@ -0,0 +1,60 @@ +/* + * QEMU rocker switch emulation - switch worlds + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_WORLD_H_ +#define _ROCKER_WORLD_H_ + +#include "rocker_hw.h" + +enum rocker_world_type { + ROCKER_WORLD_TYPE_OF_DPA = ROCKER_PORT_MODE_OF_DPA, + ROCKER_WORLD_TYPE_MAX, +}; + +typedef int (world_init)(World *world); +typedef void (world_uninit)(World *world); +typedef ssize_t (world_ig)(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +typedef int (world_cmd)(World *world, DescInfo *info, + char *buf, uint16_t cmd, + RockerTlv *cmd_info_tlv); + +typedef struct world_ops { + world_init *init; + world_uninit *uninit; + world_ig *ig; + world_cmd *cmd; +} WorldOps; + +ssize_t world_ingress(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +int world_do_cmd(World *world, DescInfo *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv); + +World *world_alloc(Rocker *r, size_t sizeof_private, + enum rocker_world_type type, WorldOps *ops); +void world_free(World *world); +void world_reset(World *world); + +void *world_private(World *world); +Rocker *world_rocker(World *world); + +enum rocker_world_type world_type(World *world); +const char *world_name(World *world); + +World *rocker_get_world(Rocker *r, enum rocker_world_type type); + +#endif /* _ROCKER_WORLD_H_ */ diff --git a/qemu/hw/net/rtl8139.c b/qemu/hw/net/rtl8139.c new file mode 100644 index 000000000..edbb61ccf --- /dev/null +++ b/qemu/hw/net/rtl8139.c @@ -0,0 +1,3560 @@ +/** + * QEMU RTL8139 emulation + * + * Copyright (c) 2006 Igor Kovalenko + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + + * Modifications: + * 2006-Jan-28 Mark Malakanov : TSAD and CSCR implementation (for Windows driver) + * + * 2006-Apr-28 Juergen Lock : EEPROM emulation changes for FreeBSD driver + * HW revision ID changes for FreeBSD driver + * + * 2006-Jul-01 Igor Kovalenko : Implemented loopback mode for FreeBSD driver + * Corrected packet transfer reassembly routine for 8139C+ mode + * Rearranged debugging print statements + * Implemented PCI timer interrupt (disabled by default) + * Implemented Tally Counters, increased VM load/save version + * Implemented IP/TCP/UDP checksum task offloading + * + * 2006-Jul-04 Igor Kovalenko : Implemented TCP segmentation offloading + * Fixed MTU=1500 for produced ethernet frames + * + * 2006-Jul-09 Igor Kovalenko : Fixed TCP header length calculation while processing + * segmentation offloading + * Removed slirp.h dependency + * Added rx/tx buffer reset when enabling rx/tx operation + * + * 2010-Feb-04 Frediano Ziglio: Rewrote timer support using QEMU timer only + * when strictly needed (required for for + * Darwin) + * 2011-Mar-22 Benjamin Poirier: Implemented VLAN offloading + */ + +/* For crc32 */ +#include <zlib.h> + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "sysemu/dma.h" +#include "qemu/timer.h" +#include "net/net.h" +#include "hw/loader.h" +#include "sysemu/sysemu.h" +#include "qemu/iov.h" + +/* debug RTL8139 card */ +//#define DEBUG_RTL8139 1 + +#define PCI_FREQUENCY 33000000L + +#define SET_MASKED(input, mask, curr) \ + ( ( (input) & ~(mask) ) | ( (curr) & (mask) ) ) + +/* arg % size for size which is a power of 2 */ +#define MOD2(input, size) \ + ( ( input ) & ( size - 1 ) ) + +#define ETHER_ADDR_LEN 6 +#define ETHER_TYPE_LEN 2 +#define ETH_HLEN (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_MTU 1500 + +#define VLAN_TCI_LEN 2 +#define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN) + +#if defined (DEBUG_RTL8139) +# define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0) +#else +static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) +{ + return 0; +} +#endif + +#define TYPE_RTL8139 "rtl8139" + +#define RTL8139(obj) \ + OBJECT_CHECK(RTL8139State, (obj), TYPE_RTL8139) + +/* Symbolic offsets to registers. */ +enum RTL8139_registers { + MAC0 = 0, /* Ethernet hardware address. */ + MAR0 = 8, /* Multicast filter. */ + TxStatus0 = 0x10,/* Transmit status (Four 32bit registers). C mode only */ + /* Dump Tally Conter control register(64bit). C+ mode only */ + TxAddr0 = 0x20, /* Tx descriptors (also four 32bit). */ + RxBuf = 0x30, + ChipCmd = 0x37, + RxBufPtr = 0x38, + RxBufAddr = 0x3A, + IntrMask = 0x3C, + IntrStatus = 0x3E, + TxConfig = 0x40, + RxConfig = 0x44, + Timer = 0x48, /* A general-purpose counter. */ + RxMissed = 0x4C, /* 24 bits valid, write clears. */ + Cfg9346 = 0x50, + Config0 = 0x51, + Config1 = 0x52, + FlashReg = 0x54, + MediaStatus = 0x58, + Config3 = 0x59, + Config4 = 0x5A, /* absent on RTL-8139A */ + HltClk = 0x5B, + MultiIntr = 0x5C, + PCIRevisionID = 0x5E, + TxSummary = 0x60, /* TSAD register. Transmit Status of All Descriptors*/ + BasicModeCtrl = 0x62, + BasicModeStatus = 0x64, + NWayAdvert = 0x66, + NWayLPAR = 0x68, + NWayExpansion = 0x6A, + /* Undocumented registers, but required for proper operation. */ + FIFOTMS = 0x70, /* FIFO Control and test. */ + CSCR = 0x74, /* Chip Status and Configuration Register. */ + PARA78 = 0x78, + PARA7c = 0x7c, /* Magic transceiver parameter register. */ + Config5 = 0xD8, /* absent on RTL-8139A */ + /* C+ mode */ + TxPoll = 0xD9, /* Tell chip to check Tx descriptors for work */ + RxMaxSize = 0xDA, /* Max size of an Rx packet (8169 only) */ + CpCmd = 0xE0, /* C+ Command register (C+ mode only) */ + IntrMitigate = 0xE2, /* rx/tx interrupt mitigation control */ + RxRingAddrLO = 0xE4, /* 64-bit start addr of Rx ring */ + RxRingAddrHI = 0xE8, /* 64-bit start addr of Rx ring */ + TxThresh = 0xEC, /* Early Tx threshold */ +}; + +enum ClearBitMasks { + MultiIntrClear = 0xF000, + ChipCmdClear = 0xE2, + Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1), +}; + +enum ChipCmdBits { + CmdReset = 0x10, + CmdRxEnb = 0x08, + CmdTxEnb = 0x04, + RxBufEmpty = 0x01, +}; + +/* C+ mode */ +enum CplusCmdBits { + CPlusRxVLAN = 0x0040, /* enable receive VLAN detagging */ + CPlusRxChkSum = 0x0020, /* enable receive checksum offloading */ + CPlusRxEnb = 0x0002, + CPlusTxEnb = 0x0001, +}; + +/* Interrupt register bits, using my own meaningful names. */ +enum IntrStatusBits { + PCIErr = 0x8000, + PCSTimeout = 0x4000, + RxFIFOOver = 0x40, + RxUnderrun = 0x20, /* Packet Underrun / Link Change */ + RxOverflow = 0x10, + TxErr = 0x08, + TxOK = 0x04, + RxErr = 0x02, + RxOK = 0x01, + + RxAckBits = RxFIFOOver | RxOverflow | RxOK, +}; + +enum TxStatusBits { + TxHostOwns = 0x2000, + TxUnderrun = 0x4000, + TxStatOK = 0x8000, + TxOutOfWindow = 0x20000000, + TxAborted = 0x40000000, + TxCarrierLost = 0x80000000, +}; +enum RxStatusBits { + RxMulticast = 0x8000, + RxPhysical = 0x4000, + RxBroadcast = 0x2000, + RxBadSymbol = 0x0020, + RxRunt = 0x0010, + RxTooLong = 0x0008, + RxCRCErr = 0x0004, + RxBadAlign = 0x0002, + RxStatusOK = 0x0001, +}; + +/* Bits in RxConfig. */ +enum rx_mode_bits { + AcceptErr = 0x20, + AcceptRunt = 0x10, + AcceptBroadcast = 0x08, + AcceptMulticast = 0x04, + AcceptMyPhys = 0x02, + AcceptAllPhys = 0x01, +}; + +/* Bits in TxConfig. */ +enum tx_config_bits { + + /* Interframe Gap Time. Only TxIFG96 doesn't violate IEEE 802.3 */ + TxIFGShift = 24, + TxIFG84 = (0 << TxIFGShift), /* 8.4us / 840ns (10 / 100Mbps) */ + TxIFG88 = (1 << TxIFGShift), /* 8.8us / 880ns (10 / 100Mbps) */ + TxIFG92 = (2 << TxIFGShift), /* 9.2us / 920ns (10 / 100Mbps) */ + TxIFG96 = (3 << TxIFGShift), /* 9.6us / 960ns (10 / 100Mbps) */ + + TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */ + TxCRC = (1 << 16), /* DISABLE appending CRC to end of Tx packets */ + TxClearAbt = (1 << 0), /* Clear abort (WO) */ + TxDMAShift = 8, /* DMA burst value (0-7) is shifted this many bits */ + TxRetryShift = 4, /* TXRR value (0-15) is shifted this many bits */ + + TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */ +}; + + +/* Transmit Status of All Descriptors (TSAD) Register */ +enum TSAD_bits { + TSAD_TOK3 = 1<<15, // TOK bit of Descriptor 3 + TSAD_TOK2 = 1<<14, // TOK bit of Descriptor 2 + TSAD_TOK1 = 1<<13, // TOK bit of Descriptor 1 + TSAD_TOK0 = 1<<12, // TOK bit of Descriptor 0 + TSAD_TUN3 = 1<<11, // TUN bit of Descriptor 3 + TSAD_TUN2 = 1<<10, // TUN bit of Descriptor 2 + TSAD_TUN1 = 1<<9, // TUN bit of Descriptor 1 + TSAD_TUN0 = 1<<8, // TUN bit of Descriptor 0 + TSAD_TABT3 = 1<<07, // TABT bit of Descriptor 3 + TSAD_TABT2 = 1<<06, // TABT bit of Descriptor 2 + TSAD_TABT1 = 1<<05, // TABT bit of Descriptor 1 + TSAD_TABT0 = 1<<04, // TABT bit of Descriptor 0 + TSAD_OWN3 = 1<<03, // OWN bit of Descriptor 3 + TSAD_OWN2 = 1<<02, // OWN bit of Descriptor 2 + TSAD_OWN1 = 1<<01, // OWN bit of Descriptor 1 + TSAD_OWN0 = 1<<00, // OWN bit of Descriptor 0 +}; + + +/* Bits in Config1 */ +enum Config1Bits { + Cfg1_PM_Enable = 0x01, + Cfg1_VPD_Enable = 0x02, + Cfg1_PIO = 0x04, + Cfg1_MMIO = 0x08, + LWAKE = 0x10, /* not on 8139, 8139A */ + Cfg1_Driver_Load = 0x20, + Cfg1_LED0 = 0x40, + Cfg1_LED1 = 0x80, + SLEEP = (1 << 1), /* only on 8139, 8139A */ + PWRDN = (1 << 0), /* only on 8139, 8139A */ +}; + +/* Bits in Config3 */ +enum Config3Bits { + Cfg3_FBtBEn = (1 << 0), /* 1 = Fast Back to Back */ + Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */ + Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */ + Cfg3_CardB_En = (1 << 3), /* 1 = enable CardBus registers */ + Cfg3_LinkUp = (1 << 4), /* 1 = wake up on link up */ + Cfg3_Magic = (1 << 5), /* 1 = wake up on Magic Packet (tm) */ + Cfg3_PARM_En = (1 << 6), /* 0 = software can set twister parameters */ + Cfg3_GNTSel = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */ +}; + +/* Bits in Config4 */ +enum Config4Bits { + LWPTN = (1 << 2), /* not on 8139, 8139A */ +}; + +/* Bits in Config5 */ +enum Config5Bits { + Cfg5_PME_STS = (1 << 0), /* 1 = PCI reset resets PME_Status */ + Cfg5_LANWake = (1 << 1), /* 1 = enable LANWake signal */ + Cfg5_LDPS = (1 << 2), /* 0 = save power when link is down */ + Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */ + Cfg5_UWF = (1 << 4), /* 1 = accept unicast wakeup frame */ + Cfg5_MWF = (1 << 5), /* 1 = accept multicast wakeup frame */ + Cfg5_BWF = (1 << 6), /* 1 = accept broadcast wakeup frame */ +}; + +enum RxConfigBits { + /* rx fifo threshold */ + RxCfgFIFOShift = 13, + RxCfgFIFONone = (7 << RxCfgFIFOShift), + + /* Max DMA burst */ + RxCfgDMAShift = 8, + RxCfgDMAUnlimited = (7 << RxCfgDMAShift), + + /* rx ring buffer length */ + RxCfgRcv8K = 0, + RxCfgRcv16K = (1 << 11), + RxCfgRcv32K = (1 << 12), + RxCfgRcv64K = (1 << 11) | (1 << 12), + + /* Disable packet wrap at end of Rx buffer. (not possible with 64k) */ + RxNoWrap = (1 << 7), +}; + +/* Twister tuning parameters from RealTek. + Completely undocumented, but required to tune bad links on some boards. */ +/* +enum CSCRBits { + CSCR_LinkOKBit = 0x0400, + CSCR_LinkChangeBit = 0x0800, + CSCR_LinkStatusBits = 0x0f000, + CSCR_LinkDownOffCmd = 0x003c0, + CSCR_LinkDownCmd = 0x0f3c0, +*/ +enum CSCRBits { + CSCR_Testfun = 1<<15, /* 1 = Auto-neg speeds up internal timer, WO, def 0 */ + CSCR_LD = 1<<9, /* Active low TPI link disable signal. When low, TPI still transmits link pulses and TPI stays in good link state. def 1*/ + CSCR_HEART_BIT = 1<<8, /* 1 = HEART BEAT enable, 0 = HEART BEAT disable. HEART BEAT function is only valid in 10Mbps mode. def 1*/ + CSCR_JBEN = 1<<7, /* 1 = enable jabber function. 0 = disable jabber function, def 1*/ + CSCR_F_LINK_100 = 1<<6, /* Used to login force good link in 100Mbps for diagnostic purposes. 1 = DISABLE, 0 = ENABLE. def 1*/ + CSCR_F_Connect = 1<<5, /* Assertion of this bit forces the disconnect function to be bypassed. def 0*/ + CSCR_Con_status = 1<<3, /* This bit indicates the status of the connection. 1 = valid connected link detected; 0 = disconnected link detected. RO def 0*/ + CSCR_Con_status_En = 1<<2, /* Assertion of this bit configures LED1 pin to indicate connection status. def 0*/ + CSCR_PASS_SCR = 1<<0, /* Bypass Scramble, def 0*/ +}; + +enum Cfg9346Bits { + Cfg9346_Normal = 0x00, + Cfg9346_Autoload = 0x40, + Cfg9346_Programming = 0x80, + Cfg9346_ConfigWrite = 0xC0, +}; + +typedef enum { + CH_8139 = 0, + CH_8139_K, + CH_8139A, + CH_8139A_G, + CH_8139B, + CH_8130, + CH_8139C, + CH_8100, + CH_8100B_8139D, + CH_8101, +} chip_t; + +enum chip_flags { + HasHltClk = (1 << 0), + HasLWake = (1 << 1), +}; + +#define HW_REVID(b30, b29, b28, b27, b26, b23, b22) \ + (b30<<30 | b29<<29 | b28<<28 | b27<<27 | b26<<26 | b23<<23 | b22<<22) +#define HW_REVID_MASK HW_REVID(1, 1, 1, 1, 1, 1, 1) + +#define RTL8139_PCI_REVID_8139 0x10 +#define RTL8139_PCI_REVID_8139CPLUS 0x20 + +#define RTL8139_PCI_REVID RTL8139_PCI_REVID_8139CPLUS + +/* Size is 64 * 16bit words */ +#define EEPROM_9346_ADDR_BITS 6 +#define EEPROM_9346_SIZE (1 << EEPROM_9346_ADDR_BITS) +#define EEPROM_9346_ADDR_MASK (EEPROM_9346_SIZE - 1) + +enum Chip9346Operation +{ + Chip9346_op_mask = 0xc0, /* 10 zzzzzz */ + Chip9346_op_read = 0x80, /* 10 AAAAAA */ + Chip9346_op_write = 0x40, /* 01 AAAAAA D(15)..D(0) */ + Chip9346_op_ext_mask = 0xf0, /* 11 zzzzzz */ + Chip9346_op_write_enable = 0x30, /* 00 11zzzz */ + Chip9346_op_write_all = 0x10, /* 00 01zzzz */ + Chip9346_op_write_disable = 0x00, /* 00 00zzzz */ +}; + +enum Chip9346Mode +{ + Chip9346_none = 0, + Chip9346_enter_command_mode, + Chip9346_read_command, + Chip9346_data_read, /* from output register */ + Chip9346_data_write, /* to input register, then to contents at specified address */ + Chip9346_data_write_all, /* to input register, then filling contents */ +}; + +typedef struct EEprom9346 +{ + uint16_t contents[EEPROM_9346_SIZE]; + int mode; + uint32_t tick; + uint8_t address; + uint16_t input; + uint16_t output; + + uint8_t eecs; + uint8_t eesk; + uint8_t eedi; + uint8_t eedo; +} EEprom9346; + +typedef struct RTL8139TallyCounters +{ + /* Tally counters */ + uint64_t TxOk; + uint64_t RxOk; + uint64_t TxERR; + uint32_t RxERR; + uint16_t MissPkt; + uint16_t FAE; + uint32_t Tx1Col; + uint32_t TxMCol; + uint64_t RxOkPhy; + uint64_t RxOkBrd; + uint32_t RxOkMul; + uint16_t TxAbt; + uint16_t TxUndrn; +} RTL8139TallyCounters; + +/* Clears all tally counters */ +static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters); + +typedef struct RTL8139State { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + uint8_t phys[8]; /* mac address */ + uint8_t mult[8]; /* multicast mask array */ + + uint32_t TxStatus[4]; /* TxStatus0 in C mode*/ /* also DTCCR[0] and DTCCR[1] in C+ mode */ + uint32_t TxAddr[4]; /* TxAddr0 */ + uint32_t RxBuf; /* Receive buffer */ + uint32_t RxBufferSize;/* internal variable, receive ring buffer size in C mode */ + uint32_t RxBufPtr; + uint32_t RxBufAddr; + + uint16_t IntrStatus; + uint16_t IntrMask; + + uint32_t TxConfig; + uint32_t RxConfig; + uint32_t RxMissed; + + uint16_t CSCR; + + uint8_t Cfg9346; + uint8_t Config0; + uint8_t Config1; + uint8_t Config3; + uint8_t Config4; + uint8_t Config5; + + uint8_t clock_enabled; + uint8_t bChipCmdState; + + uint16_t MultiIntr; + + uint16_t BasicModeCtrl; + uint16_t BasicModeStatus; + uint16_t NWayAdvert; + uint16_t NWayLPAR; + uint16_t NWayExpansion; + + uint16_t CpCmd; + uint8_t TxThresh; + + NICState *nic; + NICConf conf; + + /* C ring mode */ + uint32_t currTxDesc; + + /* C+ mode */ + uint32_t cplus_enabled; + + uint32_t currCPlusRxDesc; + uint32_t currCPlusTxDesc; + + uint32_t RxRingAddrLO; + uint32_t RxRingAddrHI; + + EEprom9346 eeprom; + + uint32_t TCTR; + uint32_t TimerInt; + int64_t TCTR_base; + + /* Tally counters */ + RTL8139TallyCounters tally_counters; + + /* Non-persistent data */ + uint8_t *cplus_txbuffer; + int cplus_txbuffer_len; + int cplus_txbuffer_offset; + + /* PCI interrupt timer */ + QEMUTimer *timer; + + MemoryRegion bar_io; + MemoryRegion bar_mem; + + /* Support migration to/from old versions */ + int rtl8139_mmio_io_addr_dummy; +} RTL8139State; + +/* Writes tally counters to memory via DMA */ +static void RTL8139TallyCounters_dma_write(RTL8139State *s, dma_addr_t tc_addr); + +static void rtl8139_set_next_tctr_time(RTL8139State *s); + +static void prom9346_decode_command(EEprom9346 *eeprom, uint8_t command) +{ + DPRINTF("eeprom command 0x%02x\n", command); + + switch (command & Chip9346_op_mask) + { + case Chip9346_op_read: + { + eeprom->address = command & EEPROM_9346_ADDR_MASK; + eeprom->output = eeprom->contents[eeprom->address]; + eeprom->eedo = 0; + eeprom->tick = 0; + eeprom->mode = Chip9346_data_read; + DPRINTF("eeprom read from address 0x%02x data=0x%04x\n", + eeprom->address, eeprom->output); + } + break; + + case Chip9346_op_write: + { + eeprom->address = command & EEPROM_9346_ADDR_MASK; + eeprom->input = 0; + eeprom->tick = 0; + eeprom->mode = Chip9346_none; /* Chip9346_data_write */ + DPRINTF("eeprom begin write to address 0x%02x\n", + eeprom->address); + } + break; + default: + eeprom->mode = Chip9346_none; + switch (command & Chip9346_op_ext_mask) + { + case Chip9346_op_write_enable: + DPRINTF("eeprom write enabled\n"); + break; + case Chip9346_op_write_all: + DPRINTF("eeprom begin write all\n"); + break; + case Chip9346_op_write_disable: + DPRINTF("eeprom write disabled\n"); + break; + } + break; + } +} + +static void prom9346_shift_clock(EEprom9346 *eeprom) +{ + int bit = eeprom->eedi?1:0; + + ++ eeprom->tick; + + DPRINTF("eeprom: tick %d eedi=%d eedo=%d\n", eeprom->tick, eeprom->eedi, + eeprom->eedo); + + switch (eeprom->mode) + { + case Chip9346_enter_command_mode: + if (bit) + { + eeprom->mode = Chip9346_read_command; + eeprom->tick = 0; + eeprom->input = 0; + DPRINTF("eeprom: +++ synchronized, begin command read\n"); + } + break; + + case Chip9346_read_command: + eeprom->input = (eeprom->input << 1) | (bit & 1); + if (eeprom->tick == 8) + { + prom9346_decode_command(eeprom, eeprom->input & 0xff); + } + break; + + case Chip9346_data_read: + eeprom->eedo = (eeprom->output & 0x8000)?1:0; + eeprom->output <<= 1; + if (eeprom->tick == 16) + { +#if 1 + // the FreeBSD drivers (rl and re) don't explicitly toggle + // CS between reads (or does setting Cfg9346 to 0 count too?), + // so we need to enter wait-for-command state here + eeprom->mode = Chip9346_enter_command_mode; + eeprom->input = 0; + eeprom->tick = 0; + + DPRINTF("eeprom: +++ end of read, awaiting next command\n"); +#else + // original behaviour + ++eeprom->address; + eeprom->address &= EEPROM_9346_ADDR_MASK; + eeprom->output = eeprom->contents[eeprom->address]; + eeprom->tick = 0; + + DPRINTF("eeprom: +++ read next address 0x%02x data=0x%04x\n", + eeprom->address, eeprom->output); +#endif + } + break; + + case Chip9346_data_write: + eeprom->input = (eeprom->input << 1) | (bit & 1); + if (eeprom->tick == 16) + { + DPRINTF("eeprom write to address 0x%02x data=0x%04x\n", + eeprom->address, eeprom->input); + + eeprom->contents[eeprom->address] = eeprom->input; + eeprom->mode = Chip9346_none; /* waiting for next command after CS cycle */ + eeprom->tick = 0; + eeprom->input = 0; + } + break; + + case Chip9346_data_write_all: + eeprom->input = (eeprom->input << 1) | (bit & 1); + if (eeprom->tick == 16) + { + int i; + for (i = 0; i < EEPROM_9346_SIZE; i++) + { + eeprom->contents[i] = eeprom->input; + } + DPRINTF("eeprom filled with data=0x%04x\n", eeprom->input); + + eeprom->mode = Chip9346_enter_command_mode; + eeprom->tick = 0; + eeprom->input = 0; + } + break; + + default: + break; + } +} + +static int prom9346_get_wire(RTL8139State *s) +{ + EEprom9346 *eeprom = &s->eeprom; + if (!eeprom->eecs) + return 0; + + return eeprom->eedo; +} + +/* FIXME: This should be merged into/replaced by eeprom93xx.c. */ +static void prom9346_set_wire(RTL8139State *s, int eecs, int eesk, int eedi) +{ + EEprom9346 *eeprom = &s->eeprom; + uint8_t old_eecs = eeprom->eecs; + uint8_t old_eesk = eeprom->eesk; + + eeprom->eecs = eecs; + eeprom->eesk = eesk; + eeprom->eedi = eedi; + + DPRINTF("eeprom: +++ wires CS=%d SK=%d DI=%d DO=%d\n", eeprom->eecs, + eeprom->eesk, eeprom->eedi, eeprom->eedo); + + if (!old_eecs && eecs) + { + /* Synchronize start */ + eeprom->tick = 0; + eeprom->input = 0; + eeprom->output = 0; + eeprom->mode = Chip9346_enter_command_mode; + + DPRINTF("=== eeprom: begin access, enter command mode\n"); + } + + if (!eecs) + { + DPRINTF("=== eeprom: end access\n"); + return; + } + + if (!old_eesk && eesk) + { + /* SK front rules */ + prom9346_shift_clock(eeprom); + } +} + +static void rtl8139_update_irq(RTL8139State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int isr; + isr = (s->IntrStatus & s->IntrMask) & 0xffff; + + DPRINTF("Set IRQ to %d (%04x %04x)\n", isr ? 1 : 0, s->IntrStatus, + s->IntrMask); + + pci_set_irq(d, (isr != 0)); +} + +static int rtl8139_RxWrap(RTL8139State *s) +{ + /* wrapping enabled; assume 1.5k more buffer space if size < 65536 */ + return (s->RxConfig & (1 << 7)); +} + +static int rtl8139_receiver_enabled(RTL8139State *s) +{ + return s->bChipCmdState & CmdRxEnb; +} + +static int rtl8139_transmitter_enabled(RTL8139State *s) +{ + return s->bChipCmdState & CmdTxEnb; +} + +static int rtl8139_cp_receiver_enabled(RTL8139State *s) +{ + return s->CpCmd & CPlusRxEnb; +} + +static int rtl8139_cp_transmitter_enabled(RTL8139State *s) +{ + return s->CpCmd & CPlusTxEnb; +} + +static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size) +{ + PCIDevice *d = PCI_DEVICE(s); + + if (s->RxBufAddr + size > s->RxBufferSize) + { + int wrapped = MOD2(s->RxBufAddr + size, s->RxBufferSize); + + /* write packet data */ + if (wrapped && !(s->RxBufferSize < 65536 && rtl8139_RxWrap(s))) + { + DPRINTF(">>> rx packet wrapped in buffer at %d\n", size - wrapped); + + if (size > wrapped) + { + pci_dma_write(d, s->RxBuf + s->RxBufAddr, + buf, size-wrapped); + } + + /* reset buffer pointer */ + s->RxBufAddr = 0; + + pci_dma_write(d, s->RxBuf + s->RxBufAddr, + buf + (size-wrapped), wrapped); + + s->RxBufAddr = wrapped; + + return; + } + } + + /* non-wrapping path or overwrapping enabled */ + pci_dma_write(d, s->RxBuf + s->RxBufAddr, buf, size); + + s->RxBufAddr += size; +} + +#define MIN_BUF_SIZE 60 +static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high) +{ + return low | ((uint64_t)high << 32); +} + +/* Workaround for buggy guest driver such as linux who allocates rx + * rings after the receiver were enabled. */ +static bool rtl8139_cp_rx_valid(RTL8139State *s) +{ + return !(s->RxRingAddrLO == 0 && s->RxRingAddrHI == 0); +} + +static int rtl8139_can_receive(NetClientState *nc) +{ + RTL8139State *s = qemu_get_nic_opaque(nc); + int avail; + + /* Receive (drop) packets if card is disabled. */ + if (!s->clock_enabled) + return 1; + if (!rtl8139_receiver_enabled(s)) + return 1; + + if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) { + /* ??? Flow control not implemented in c+ mode. + This is a hack to work around slirp deficiencies anyway. */ + return 1; + } else { + avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr, + s->RxBufferSize); + return (avail == 0 || avail >= 1514 || (s->IntrMask & RxOverflow)); + } +} + +static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t size_, int do_interrupt) +{ + RTL8139State *s = qemu_get_nic_opaque(nc); + PCIDevice *d = PCI_DEVICE(s); + /* size is the length of the buffer passed to the driver */ + int size = size_; + const uint8_t *dot1q_buf = NULL; + + uint32_t packet_header = 0; + + uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN]; + static const uint8_t broadcast_macaddr[6] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + DPRINTF(">>> received len=%d\n", size); + + /* test if board clock is stopped */ + if (!s->clock_enabled) + { + DPRINTF("stopped ==========================\n"); + return -1; + } + + /* first check if receiver is enabled */ + + if (!rtl8139_receiver_enabled(s)) + { + DPRINTF("receiver disabled ================\n"); + return -1; + } + + /* XXX: check this */ + if (s->RxConfig & AcceptAllPhys) { + /* promiscuous: receive all */ + DPRINTF(">>> packet received in promiscuous mode\n"); + + } else { + if (!memcmp(buf, broadcast_macaddr, 6)) { + /* broadcast address */ + if (!(s->RxConfig & AcceptBroadcast)) + { + DPRINTF(">>> broadcast packet rejected\n"); + + /* update tally counter */ + ++s->tally_counters.RxERR; + + return size; + } + + packet_header |= RxBroadcast; + + DPRINTF(">>> broadcast packet received\n"); + + /* update tally counter */ + ++s->tally_counters.RxOkBrd; + + } else if (buf[0] & 0x01) { + /* multicast */ + if (!(s->RxConfig & AcceptMulticast)) + { + DPRINTF(">>> multicast packet rejected\n"); + + /* update tally counter */ + ++s->tally_counters.RxERR; + + return size; + } + + int mcast_idx = compute_mcast_idx(buf); + + if (!(s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7)))) + { + DPRINTF(">>> multicast address mismatch\n"); + + /* update tally counter */ + ++s->tally_counters.RxERR; + + return size; + } + + packet_header |= RxMulticast; + + DPRINTF(">>> multicast packet received\n"); + + /* update tally counter */ + ++s->tally_counters.RxOkMul; + + } else if (s->phys[0] == buf[0] && + s->phys[1] == buf[1] && + s->phys[2] == buf[2] && + s->phys[3] == buf[3] && + s->phys[4] == buf[4] && + s->phys[5] == buf[5]) { + /* match */ + if (!(s->RxConfig & AcceptMyPhys)) + { + DPRINTF(">>> rejecting physical address matching packet\n"); + + /* update tally counter */ + ++s->tally_counters.RxERR; + + return size; + } + + packet_header |= RxPhysical; + + DPRINTF(">>> physical address matching packet received\n"); + + /* update tally counter */ + ++s->tally_counters.RxOkPhy; + + } else { + + DPRINTF(">>> unknown packet\n"); + + /* update tally counter */ + ++s->tally_counters.RxERR; + + return size; + } + } + + /* if too small buffer, then expand it + * Include some tailroom in case a vlan tag is later removed. */ + if (size < MIN_BUF_SIZE + VLAN_HLEN) { + memcpy(buf1, buf, size); + memset(buf1 + size, 0, MIN_BUF_SIZE + VLAN_HLEN - size); + buf = buf1; + if (size < MIN_BUF_SIZE) { + size = MIN_BUF_SIZE; + } + } + + if (rtl8139_cp_receiver_enabled(s)) + { + if (!rtl8139_cp_rx_valid(s)) { + return size; + } + + DPRINTF("in C+ Rx mode ================\n"); + + /* begin C+ receiver mode */ + +/* w0 ownership flag */ +#define CP_RX_OWN (1<<31) +/* w0 end of ring flag */ +#define CP_RX_EOR (1<<30) +/* w0 bits 0...12 : buffer size */ +#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1) +/* w1 tag available flag */ +#define CP_RX_TAVA (1<<16) +/* w1 bits 0...15 : VLAN tag */ +#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1) +/* w2 low 32bit of Rx buffer ptr */ +/* w3 high 32bit of Rx buffer ptr */ + + int descriptor = s->currCPlusRxDesc; + dma_addr_t cplus_rx_ring_desc; + + cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO, s->RxRingAddrHI); + cplus_rx_ring_desc += 16 * descriptor; + + DPRINTF("+++ C+ mode reading RX descriptor %d from host memory at " + "%08x %08x = "DMA_ADDR_FMT"\n", descriptor, s->RxRingAddrHI, + s->RxRingAddrLO, cplus_rx_ring_desc); + + uint32_t val, rxdw0,rxdw1,rxbufLO,rxbufHI; + + pci_dma_read(d, cplus_rx_ring_desc, &val, 4); + rxdw0 = le32_to_cpu(val); + pci_dma_read(d, cplus_rx_ring_desc+4, &val, 4); + rxdw1 = le32_to_cpu(val); + pci_dma_read(d, cplus_rx_ring_desc+8, &val, 4); + rxbufLO = le32_to_cpu(val); + pci_dma_read(d, cplus_rx_ring_desc+12, &val, 4); + rxbufHI = le32_to_cpu(val); + + DPRINTF("+++ C+ mode RX descriptor %d %08x %08x %08x %08x\n", + descriptor, rxdw0, rxdw1, rxbufLO, rxbufHI); + + if (!(rxdw0 & CP_RX_OWN)) + { + DPRINTF("C+ Rx mode : descriptor %d is owned by host\n", + descriptor); + + s->IntrStatus |= RxOverflow; + ++s->RxMissed; + + /* update tally counter */ + ++s->tally_counters.RxERR; + ++s->tally_counters.MissPkt; + + rtl8139_update_irq(s); + return size_; + } + + uint32_t rx_space = rxdw0 & CP_RX_BUFFER_SIZE_MASK; + + /* write VLAN info to descriptor variables. */ + if (s->CpCmd & CPlusRxVLAN && be16_to_cpup((uint16_t *) + &buf[ETHER_ADDR_LEN * 2]) == ETH_P_8021Q) { + dot1q_buf = &buf[ETHER_ADDR_LEN * 2]; + size -= VLAN_HLEN; + /* if too small buffer, use the tailroom added duing expansion */ + if (size < MIN_BUF_SIZE) { + size = MIN_BUF_SIZE; + } + + rxdw1 &= ~CP_RX_VLAN_TAG_MASK; + /* BE + ~le_to_cpu()~ + cpu_to_le() = BE */ + rxdw1 |= CP_RX_TAVA | le16_to_cpup((uint16_t *) + &dot1q_buf[ETHER_TYPE_LEN]); + + DPRINTF("C+ Rx mode : extracted vlan tag with tci: ""%u\n", + be16_to_cpup((uint16_t *)&dot1q_buf[ETHER_TYPE_LEN])); + } else { + /* reset VLAN tag flag */ + rxdw1 &= ~CP_RX_TAVA; + } + + /* TODO: scatter the packet over available receive ring descriptors space */ + + if (size+4 > rx_space) + { + DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n", + descriptor, rx_space, size); + + s->IntrStatus |= RxOverflow; + ++s->RxMissed; + + /* update tally counter */ + ++s->tally_counters.RxERR; + ++s->tally_counters.MissPkt; + + rtl8139_update_irq(s); + return size_; + } + + dma_addr_t rx_addr = rtl8139_addr64(rxbufLO, rxbufHI); + + /* receive/copy to target memory */ + if (dot1q_buf) { + pci_dma_write(d, rx_addr, buf, 2 * ETHER_ADDR_LEN); + pci_dma_write(d, rx_addr + 2 * ETHER_ADDR_LEN, + buf + 2 * ETHER_ADDR_LEN + VLAN_HLEN, + size - 2 * ETHER_ADDR_LEN); + } else { + pci_dma_write(d, rx_addr, buf, size); + } + + if (s->CpCmd & CPlusRxChkSum) + { + /* do some packet checksumming */ + } + + /* write checksum */ + val = cpu_to_le32(crc32(0, buf, size_)); + pci_dma_write(d, rx_addr+size, (uint8_t *)&val, 4); + +/* first segment of received packet flag */ +#define CP_RX_STATUS_FS (1<<29) +/* last segment of received packet flag */ +#define CP_RX_STATUS_LS (1<<28) +/* multicast packet flag */ +#define CP_RX_STATUS_MAR (1<<26) +/* physical-matching packet flag */ +#define CP_RX_STATUS_PAM (1<<25) +/* broadcast packet flag */ +#define CP_RX_STATUS_BAR (1<<24) +/* runt packet flag */ +#define CP_RX_STATUS_RUNT (1<<19) +/* crc error flag */ +#define CP_RX_STATUS_CRC (1<<18) +/* IP checksum error flag */ +#define CP_RX_STATUS_IPF (1<<15) +/* UDP checksum error flag */ +#define CP_RX_STATUS_UDPF (1<<14) +/* TCP checksum error flag */ +#define CP_RX_STATUS_TCPF (1<<13) + + /* transfer ownership to target */ + rxdw0 &= ~CP_RX_OWN; + + /* set first segment bit */ + rxdw0 |= CP_RX_STATUS_FS; + + /* set last segment bit */ + rxdw0 |= CP_RX_STATUS_LS; + + /* set received packet type flags */ + if (packet_header & RxBroadcast) + rxdw0 |= CP_RX_STATUS_BAR; + if (packet_header & RxMulticast) + rxdw0 |= CP_RX_STATUS_MAR; + if (packet_header & RxPhysical) + rxdw0 |= CP_RX_STATUS_PAM; + + /* set received size */ + rxdw0 &= ~CP_RX_BUFFER_SIZE_MASK; + rxdw0 |= (size+4); + + /* update ring data */ + val = cpu_to_le32(rxdw0); + pci_dma_write(d, cplus_rx_ring_desc, (uint8_t *)&val, 4); + val = cpu_to_le32(rxdw1); + pci_dma_write(d, cplus_rx_ring_desc+4, (uint8_t *)&val, 4); + + /* update tally counter */ + ++s->tally_counters.RxOk; + + /* seek to next Rx descriptor */ + if (rxdw0 & CP_RX_EOR) + { + s->currCPlusRxDesc = 0; + } + else + { + ++s->currCPlusRxDesc; + } + + DPRINTF("done C+ Rx mode ----------------\n"); + + } + else + { + DPRINTF("in ring Rx mode ================\n"); + + /* begin ring receiver mode */ + int avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr, s->RxBufferSize); + + /* if receiver buffer is empty then avail == 0 */ + + if (avail != 0 && size + 8 >= avail) + { + DPRINTF("rx overflow: rx buffer length %d head 0x%04x " + "read 0x%04x === available 0x%04x need 0x%04x\n", + s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8); + + s->IntrStatus |= RxOverflow; + ++s->RxMissed; + rtl8139_update_irq(s); + return size_; + } + + packet_header |= RxStatusOK; + + packet_header |= (((size+4) << 16) & 0xffff0000); + + /* write header */ + uint32_t val = cpu_to_le32(packet_header); + + rtl8139_write_buffer(s, (uint8_t *)&val, 4); + + rtl8139_write_buffer(s, buf, size); + + /* write checksum */ + val = cpu_to_le32(crc32(0, buf, size)); + rtl8139_write_buffer(s, (uint8_t *)&val, 4); + + /* correct buffer write pointer */ + s->RxBufAddr = MOD2((s->RxBufAddr + 3) & ~0x3, s->RxBufferSize); + + /* now we can signal we have received something */ + + DPRINTF("received: rx buffer length %d head 0x%04x read 0x%04x\n", + s->RxBufferSize, s->RxBufAddr, s->RxBufPtr); + } + + s->IntrStatus |= RxOK; + + if (do_interrupt) + { + rtl8139_update_irq(s); + } + + return size_; +} + +static ssize_t rtl8139_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + return rtl8139_do_receive(nc, buf, size, 1); +} + +static void rtl8139_reset_rxring(RTL8139State *s, uint32_t bufferSize) +{ + s->RxBufferSize = bufferSize; + s->RxBufPtr = 0; + s->RxBufAddr = 0; +} + +static void rtl8139_reset(DeviceState *d) +{ + RTL8139State *s = RTL8139(d); + int i; + + /* restore MAC address */ + memcpy(s->phys, s->conf.macaddr.a, 6); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys); + + /* reset interrupt mask */ + s->IntrStatus = 0; + s->IntrMask = 0; + + rtl8139_update_irq(s); + + /* mark all status registers as owned by host */ + for (i = 0; i < 4; ++i) + { + s->TxStatus[i] = TxHostOwns; + } + + s->currTxDesc = 0; + s->currCPlusRxDesc = 0; + s->currCPlusTxDesc = 0; + + s->RxRingAddrLO = 0; + s->RxRingAddrHI = 0; + + s->RxBuf = 0; + + rtl8139_reset_rxring(s, 8192); + + /* ACK the reset */ + s->TxConfig = 0; + +#if 0 +// s->TxConfig |= HW_REVID(1, 0, 0, 0, 0, 0, 0); // RTL-8139 HasHltClk + s->clock_enabled = 0; +#else + s->TxConfig |= HW_REVID(1, 1, 1, 0, 1, 1, 0); // RTL-8139C+ HasLWake + s->clock_enabled = 1; +#endif + + s->bChipCmdState = CmdReset; /* RxBufEmpty bit is calculated on read from ChipCmd */; + + /* set initial state data */ + s->Config0 = 0x0; /* No boot ROM */ + s->Config1 = 0xC; /* IO mapped and MEM mapped registers available */ + s->Config3 = 0x1; /* fast back-to-back compatible */ + s->Config5 = 0x0; + + s->CSCR = CSCR_F_LINK_100 | CSCR_HEART_BIT | CSCR_LD; + + s->CpCmd = 0x0; /* reset C+ mode */ + s->cplus_enabled = 0; + + +// s->BasicModeCtrl = 0x3100; // 100Mbps, full duplex, autonegotiation +// s->BasicModeCtrl = 0x2100; // 100Mbps, full duplex + s->BasicModeCtrl = 0x1000; // autonegotiation + + s->BasicModeStatus = 0x7809; + //s->BasicModeStatus |= 0x0040; /* UTP medium */ + s->BasicModeStatus |= 0x0020; /* autonegotiation completed */ + /* preserve link state */ + s->BasicModeStatus |= qemu_get_queue(s->nic)->link_down ? 0 : 0x04; + + s->NWayAdvert = 0x05e1; /* all modes, full duplex */ + s->NWayLPAR = 0x05e1; /* all modes, full duplex */ + s->NWayExpansion = 0x0001; /* autonegotiation supported */ + + /* also reset timer and disable timer interrupt */ + s->TCTR = 0; + s->TimerInt = 0; + s->TCTR_base = 0; + rtl8139_set_next_tctr_time(s); + + /* reset tally counters */ + RTL8139TallyCounters_clear(&s->tally_counters); +} + +static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters) +{ + counters->TxOk = 0; + counters->RxOk = 0; + counters->TxERR = 0; + counters->RxERR = 0; + counters->MissPkt = 0; + counters->FAE = 0; + counters->Tx1Col = 0; + counters->TxMCol = 0; + counters->RxOkPhy = 0; + counters->RxOkBrd = 0; + counters->RxOkMul = 0; + counters->TxAbt = 0; + counters->TxUndrn = 0; +} + +static void RTL8139TallyCounters_dma_write(RTL8139State *s, dma_addr_t tc_addr) +{ + PCIDevice *d = PCI_DEVICE(s); + RTL8139TallyCounters *tally_counters = &s->tally_counters; + uint16_t val16; + uint32_t val32; + uint64_t val64; + + val64 = cpu_to_le64(tally_counters->TxOk); + pci_dma_write(d, tc_addr + 0, (uint8_t *)&val64, 8); + + val64 = cpu_to_le64(tally_counters->RxOk); + pci_dma_write(d, tc_addr + 8, (uint8_t *)&val64, 8); + + val64 = cpu_to_le64(tally_counters->TxERR); + pci_dma_write(d, tc_addr + 16, (uint8_t *)&val64, 8); + + val32 = cpu_to_le32(tally_counters->RxERR); + pci_dma_write(d, tc_addr + 24, (uint8_t *)&val32, 4); + + val16 = cpu_to_le16(tally_counters->MissPkt); + pci_dma_write(d, tc_addr + 28, (uint8_t *)&val16, 2); + + val16 = cpu_to_le16(tally_counters->FAE); + pci_dma_write(d, tc_addr + 30, (uint8_t *)&val16, 2); + + val32 = cpu_to_le32(tally_counters->Tx1Col); + pci_dma_write(d, tc_addr + 32, (uint8_t *)&val32, 4); + + val32 = cpu_to_le32(tally_counters->TxMCol); + pci_dma_write(d, tc_addr + 36, (uint8_t *)&val32, 4); + + val64 = cpu_to_le64(tally_counters->RxOkPhy); + pci_dma_write(d, tc_addr + 40, (uint8_t *)&val64, 8); + + val64 = cpu_to_le64(tally_counters->RxOkBrd); + pci_dma_write(d, tc_addr + 48, (uint8_t *)&val64, 8); + + val32 = cpu_to_le32(tally_counters->RxOkMul); + pci_dma_write(d, tc_addr + 56, (uint8_t *)&val32, 4); + + val16 = cpu_to_le16(tally_counters->TxAbt); + pci_dma_write(d, tc_addr + 60, (uint8_t *)&val16, 2); + + val16 = cpu_to_le16(tally_counters->TxUndrn); + pci_dma_write(d, tc_addr + 62, (uint8_t *)&val16, 2); +} + +/* Loads values of tally counters from VM state file */ + +static const VMStateDescription vmstate_tally_counters = { + .name = "tally_counters", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(TxOk, RTL8139TallyCounters), + VMSTATE_UINT64(RxOk, RTL8139TallyCounters), + VMSTATE_UINT64(TxERR, RTL8139TallyCounters), + VMSTATE_UINT32(RxERR, RTL8139TallyCounters), + VMSTATE_UINT16(MissPkt, RTL8139TallyCounters), + VMSTATE_UINT16(FAE, RTL8139TallyCounters), + VMSTATE_UINT32(Tx1Col, RTL8139TallyCounters), + VMSTATE_UINT32(TxMCol, RTL8139TallyCounters), + VMSTATE_UINT64(RxOkPhy, RTL8139TallyCounters), + VMSTATE_UINT64(RxOkBrd, RTL8139TallyCounters), + VMSTATE_UINT16(TxAbt, RTL8139TallyCounters), + VMSTATE_UINT16(TxUndrn, RTL8139TallyCounters), + VMSTATE_END_OF_LIST() + } +}; + +static void rtl8139_ChipCmd_write(RTL8139State *s, uint32_t val) +{ + DeviceState *d = DEVICE(s); + + val &= 0xff; + + DPRINTF("ChipCmd write val=0x%08x\n", val); + + if (val & CmdReset) + { + DPRINTF("ChipCmd reset\n"); + rtl8139_reset(d); + } + if (val & CmdRxEnb) + { + DPRINTF("ChipCmd enable receiver\n"); + + s->currCPlusRxDesc = 0; + } + if (val & CmdTxEnb) + { + DPRINTF("ChipCmd enable transmitter\n"); + + s->currCPlusTxDesc = 0; + } + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xe3, s->bChipCmdState); + + /* Deassert reset pin before next read */ + val &= ~CmdReset; + + s->bChipCmdState = val; +} + +static int rtl8139_RxBufferEmpty(RTL8139State *s) +{ + int unread = MOD2(s->RxBufferSize + s->RxBufAddr - s->RxBufPtr, s->RxBufferSize); + + if (unread != 0) + { + DPRINTF("receiver buffer data available 0x%04x\n", unread); + return 0; + } + + DPRINTF("receiver buffer is empty\n"); + + return 1; +} + +static uint32_t rtl8139_ChipCmd_read(RTL8139State *s) +{ + uint32_t ret = s->bChipCmdState; + + if (rtl8139_RxBufferEmpty(s)) + ret |= RxBufEmpty; + + DPRINTF("ChipCmd read val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_CpCmd_write(RTL8139State *s, uint32_t val) +{ + val &= 0xffff; + + DPRINTF("C+ command register write(w) val=0x%04x\n", val); + + s->cplus_enabled = 1; + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xff84, s->CpCmd); + + s->CpCmd = val; +} + +static uint32_t rtl8139_CpCmd_read(RTL8139State *s) +{ + uint32_t ret = s->CpCmd; + + DPRINTF("C+ command register read(w) val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_IntrMitigate_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("C+ IntrMitigate register write(w) val=0x%04x\n", val); +} + +static uint32_t rtl8139_IntrMitigate_read(RTL8139State *s) +{ + uint32_t ret = 0; + + DPRINTF("C+ IntrMitigate register read(w) val=0x%04x\n", ret); + + return ret; +} + +static int rtl8139_config_writable(RTL8139State *s) +{ + if ((s->Cfg9346 & Chip9346_op_mask) == Cfg9346_ConfigWrite) + { + return 1; + } + + DPRINTF("Configuration registers are write-protected\n"); + + return 0; +} + +static void rtl8139_BasicModeCtrl_write(RTL8139State *s, uint32_t val) +{ + val &= 0xffff; + + DPRINTF("BasicModeCtrl register write(w) val=0x%04x\n", val); + + /* mask unwritable bits */ + uint32_t mask = 0x4cff; + + if (1 || !rtl8139_config_writable(s)) + { + /* Speed setting and autonegotiation enable bits are read-only */ + mask |= 0x3000; + /* Duplex mode setting is read-only */ + mask |= 0x0100; + } + + val = SET_MASKED(val, mask, s->BasicModeCtrl); + + s->BasicModeCtrl = val; +} + +static uint32_t rtl8139_BasicModeCtrl_read(RTL8139State *s) +{ + uint32_t ret = s->BasicModeCtrl; + + DPRINTF("BasicModeCtrl register read(w) val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_BasicModeStatus_write(RTL8139State *s, uint32_t val) +{ + val &= 0xffff; + + DPRINTF("BasicModeStatus register write(w) val=0x%04x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xff3f, s->BasicModeStatus); + + s->BasicModeStatus = val; +} + +static uint32_t rtl8139_BasicModeStatus_read(RTL8139State *s) +{ + uint32_t ret = s->BasicModeStatus; + + DPRINTF("BasicModeStatus register read(w) val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_Cfg9346_write(RTL8139State *s, uint32_t val) +{ + DeviceState *d = DEVICE(s); + + val &= 0xff; + + DPRINTF("Cfg9346 write val=0x%02x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0x31, s->Cfg9346); + + uint32_t opmode = val & 0xc0; + uint32_t eeprom_val = val & 0xf; + + if (opmode == 0x80) { + /* eeprom access */ + int eecs = (eeprom_val & 0x08)?1:0; + int eesk = (eeprom_val & 0x04)?1:0; + int eedi = (eeprom_val & 0x02)?1:0; + prom9346_set_wire(s, eecs, eesk, eedi); + } else if (opmode == 0x40) { + /* Reset. */ + val = 0; + rtl8139_reset(d); + } + + s->Cfg9346 = val; +} + +static uint32_t rtl8139_Cfg9346_read(RTL8139State *s) +{ + uint32_t ret = s->Cfg9346; + + uint32_t opmode = ret & 0xc0; + + if (opmode == 0x80) + { + /* eeprom access */ + int eedo = prom9346_get_wire(s); + if (eedo) + { + ret |= 0x01; + } + else + { + ret &= ~0x01; + } + } + + DPRINTF("Cfg9346 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_Config0_write(RTL8139State *s, uint32_t val) +{ + val &= 0xff; + + DPRINTF("Config0 write val=0x%02x\n", val); + + if (!rtl8139_config_writable(s)) { + return; + } + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xf8, s->Config0); + + s->Config0 = val; +} + +static uint32_t rtl8139_Config0_read(RTL8139State *s) +{ + uint32_t ret = s->Config0; + + DPRINTF("Config0 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_Config1_write(RTL8139State *s, uint32_t val) +{ + val &= 0xff; + + DPRINTF("Config1 write val=0x%02x\n", val); + + if (!rtl8139_config_writable(s)) { + return; + } + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xC, s->Config1); + + s->Config1 = val; +} + +static uint32_t rtl8139_Config1_read(RTL8139State *s) +{ + uint32_t ret = s->Config1; + + DPRINTF("Config1 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_Config3_write(RTL8139State *s, uint32_t val) +{ + val &= 0xff; + + DPRINTF("Config3 write val=0x%02x\n", val); + + if (!rtl8139_config_writable(s)) { + return; + } + + /* mask unwritable bits */ + val = SET_MASKED(val, 0x8F, s->Config3); + + s->Config3 = val; +} + +static uint32_t rtl8139_Config3_read(RTL8139State *s) +{ + uint32_t ret = s->Config3; + + DPRINTF("Config3 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_Config4_write(RTL8139State *s, uint32_t val) +{ + val &= 0xff; + + DPRINTF("Config4 write val=0x%02x\n", val); + + if (!rtl8139_config_writable(s)) { + return; + } + + /* mask unwritable bits */ + val = SET_MASKED(val, 0x0a, s->Config4); + + s->Config4 = val; +} + +static uint32_t rtl8139_Config4_read(RTL8139State *s) +{ + uint32_t ret = s->Config4; + + DPRINTF("Config4 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_Config5_write(RTL8139State *s, uint32_t val) +{ + val &= 0xff; + + DPRINTF("Config5 write val=0x%02x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0x80, s->Config5); + + s->Config5 = val; +} + +static uint32_t rtl8139_Config5_read(RTL8139State *s) +{ + uint32_t ret = s->Config5; + + DPRINTF("Config5 read val=0x%02x\n", ret); + + return ret; +} + +static void rtl8139_TxConfig_write(RTL8139State *s, uint32_t val) +{ + if (!rtl8139_transmitter_enabled(s)) + { + DPRINTF("transmitter disabled; no TxConfig write val=0x%08x\n", val); + return; + } + + DPRINTF("TxConfig write val=0x%08x\n", val); + + val = SET_MASKED(val, TxVersionMask | 0x8070f80f, s->TxConfig); + + s->TxConfig = val; +} + +static void rtl8139_TxConfig_writeb(RTL8139State *s, uint32_t val) +{ + DPRINTF("RTL8139C TxConfig via write(b) val=0x%02x\n", val); + + uint32_t tc = s->TxConfig; + tc &= 0xFFFFFF00; + tc |= (val & 0x000000FF); + rtl8139_TxConfig_write(s, tc); +} + +static uint32_t rtl8139_TxConfig_read(RTL8139State *s) +{ + uint32_t ret = s->TxConfig; + + DPRINTF("TxConfig read val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_RxConfig_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("RxConfig write val=0x%08x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xf0fc0040, s->RxConfig); + + s->RxConfig = val; + + /* reset buffer size and read/write pointers */ + rtl8139_reset_rxring(s, 8192 << ((s->RxConfig >> 11) & 0x3)); + + DPRINTF("RxConfig write reset buffer size to %d\n", s->RxBufferSize); +} + +static uint32_t rtl8139_RxConfig_read(RTL8139State *s) +{ + uint32_t ret = s->RxConfig; + + DPRINTF("RxConfig read val=0x%08x\n", ret); + + return ret; +} + +static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size, + int do_interrupt, const uint8_t *dot1q_buf) +{ + struct iovec *iov = NULL; + struct iovec vlan_iov[3]; + + if (!size) + { + DPRINTF("+++ empty ethernet frame\n"); + return; + } + + if (dot1q_buf && size >= ETHER_ADDR_LEN * 2) { + iov = (struct iovec[3]) { + { .iov_base = buf, .iov_len = ETHER_ADDR_LEN * 2 }, + { .iov_base = (void *) dot1q_buf, .iov_len = VLAN_HLEN }, + { .iov_base = buf + ETHER_ADDR_LEN * 2, + .iov_len = size - ETHER_ADDR_LEN * 2 }, + }; + + memcpy(vlan_iov, iov, sizeof(vlan_iov)); + iov = vlan_iov; + } + + if (TxLoopBack == (s->TxConfig & TxLoopBack)) + { + size_t buf2_size; + uint8_t *buf2; + + if (iov) { + buf2_size = iov_size(iov, 3); + buf2 = g_malloc(buf2_size); + iov_to_buf(iov, 3, 0, buf2, buf2_size); + buf = buf2; + } + + DPRINTF("+++ transmit loopback mode\n"); + rtl8139_do_receive(qemu_get_queue(s->nic), buf, size, do_interrupt); + + if (iov) { + g_free(buf2); + } + } + else + { + if (iov) { + qemu_sendv_packet(qemu_get_queue(s->nic), iov, 3); + } else { + qemu_send_packet(qemu_get_queue(s->nic), buf, size); + } + } +} + +static int rtl8139_transmit_one(RTL8139State *s, int descriptor) +{ + if (!rtl8139_transmitter_enabled(s)) + { + DPRINTF("+++ cannot transmit from descriptor %d: transmitter " + "disabled\n", descriptor); + return 0; + } + + if (s->TxStatus[descriptor] & TxHostOwns) + { + DPRINTF("+++ cannot transmit from descriptor %d: owned by host " + "(%08x)\n", descriptor, s->TxStatus[descriptor]); + return 0; + } + + DPRINTF("+++ transmitting from descriptor %d\n", descriptor); + + PCIDevice *d = PCI_DEVICE(s); + int txsize = s->TxStatus[descriptor] & 0x1fff; + uint8_t txbuffer[0x2000]; + + DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n", + txsize, s->TxAddr[descriptor]); + + pci_dma_read(d, s->TxAddr[descriptor], txbuffer, txsize); + + /* Mark descriptor as transferred */ + s->TxStatus[descriptor] |= TxHostOwns; + s->TxStatus[descriptor] |= TxStatOK; + + rtl8139_transfer_frame(s, txbuffer, txsize, 0, NULL); + + DPRINTF("+++ transmitted %d bytes from descriptor %d\n", txsize, + descriptor); + + /* update interrupt */ + s->IntrStatus |= TxOK; + rtl8139_update_irq(s); + + return 1; +} + +/* structures and macros for task offloading */ +typedef struct ip_header +{ + uint8_t ip_ver_len; /* version and header length */ + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + uint32_t ip_src,ip_dst; /* source and dest address */ +} ip_header; + +#define IP_HEADER_VERSION_4 4 +#define IP_HEADER_VERSION(ip) ((ip->ip_ver_len >> 4)&0xf) +#define IP_HEADER_LENGTH(ip) (((ip->ip_ver_len)&0xf) << 2) + +typedef struct tcp_header +{ + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + uint32_t th_seq; /* sequence number */ + uint32_t th_ack; /* acknowledgement number */ + uint16_t th_offset_flags; /* data offset, reserved 6 bits, TCP protocol flags */ + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +} tcp_header; + +typedef struct udp_header +{ + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + uint16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +} udp_header; + +typedef struct ip_pseudo_header +{ + uint32_t ip_src; + uint32_t ip_dst; + uint8_t zeros; + uint8_t ip_proto; + uint16_t ip_payload; +} ip_pseudo_header; + +#define IP_PROTO_TCP 6 +#define IP_PROTO_UDP 17 + +#define TCP_HEADER_DATA_OFFSET(tcp) (((be16_to_cpu(tcp->th_offset_flags) >> 12)&0xf) << 2) +#define TCP_FLAGS_ONLY(flags) ((flags)&0x3f) +#define TCP_HEADER_FLAGS(tcp) TCP_FLAGS_ONLY(be16_to_cpu(tcp->th_offset_flags)) + +#define TCP_HEADER_CLEAR_FLAGS(tcp, off) ((tcp)->th_offset_flags &= cpu_to_be16(~TCP_FLAGS_ONLY(off))) + +#define TCP_FLAG_FIN 0x01 +#define TCP_FLAG_PUSH 0x08 + +/* produces ones' complement sum of data */ +static uint16_t ones_complement_sum(uint8_t *data, size_t len) +{ + uint32_t result = 0; + + for (; len > 1; data+=2, len-=2) + { + result += *(uint16_t*)data; + } + + /* add the remainder byte */ + if (len) + { + uint8_t odd[2] = {*data, 0}; + result += *(uint16_t*)odd; + } + + while (result>>16) + result = (result & 0xffff) + (result >> 16); + + return result; +} + +static uint16_t ip_checksum(void *data, size_t len) +{ + return ~ones_complement_sum((uint8_t*)data, len); +} + +static int rtl8139_cplus_transmit_one(RTL8139State *s) +{ + if (!rtl8139_transmitter_enabled(s)) + { + DPRINTF("+++ C+ mode: transmitter disabled\n"); + return 0; + } + + if (!rtl8139_cp_transmitter_enabled(s)) + { + DPRINTF("+++ C+ mode: C+ transmitter disabled\n"); + return 0 ; + } + + PCIDevice *d = PCI_DEVICE(s); + int descriptor = s->currCPlusTxDesc; + + dma_addr_t cplus_tx_ring_desc = rtl8139_addr64(s->TxAddr[0], s->TxAddr[1]); + + /* Normal priority ring */ + cplus_tx_ring_desc += 16 * descriptor; + + DPRINTF("+++ C+ mode reading TX descriptor %d from host memory at " + "%08x %08x = 0x"DMA_ADDR_FMT"\n", descriptor, s->TxAddr[1], + s->TxAddr[0], cplus_tx_ring_desc); + + uint32_t val, txdw0,txdw1,txbufLO,txbufHI; + + pci_dma_read(d, cplus_tx_ring_desc, (uint8_t *)&val, 4); + txdw0 = le32_to_cpu(val); + pci_dma_read(d, cplus_tx_ring_desc+4, (uint8_t *)&val, 4); + txdw1 = le32_to_cpu(val); + pci_dma_read(d, cplus_tx_ring_desc+8, (uint8_t *)&val, 4); + txbufLO = le32_to_cpu(val); + pci_dma_read(d, cplus_tx_ring_desc+12, (uint8_t *)&val, 4); + txbufHI = le32_to_cpu(val); + + DPRINTF("+++ C+ mode TX descriptor %d %08x %08x %08x %08x\n", descriptor, + txdw0, txdw1, txbufLO, txbufHI); + +/* w0 ownership flag */ +#define CP_TX_OWN (1<<31) +/* w0 end of ring flag */ +#define CP_TX_EOR (1<<30) +/* first segment of received packet flag */ +#define CP_TX_FS (1<<29) +/* last segment of received packet flag */ +#define CP_TX_LS (1<<28) +/* large send packet flag */ +#define CP_TX_LGSEN (1<<27) +/* large send MSS mask, bits 16...25 */ +#define CP_TC_LGSEN_MSS_MASK ((1 << 12) - 1) + +/* IP checksum offload flag */ +#define CP_TX_IPCS (1<<18) +/* UDP checksum offload flag */ +#define CP_TX_UDPCS (1<<17) +/* TCP checksum offload flag */ +#define CP_TX_TCPCS (1<<16) + +/* w0 bits 0...15 : buffer size */ +#define CP_TX_BUFFER_SIZE (1<<16) +#define CP_TX_BUFFER_SIZE_MASK (CP_TX_BUFFER_SIZE - 1) +/* w1 add tag flag */ +#define CP_TX_TAGC (1<<17) +/* w1 bits 0...15 : VLAN tag (big endian) */ +#define CP_TX_VLAN_TAG_MASK ((1<<16) - 1) +/* w2 low 32bit of Rx buffer ptr */ +/* w3 high 32bit of Rx buffer ptr */ + +/* set after transmission */ +/* FIFO underrun flag */ +#define CP_TX_STATUS_UNF (1<<25) +/* transmit error summary flag, valid if set any of three below */ +#define CP_TX_STATUS_TES (1<<23) +/* out-of-window collision flag */ +#define CP_TX_STATUS_OWC (1<<22) +/* link failure flag */ +#define CP_TX_STATUS_LNKF (1<<21) +/* excessive collisions flag */ +#define CP_TX_STATUS_EXC (1<<20) + + if (!(txdw0 & CP_TX_OWN)) + { + DPRINTF("C+ Tx mode : descriptor %d is owned by host\n", descriptor); + return 0 ; + } + + DPRINTF("+++ C+ Tx mode : transmitting from descriptor %d\n", descriptor); + + if (txdw0 & CP_TX_FS) + { + DPRINTF("+++ C+ Tx mode : descriptor %d is first segment " + "descriptor\n", descriptor); + + /* reset internal buffer offset */ + s->cplus_txbuffer_offset = 0; + } + + int txsize = txdw0 & CP_TX_BUFFER_SIZE_MASK; + dma_addr_t tx_addr = rtl8139_addr64(txbufLO, txbufHI); + + /* make sure we have enough space to assemble the packet */ + if (!s->cplus_txbuffer) + { + s->cplus_txbuffer_len = CP_TX_BUFFER_SIZE; + s->cplus_txbuffer = g_malloc(s->cplus_txbuffer_len); + s->cplus_txbuffer_offset = 0; + + DPRINTF("+++ C+ mode transmission buffer allocated space %d\n", + s->cplus_txbuffer_len); + } + + if (s->cplus_txbuffer_offset + txsize >= s->cplus_txbuffer_len) + { + /* The spec didn't tell the maximum size, stick to CP_TX_BUFFER_SIZE */ + txsize = s->cplus_txbuffer_len - s->cplus_txbuffer_offset; + DPRINTF("+++ C+ mode transmission buffer overrun, truncated descriptor" + "length to %d\n", txsize); + } + + /* append more data to the packet */ + + DPRINTF("+++ C+ mode transmit reading %d bytes from host memory at " + DMA_ADDR_FMT" to offset %d\n", txsize, tx_addr, + s->cplus_txbuffer_offset); + + pci_dma_read(d, tx_addr, + s->cplus_txbuffer + s->cplus_txbuffer_offset, txsize); + s->cplus_txbuffer_offset += txsize; + + /* seek to next Rx descriptor */ + if (txdw0 & CP_TX_EOR) + { + s->currCPlusTxDesc = 0; + } + else + { + ++s->currCPlusTxDesc; + if (s->currCPlusTxDesc >= 64) + s->currCPlusTxDesc = 0; + } + + /* transfer ownership to target */ + txdw0 &= ~CP_RX_OWN; + + /* reset error indicator bits */ + txdw0 &= ~CP_TX_STATUS_UNF; + txdw0 &= ~CP_TX_STATUS_TES; + txdw0 &= ~CP_TX_STATUS_OWC; + txdw0 &= ~CP_TX_STATUS_LNKF; + txdw0 &= ~CP_TX_STATUS_EXC; + + /* update ring data */ + val = cpu_to_le32(txdw0); + pci_dma_write(d, cplus_tx_ring_desc, (uint8_t *)&val, 4); + + /* Now decide if descriptor being processed is holding the last segment of packet */ + if (txdw0 & CP_TX_LS) + { + uint8_t dot1q_buffer_space[VLAN_HLEN]; + uint16_t *dot1q_buffer; + + DPRINTF("+++ C+ Tx mode : descriptor %d is last segment descriptor\n", + descriptor); + + /* can transfer fully assembled packet */ + + uint8_t *saved_buffer = s->cplus_txbuffer; + int saved_size = s->cplus_txbuffer_offset; + int saved_buffer_len = s->cplus_txbuffer_len; + + /* create vlan tag */ + if (txdw1 & CP_TX_TAGC) { + /* the vlan tag is in BE byte order in the descriptor + * BE + le_to_cpu() + ~swap()~ = cpu */ + DPRINTF("+++ C+ Tx mode : inserting vlan tag with ""tci: %u\n", + bswap16(txdw1 & CP_TX_VLAN_TAG_MASK)); + + dot1q_buffer = (uint16_t *) dot1q_buffer_space; + dot1q_buffer[0] = cpu_to_be16(ETH_P_8021Q); + /* BE + le_to_cpu() + ~cpu_to_le()~ = BE */ + dot1q_buffer[1] = cpu_to_le16(txdw1 & CP_TX_VLAN_TAG_MASK); + } else { + dot1q_buffer = NULL; + } + + /* reset the card space to protect from recursive call */ + s->cplus_txbuffer = NULL; + s->cplus_txbuffer_offset = 0; + s->cplus_txbuffer_len = 0; + + if (txdw0 & (CP_TX_IPCS | CP_TX_UDPCS | CP_TX_TCPCS | CP_TX_LGSEN)) + { + DPRINTF("+++ C+ mode offloaded task checksum\n"); + + /* Large enough for Ethernet and IP headers? */ + if (saved_size < ETH_HLEN + sizeof(ip_header)) { + goto skip_offload; + } + + /* ip packet header */ + ip_header *ip = NULL; + int hlen = 0; + uint8_t ip_protocol = 0; + uint16_t ip_data_len = 0; + + uint8_t *eth_payload_data = NULL; + size_t eth_payload_len = 0; + + int proto = be16_to_cpu(*(uint16_t *)(saved_buffer + 12)); + if (proto != ETH_P_IP) + { + goto skip_offload; + } + + DPRINTF("+++ C+ mode has IP packet\n"); + + /* not aligned */ + eth_payload_data = saved_buffer + ETH_HLEN; + eth_payload_len = saved_size - ETH_HLEN; + + ip = (ip_header*)eth_payload_data; + + if (IP_HEADER_VERSION(ip) != IP_HEADER_VERSION_4) { + DPRINTF("+++ C+ mode packet has bad IP version %d " + "expected %d\n", IP_HEADER_VERSION(ip), + IP_HEADER_VERSION_4); + goto skip_offload; + } + + hlen = IP_HEADER_LENGTH(ip); + if (hlen < sizeof(ip_header) || hlen > eth_payload_len) { + goto skip_offload; + } + + ip_protocol = ip->ip_p; + + ip_data_len = be16_to_cpu(ip->ip_len); + if (ip_data_len < hlen || ip_data_len > eth_payload_len) { + goto skip_offload; + } + ip_data_len -= hlen; + + if (txdw0 & CP_TX_IPCS) + { + DPRINTF("+++ C+ mode need IP checksum\n"); + + ip->ip_sum = 0; + ip->ip_sum = ip_checksum(ip, hlen); + DPRINTF("+++ C+ mode IP header len=%d checksum=%04x\n", + hlen, ip->ip_sum); + } + + if ((txdw0 & CP_TX_LGSEN) && ip_protocol == IP_PROTO_TCP) + { + /* Large enough for the TCP header? */ + if (ip_data_len < sizeof(tcp_header)) { + goto skip_offload; + } + + int large_send_mss = (txdw0 >> 16) & CP_TC_LGSEN_MSS_MASK; + + DPRINTF("+++ C+ mode offloaded task TSO MTU=%d IP data %d " + "frame data %d specified MSS=%d\n", ETH_MTU, + ip_data_len, saved_size - ETH_HLEN, large_send_mss); + + int tcp_send_offset = 0; + int send_count = 0; + + /* maximum IP header length is 60 bytes */ + uint8_t saved_ip_header[60]; + + /* save IP header template; data area is used in tcp checksum calculation */ + memcpy(saved_ip_header, eth_payload_data, hlen); + + /* a placeholder for checksum calculation routine in tcp case */ + uint8_t *data_to_checksum = eth_payload_data + hlen - 12; + // size_t data_to_checksum_len = eth_payload_len - hlen + 12; + + /* pointer to TCP header */ + tcp_header *p_tcp_hdr = (tcp_header*)(eth_payload_data + hlen); + + int tcp_hlen = TCP_HEADER_DATA_OFFSET(p_tcp_hdr); + + /* Invalid TCP data offset? */ + if (tcp_hlen < sizeof(tcp_header) || tcp_hlen > ip_data_len) { + goto skip_offload; + } + + /* ETH_MTU = ip header len + tcp header len + payload */ + int tcp_data_len = ip_data_len - tcp_hlen; + int tcp_chunk_size = ETH_MTU - hlen - tcp_hlen; + + DPRINTF("+++ C+ mode TSO IP data len %d TCP hlen %d TCP " + "data len %d TCP chunk size %d\n", ip_data_len, + tcp_hlen, tcp_data_len, tcp_chunk_size); + + /* note the cycle below overwrites IP header data, + but restores it from saved_ip_header before sending packet */ + + int is_last_frame = 0; + + for (tcp_send_offset = 0; tcp_send_offset < tcp_data_len; tcp_send_offset += tcp_chunk_size) + { + uint16_t chunk_size = tcp_chunk_size; + + /* check if this is the last frame */ + if (tcp_send_offset + tcp_chunk_size >= tcp_data_len) + { + is_last_frame = 1; + chunk_size = tcp_data_len - tcp_send_offset; + } + + DPRINTF("+++ C+ mode TSO TCP seqno %08x\n", + be32_to_cpu(p_tcp_hdr->th_seq)); + + /* add 4 TCP pseudoheader fields */ + /* copy IP source and destination fields */ + memcpy(data_to_checksum, saved_ip_header + 12, 8); + + DPRINTF("+++ C+ mode TSO calculating TCP checksum for " + "packet with %d bytes data\n", tcp_hlen + + chunk_size); + + if (tcp_send_offset) + { + memcpy((uint8_t*)p_tcp_hdr + tcp_hlen, (uint8_t*)p_tcp_hdr + tcp_hlen + tcp_send_offset, chunk_size); + } + + /* keep PUSH and FIN flags only for the last frame */ + if (!is_last_frame) + { + TCP_HEADER_CLEAR_FLAGS(p_tcp_hdr, TCP_FLAG_PUSH|TCP_FLAG_FIN); + } + + /* recalculate TCP checksum */ + ip_pseudo_header *p_tcpip_hdr = (ip_pseudo_header *)data_to_checksum; + p_tcpip_hdr->zeros = 0; + p_tcpip_hdr->ip_proto = IP_PROTO_TCP; + p_tcpip_hdr->ip_payload = cpu_to_be16(tcp_hlen + chunk_size); + + p_tcp_hdr->th_sum = 0; + + int tcp_checksum = ip_checksum(data_to_checksum, tcp_hlen + chunk_size + 12); + DPRINTF("+++ C+ mode TSO TCP checksum %04x\n", + tcp_checksum); + + p_tcp_hdr->th_sum = tcp_checksum; + + /* restore IP header */ + memcpy(eth_payload_data, saved_ip_header, hlen); + + /* set IP data length and recalculate IP checksum */ + ip->ip_len = cpu_to_be16(hlen + tcp_hlen + chunk_size); + + /* increment IP id for subsequent frames */ + ip->ip_id = cpu_to_be16(tcp_send_offset/tcp_chunk_size + be16_to_cpu(ip->ip_id)); + + ip->ip_sum = 0; + ip->ip_sum = ip_checksum(eth_payload_data, hlen); + DPRINTF("+++ C+ mode TSO IP header len=%d " + "checksum=%04x\n", hlen, ip->ip_sum); + + int tso_send_size = ETH_HLEN + hlen + tcp_hlen + chunk_size; + DPRINTF("+++ C+ mode TSO transferring packet size " + "%d\n", tso_send_size); + rtl8139_transfer_frame(s, saved_buffer, tso_send_size, + 0, (uint8_t *) dot1q_buffer); + + /* add transferred count to TCP sequence number */ + p_tcp_hdr->th_seq = cpu_to_be32(chunk_size + be32_to_cpu(p_tcp_hdr->th_seq)); + ++send_count; + } + + /* Stop sending this frame */ + saved_size = 0; + } + else if (txdw0 & (CP_TX_TCPCS|CP_TX_UDPCS)) + { + DPRINTF("+++ C+ mode need TCP or UDP checksum\n"); + + /* maximum IP header length is 60 bytes */ + uint8_t saved_ip_header[60]; + memcpy(saved_ip_header, eth_payload_data, hlen); + + uint8_t *data_to_checksum = eth_payload_data + hlen - 12; + // size_t data_to_checksum_len = eth_payload_len - hlen + 12; + + /* add 4 TCP pseudoheader fields */ + /* copy IP source and destination fields */ + memcpy(data_to_checksum, saved_ip_header + 12, 8); + + if ((txdw0 & CP_TX_TCPCS) && ip_protocol == IP_PROTO_TCP) + { + DPRINTF("+++ C+ mode calculating TCP checksum for " + "packet with %d bytes data\n", ip_data_len); + + ip_pseudo_header *p_tcpip_hdr = (ip_pseudo_header *)data_to_checksum; + p_tcpip_hdr->zeros = 0; + p_tcpip_hdr->ip_proto = IP_PROTO_TCP; + p_tcpip_hdr->ip_payload = cpu_to_be16(ip_data_len); + + tcp_header* p_tcp_hdr = (tcp_header *) (data_to_checksum+12); + + p_tcp_hdr->th_sum = 0; + + int tcp_checksum = ip_checksum(data_to_checksum, ip_data_len + 12); + DPRINTF("+++ C+ mode TCP checksum %04x\n", + tcp_checksum); + + p_tcp_hdr->th_sum = tcp_checksum; + } + else if ((txdw0 & CP_TX_UDPCS) && ip_protocol == IP_PROTO_UDP) + { + DPRINTF("+++ C+ mode calculating UDP checksum for " + "packet with %d bytes data\n", ip_data_len); + + ip_pseudo_header *p_udpip_hdr = (ip_pseudo_header *)data_to_checksum; + p_udpip_hdr->zeros = 0; + p_udpip_hdr->ip_proto = IP_PROTO_UDP; + p_udpip_hdr->ip_payload = cpu_to_be16(ip_data_len); + + udp_header *p_udp_hdr = (udp_header *) (data_to_checksum+12); + + p_udp_hdr->uh_sum = 0; + + int udp_checksum = ip_checksum(data_to_checksum, ip_data_len + 12); + DPRINTF("+++ C+ mode UDP checksum %04x\n", + udp_checksum); + + p_udp_hdr->uh_sum = udp_checksum; + } + + /* restore IP header */ + memcpy(eth_payload_data, saved_ip_header, hlen); + } + } + +skip_offload: + /* update tally counter */ + ++s->tally_counters.TxOk; + + DPRINTF("+++ C+ mode transmitting %d bytes packet\n", saved_size); + + rtl8139_transfer_frame(s, saved_buffer, saved_size, 1, + (uint8_t *) dot1q_buffer); + + /* restore card space if there was no recursion and reset offset */ + if (!s->cplus_txbuffer) + { + s->cplus_txbuffer = saved_buffer; + s->cplus_txbuffer_len = saved_buffer_len; + s->cplus_txbuffer_offset = 0; + } + else + { + g_free(saved_buffer); + } + } + else + { + DPRINTF("+++ C+ mode transmission continue to next descriptor\n"); + } + + return 1; +} + +static void rtl8139_cplus_transmit(RTL8139State *s) +{ + int txcount = 0; + + while (rtl8139_cplus_transmit_one(s)) + { + ++txcount; + } + + /* Mark transfer completed */ + if (!txcount) + { + DPRINTF("C+ mode : transmitter queue stalled, current TxDesc = %d\n", + s->currCPlusTxDesc); + } + else + { + /* update interrupt status */ + s->IntrStatus |= TxOK; + rtl8139_update_irq(s); + } +} + +static void rtl8139_transmit(RTL8139State *s) +{ + int descriptor = s->currTxDesc, txcount = 0; + + /*while*/ + if (rtl8139_transmit_one(s, descriptor)) + { + ++s->currTxDesc; + s->currTxDesc %= 4; + ++txcount; + } + + /* Mark transfer completed */ + if (!txcount) + { + DPRINTF("transmitter queue stalled, current TxDesc = %d\n", + s->currTxDesc); + } +} + +static void rtl8139_TxStatus_write(RTL8139State *s, uint32_t txRegOffset, uint32_t val) +{ + + int descriptor = txRegOffset/4; + + /* handle C+ transmit mode register configuration */ + + if (s->cplus_enabled) + { + DPRINTF("RTL8139C+ DTCCR write offset=0x%x val=0x%08x " + "descriptor=%d\n", txRegOffset, val, descriptor); + + /* handle Dump Tally Counters command */ + s->TxStatus[descriptor] = val; + + if (descriptor == 0 && (val & 0x8)) + { + hwaddr tc_addr = rtl8139_addr64(s->TxStatus[0] & ~0x3f, s->TxStatus[1]); + + /* dump tally counters to specified memory location */ + RTL8139TallyCounters_dma_write(s, tc_addr); + + /* mark dump completed */ + s->TxStatus[0] &= ~0x8; + } + + return; + } + + DPRINTF("TxStatus write offset=0x%x val=0x%08x descriptor=%d\n", + txRegOffset, val, descriptor); + + /* mask only reserved bits */ + val &= ~0xff00c000; /* these bits are reset on write */ + val = SET_MASKED(val, 0x00c00000, s->TxStatus[descriptor]); + + s->TxStatus[descriptor] = val; + + /* attempt to start transmission */ + rtl8139_transmit(s); +} + +static uint32_t rtl8139_TxStatus_TxAddr_read(RTL8139State *s, uint32_t regs[], + uint32_t base, uint8_t addr, + int size) +{ + uint32_t reg = (addr - base) / 4; + uint32_t offset = addr & 0x3; + uint32_t ret = 0; + + if (addr & (size - 1)) { + DPRINTF("not implemented read for TxStatus/TxAddr " + "addr=0x%x size=0x%x\n", addr, size); + return ret; + } + + switch (size) { + case 1: /* fall through */ + case 2: /* fall through */ + case 4: + ret = (regs[reg] >> offset * 8) & (((uint64_t)1 << (size * 8)) - 1); + DPRINTF("TxStatus/TxAddr[%d] read addr=0x%x size=0x%x val=0x%08x\n", + reg, addr, size, ret); + break; + default: + DPRINTF("unsupported size 0x%x of TxStatus/TxAddr reading\n", size); + break; + } + + return ret; +} + +static uint16_t rtl8139_TSAD_read(RTL8139State *s) +{ + uint16_t ret = 0; + + /* Simulate TSAD, it is read only anyway */ + + ret = ((s->TxStatus[3] & TxStatOK )?TSAD_TOK3:0) + |((s->TxStatus[2] & TxStatOK )?TSAD_TOK2:0) + |((s->TxStatus[1] & TxStatOK )?TSAD_TOK1:0) + |((s->TxStatus[0] & TxStatOK )?TSAD_TOK0:0) + + |((s->TxStatus[3] & TxUnderrun)?TSAD_TUN3:0) + |((s->TxStatus[2] & TxUnderrun)?TSAD_TUN2:0) + |((s->TxStatus[1] & TxUnderrun)?TSAD_TUN1:0) + |((s->TxStatus[0] & TxUnderrun)?TSAD_TUN0:0) + + |((s->TxStatus[3] & TxAborted )?TSAD_TABT3:0) + |((s->TxStatus[2] & TxAborted )?TSAD_TABT2:0) + |((s->TxStatus[1] & TxAborted )?TSAD_TABT1:0) + |((s->TxStatus[0] & TxAborted )?TSAD_TABT0:0) + + |((s->TxStatus[3] & TxHostOwns )?TSAD_OWN3:0) + |((s->TxStatus[2] & TxHostOwns )?TSAD_OWN2:0) + |((s->TxStatus[1] & TxHostOwns )?TSAD_OWN1:0) + |((s->TxStatus[0] & TxHostOwns )?TSAD_OWN0:0) ; + + + DPRINTF("TSAD read val=0x%04x\n", ret); + + return ret; +} + +static uint16_t rtl8139_CSCR_read(RTL8139State *s) +{ + uint16_t ret = s->CSCR; + + DPRINTF("CSCR read val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_TxAddr_write(RTL8139State *s, uint32_t txAddrOffset, uint32_t val) +{ + DPRINTF("TxAddr write offset=0x%x val=0x%08x\n", txAddrOffset, val); + + s->TxAddr[txAddrOffset/4] = val; +} + +static uint32_t rtl8139_TxAddr_read(RTL8139State *s, uint32_t txAddrOffset) +{ + uint32_t ret = s->TxAddr[txAddrOffset/4]; + + DPRINTF("TxAddr read offset=0x%x val=0x%08x\n", txAddrOffset, ret); + + return ret; +} + +static void rtl8139_RxBufPtr_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("RxBufPtr write val=0x%04x\n", val); + + /* this value is off by 16 */ + s->RxBufPtr = MOD2(val + 0x10, s->RxBufferSize); + + /* more buffer space may be available so try to receive */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + + DPRINTF(" CAPR write: rx buffer length %d head 0x%04x read 0x%04x\n", + s->RxBufferSize, s->RxBufAddr, s->RxBufPtr); +} + +static uint32_t rtl8139_RxBufPtr_read(RTL8139State *s) +{ + /* this value is off by 16 */ + uint32_t ret = s->RxBufPtr - 0x10; + + DPRINTF("RxBufPtr read val=0x%04x\n", ret); + + return ret; +} + +static uint32_t rtl8139_RxBufAddr_read(RTL8139State *s) +{ + /* this value is NOT off by 16 */ + uint32_t ret = s->RxBufAddr; + + DPRINTF("RxBufAddr read val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_RxBuf_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("RxBuf write val=0x%08x\n", val); + + s->RxBuf = val; + + /* may need to reset rxring here */ +} + +static uint32_t rtl8139_RxBuf_read(RTL8139State *s) +{ + uint32_t ret = s->RxBuf; + + DPRINTF("RxBuf read val=0x%08x\n", ret); + + return ret; +} + +static void rtl8139_IntrMask_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("IntrMask write(w) val=0x%04x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0x1e00, s->IntrMask); + + s->IntrMask = val; + + rtl8139_update_irq(s); + +} + +static uint32_t rtl8139_IntrMask_read(RTL8139State *s) +{ + uint32_t ret = s->IntrMask; + + DPRINTF("IntrMask read(w) val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_IntrStatus_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("IntrStatus write(w) val=0x%04x\n", val); + +#if 0 + + /* writing to ISR has no effect */ + + return; + +#else + uint16_t newStatus = s->IntrStatus & ~val; + + /* mask unwritable bits */ + newStatus = SET_MASKED(newStatus, 0x1e00, s->IntrStatus); + + /* writing 1 to interrupt status register bit clears it */ + s->IntrStatus = 0; + rtl8139_update_irq(s); + + s->IntrStatus = newStatus; + rtl8139_set_next_tctr_time(s); + rtl8139_update_irq(s); + +#endif +} + +static uint32_t rtl8139_IntrStatus_read(RTL8139State *s) +{ + uint32_t ret = s->IntrStatus; + + DPRINTF("IntrStatus read(w) val=0x%04x\n", ret); + +#if 0 + + /* reading ISR clears all interrupts */ + s->IntrStatus = 0; + + rtl8139_update_irq(s); + +#endif + + return ret; +} + +static void rtl8139_MultiIntr_write(RTL8139State *s, uint32_t val) +{ + DPRINTF("MultiIntr write(w) val=0x%04x\n", val); + + /* mask unwritable bits */ + val = SET_MASKED(val, 0xf000, s->MultiIntr); + + s->MultiIntr = val; +} + +static uint32_t rtl8139_MultiIntr_read(RTL8139State *s) +{ + uint32_t ret = s->MultiIntr; + + DPRINTF("MultiIntr read(w) val=0x%04x\n", ret); + + return ret; +} + +static void rtl8139_io_writeb(void *opaque, uint8_t addr, uint32_t val) +{ + RTL8139State *s = opaque; + + switch (addr) + { + case MAC0 ... MAC0+4: + s->phys[addr - MAC0] = val; + break; + case MAC0+5: + s->phys[addr - MAC0] = val; + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys); + break; + case MAC0+6 ... MAC0+7: + /* reserved */ + break; + case MAR0 ... MAR0+7: + s->mult[addr - MAR0] = val; + break; + case ChipCmd: + rtl8139_ChipCmd_write(s, val); + break; + case Cfg9346: + rtl8139_Cfg9346_write(s, val); + break; + case TxConfig: /* windows driver sometimes writes using byte-lenth call */ + rtl8139_TxConfig_writeb(s, val); + break; + case Config0: + rtl8139_Config0_write(s, val); + break; + case Config1: + rtl8139_Config1_write(s, val); + break; + case Config3: + rtl8139_Config3_write(s, val); + break; + case Config4: + rtl8139_Config4_write(s, val); + break; + case Config5: + rtl8139_Config5_write(s, val); + break; + case MediaStatus: + /* ignore */ + DPRINTF("not implemented write(b) to MediaStatus val=0x%02x\n", + val); + break; + + case HltClk: + DPRINTF("HltClk write val=0x%08x\n", val); + if (val == 'R') + { + s->clock_enabled = 1; + } + else if (val == 'H') + { + s->clock_enabled = 0; + } + break; + + case TxThresh: + DPRINTF("C+ TxThresh write(b) val=0x%02x\n", val); + s->TxThresh = val; + break; + + case TxPoll: + DPRINTF("C+ TxPoll write(b) val=0x%02x\n", val); + if (val & (1 << 7)) + { + DPRINTF("C+ TxPoll high priority transmission (not " + "implemented)\n"); + //rtl8139_cplus_transmit(s); + } + if (val & (1 << 6)) + { + DPRINTF("C+ TxPoll normal priority transmission\n"); + rtl8139_cplus_transmit(s); + } + + break; + + default: + DPRINTF("not implemented write(b) addr=0x%x val=0x%02x\n", addr, + val); + break; + } +} + +static void rtl8139_io_writew(void *opaque, uint8_t addr, uint32_t val) +{ + RTL8139State *s = opaque; + + switch (addr) + { + case IntrMask: + rtl8139_IntrMask_write(s, val); + break; + + case IntrStatus: + rtl8139_IntrStatus_write(s, val); + break; + + case MultiIntr: + rtl8139_MultiIntr_write(s, val); + break; + + case RxBufPtr: + rtl8139_RxBufPtr_write(s, val); + break; + + case BasicModeCtrl: + rtl8139_BasicModeCtrl_write(s, val); + break; + case BasicModeStatus: + rtl8139_BasicModeStatus_write(s, val); + break; + case NWayAdvert: + DPRINTF("NWayAdvert write(w) val=0x%04x\n", val); + s->NWayAdvert = val; + break; + case NWayLPAR: + DPRINTF("forbidden NWayLPAR write(w) val=0x%04x\n", val); + break; + case NWayExpansion: + DPRINTF("NWayExpansion write(w) val=0x%04x\n", val); + s->NWayExpansion = val; + break; + + case CpCmd: + rtl8139_CpCmd_write(s, val); + break; + + case IntrMitigate: + rtl8139_IntrMitigate_write(s, val); + break; + + default: + DPRINTF("ioport write(w) addr=0x%x val=0x%04x via write(b)\n", + addr, val); + + rtl8139_io_writeb(opaque, addr, val & 0xff); + rtl8139_io_writeb(opaque, addr + 1, (val >> 8) & 0xff); + break; + } +} + +static void rtl8139_set_next_tctr_time(RTL8139State *s) +{ + const uint64_t ns_per_period = + muldiv64(0x100000000LL, get_ticks_per_sec(), PCI_FREQUENCY); + + DPRINTF("entered rtl8139_set_next_tctr_time\n"); + + /* This function is called at least once per period, so it is a good + * place to update the timer base. + * + * After one iteration of this loop the value in the Timer register does + * not change, but the device model is counting up by 2^32 ticks (approx. + * 130 seconds). + */ + while (s->TCTR_base + ns_per_period <= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) { + s->TCTR_base += ns_per_period; + } + + if (!s->TimerInt) { + timer_del(s->timer); + } else { + uint64_t delta = muldiv64(s->TimerInt, get_ticks_per_sec(), PCI_FREQUENCY); + if (s->TCTR_base + delta <= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) { + delta += ns_per_period; + } + timer_mod(s->timer, s->TCTR_base + delta); + } +} + +static void rtl8139_io_writel(void *opaque, uint8_t addr, uint32_t val) +{ + RTL8139State *s = opaque; + + switch (addr) + { + case RxMissed: + DPRINTF("RxMissed clearing on write\n"); + s->RxMissed = 0; + break; + + case TxConfig: + rtl8139_TxConfig_write(s, val); + break; + + case RxConfig: + rtl8139_RxConfig_write(s, val); + break; + + case TxStatus0 ... TxStatus0+4*4-1: + rtl8139_TxStatus_write(s, addr-TxStatus0, val); + break; + + case TxAddr0 ... TxAddr0+4*4-1: + rtl8139_TxAddr_write(s, addr-TxAddr0, val); + break; + + case RxBuf: + rtl8139_RxBuf_write(s, val); + break; + + case RxRingAddrLO: + DPRINTF("C+ RxRing low bits write val=0x%08x\n", val); + s->RxRingAddrLO = val; + break; + + case RxRingAddrHI: + DPRINTF("C+ RxRing high bits write val=0x%08x\n", val); + s->RxRingAddrHI = val; + break; + + case Timer: + DPRINTF("TCTR Timer reset on write\n"); + s->TCTR_base = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + rtl8139_set_next_tctr_time(s); + break; + + case FlashReg: + DPRINTF("FlashReg TimerInt write val=0x%08x\n", val); + if (s->TimerInt != val) { + s->TimerInt = val; + rtl8139_set_next_tctr_time(s); + } + break; + + default: + DPRINTF("ioport write(l) addr=0x%x val=0x%08x via write(b)\n", + addr, val); + rtl8139_io_writeb(opaque, addr, val & 0xff); + rtl8139_io_writeb(opaque, addr + 1, (val >> 8) & 0xff); + rtl8139_io_writeb(opaque, addr + 2, (val >> 16) & 0xff); + rtl8139_io_writeb(opaque, addr + 3, (val >> 24) & 0xff); + break; + } +} + +static uint32_t rtl8139_io_readb(void *opaque, uint8_t addr) +{ + RTL8139State *s = opaque; + int ret; + + switch (addr) + { + case MAC0 ... MAC0+5: + ret = s->phys[addr - MAC0]; + break; + case MAC0+6 ... MAC0+7: + ret = 0; + break; + case MAR0 ... MAR0+7: + ret = s->mult[addr - MAR0]; + break; + case TxStatus0 ... TxStatus0+4*4-1: + ret = rtl8139_TxStatus_TxAddr_read(s, s->TxStatus, TxStatus0, + addr, 1); + break; + case ChipCmd: + ret = rtl8139_ChipCmd_read(s); + break; + case Cfg9346: + ret = rtl8139_Cfg9346_read(s); + break; + case Config0: + ret = rtl8139_Config0_read(s); + break; + case Config1: + ret = rtl8139_Config1_read(s); + break; + case Config3: + ret = rtl8139_Config3_read(s); + break; + case Config4: + ret = rtl8139_Config4_read(s); + break; + case Config5: + ret = rtl8139_Config5_read(s); + break; + + case MediaStatus: + /* The LinkDown bit of MediaStatus is inverse with link status */ + ret = 0xd0 | (~s->BasicModeStatus & 0x04); + DPRINTF("MediaStatus read 0x%x\n", ret); + break; + + case HltClk: + ret = s->clock_enabled; + DPRINTF("HltClk read 0x%x\n", ret); + break; + + case PCIRevisionID: + ret = RTL8139_PCI_REVID; + DPRINTF("PCI Revision ID read 0x%x\n", ret); + break; + + case TxThresh: + ret = s->TxThresh; + DPRINTF("C+ TxThresh read(b) val=0x%02x\n", ret); + break; + + case 0x43: /* Part of TxConfig register. Windows driver tries to read it */ + ret = s->TxConfig >> 24; + DPRINTF("RTL8139C TxConfig at 0x43 read(b) val=0x%02x\n", ret); + break; + + default: + DPRINTF("not implemented read(b) addr=0x%x\n", addr); + ret = 0; + break; + } + + return ret; +} + +static uint32_t rtl8139_io_readw(void *opaque, uint8_t addr) +{ + RTL8139State *s = opaque; + uint32_t ret; + + switch (addr) + { + case TxAddr0 ... TxAddr0+4*4-1: + ret = rtl8139_TxStatus_TxAddr_read(s, s->TxAddr, TxAddr0, addr, 2); + break; + case IntrMask: + ret = rtl8139_IntrMask_read(s); + break; + + case IntrStatus: + ret = rtl8139_IntrStatus_read(s); + break; + + case MultiIntr: + ret = rtl8139_MultiIntr_read(s); + break; + + case RxBufPtr: + ret = rtl8139_RxBufPtr_read(s); + break; + + case RxBufAddr: + ret = rtl8139_RxBufAddr_read(s); + break; + + case BasicModeCtrl: + ret = rtl8139_BasicModeCtrl_read(s); + break; + case BasicModeStatus: + ret = rtl8139_BasicModeStatus_read(s); + break; + case NWayAdvert: + ret = s->NWayAdvert; + DPRINTF("NWayAdvert read(w) val=0x%04x\n", ret); + break; + case NWayLPAR: + ret = s->NWayLPAR; + DPRINTF("NWayLPAR read(w) val=0x%04x\n", ret); + break; + case NWayExpansion: + ret = s->NWayExpansion; + DPRINTF("NWayExpansion read(w) val=0x%04x\n", ret); + break; + + case CpCmd: + ret = rtl8139_CpCmd_read(s); + break; + + case IntrMitigate: + ret = rtl8139_IntrMitigate_read(s); + break; + + case TxSummary: + ret = rtl8139_TSAD_read(s); + break; + + case CSCR: + ret = rtl8139_CSCR_read(s); + break; + + default: + DPRINTF("ioport read(w) addr=0x%x via read(b)\n", addr); + + ret = rtl8139_io_readb(opaque, addr); + ret |= rtl8139_io_readb(opaque, addr + 1) << 8; + + DPRINTF("ioport read(w) addr=0x%x val=0x%04x\n", addr, ret); + break; + } + + return ret; +} + +static uint32_t rtl8139_io_readl(void *opaque, uint8_t addr) +{ + RTL8139State *s = opaque; + uint32_t ret; + + switch (addr) + { + case RxMissed: + ret = s->RxMissed; + + DPRINTF("RxMissed read val=0x%08x\n", ret); + break; + + case TxConfig: + ret = rtl8139_TxConfig_read(s); + break; + + case RxConfig: + ret = rtl8139_RxConfig_read(s); + break; + + case TxStatus0 ... TxStatus0+4*4-1: + ret = rtl8139_TxStatus_TxAddr_read(s, s->TxStatus, TxStatus0, + addr, 4); + break; + + case TxAddr0 ... TxAddr0+4*4-1: + ret = rtl8139_TxAddr_read(s, addr-TxAddr0); + break; + + case RxBuf: + ret = rtl8139_RxBuf_read(s); + break; + + case RxRingAddrLO: + ret = s->RxRingAddrLO; + DPRINTF("C+ RxRing low bits read val=0x%08x\n", ret); + break; + + case RxRingAddrHI: + ret = s->RxRingAddrHI; + DPRINTF("C+ RxRing high bits read val=0x%08x\n", ret); + break; + + case Timer: + ret = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->TCTR_base, + PCI_FREQUENCY, get_ticks_per_sec()); + DPRINTF("TCTR Timer read val=0x%08x\n", ret); + break; + + case FlashReg: + ret = s->TimerInt; + DPRINTF("FlashReg TimerInt read val=0x%08x\n", ret); + break; + + default: + DPRINTF("ioport read(l) addr=0x%x via read(b)\n", addr); + + ret = rtl8139_io_readb(opaque, addr); + ret |= rtl8139_io_readb(opaque, addr + 1) << 8; + ret |= rtl8139_io_readb(opaque, addr + 2) << 16; + ret |= rtl8139_io_readb(opaque, addr + 3) << 24; + + DPRINTF("read(l) addr=0x%x val=%08x\n", addr, ret); + break; + } + + return ret; +} + +/* */ + +static void rtl8139_mmio_writeb(void *opaque, hwaddr addr, uint32_t val) +{ + rtl8139_io_writeb(opaque, addr & 0xFF, val); +} + +static void rtl8139_mmio_writew(void *opaque, hwaddr addr, uint32_t val) +{ + rtl8139_io_writew(opaque, addr & 0xFF, val); +} + +static void rtl8139_mmio_writel(void *opaque, hwaddr addr, uint32_t val) +{ + rtl8139_io_writel(opaque, addr & 0xFF, val); +} + +static uint32_t rtl8139_mmio_readb(void *opaque, hwaddr addr) +{ + return rtl8139_io_readb(opaque, addr & 0xFF); +} + +static uint32_t rtl8139_mmio_readw(void *opaque, hwaddr addr) +{ + uint32_t val = rtl8139_io_readw(opaque, addr & 0xFF); + return val; +} + +static uint32_t rtl8139_mmio_readl(void *opaque, hwaddr addr) +{ + uint32_t val = rtl8139_io_readl(opaque, addr & 0xFF); + return val; +} + +static int rtl8139_post_load(void *opaque, int version_id) +{ + RTL8139State* s = opaque; + rtl8139_set_next_tctr_time(s); + if (version_id < 4) { + s->cplus_enabled = s->CpCmd != 0; + } + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in BasicModeStatus */ + qemu_get_queue(s->nic)->link_down = (s->BasicModeStatus & 0x04) == 0; + + return 0; +} + +static bool rtl8139_hotplug_ready_needed(void *opaque) +{ + return qdev_machine_modified(); +} + +static const VMStateDescription vmstate_rtl8139_hotplug_ready ={ + .name = "rtl8139/hotplug_ready", + .version_id = 1, + .minimum_version_id = 1, + .needed = rtl8139_hotplug_ready_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +static void rtl8139_pre_save(void *opaque) +{ + RTL8139State* s = opaque; + int64_t current_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* for migration to older versions */ + s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY, + get_ticks_per_sec()); + s->rtl8139_mmio_io_addr_dummy = 0; +} + +static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", + .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, RTL8139State), + VMSTATE_PARTIAL_BUFFER(phys, RTL8139State, 6), + VMSTATE_BUFFER(mult, RTL8139State), + VMSTATE_UINT32_ARRAY(TxStatus, RTL8139State, 4), + VMSTATE_UINT32_ARRAY(TxAddr, RTL8139State, 4), + + VMSTATE_UINT32(RxBuf, RTL8139State), + VMSTATE_UINT32(RxBufferSize, RTL8139State), + VMSTATE_UINT32(RxBufPtr, RTL8139State), + VMSTATE_UINT32(RxBufAddr, RTL8139State), + + VMSTATE_UINT16(IntrStatus, RTL8139State), + VMSTATE_UINT16(IntrMask, RTL8139State), + + VMSTATE_UINT32(TxConfig, RTL8139State), + VMSTATE_UINT32(RxConfig, RTL8139State), + VMSTATE_UINT32(RxMissed, RTL8139State), + VMSTATE_UINT16(CSCR, RTL8139State), + + VMSTATE_UINT8(Cfg9346, RTL8139State), + VMSTATE_UINT8(Config0, RTL8139State), + VMSTATE_UINT8(Config1, RTL8139State), + VMSTATE_UINT8(Config3, RTL8139State), + VMSTATE_UINT8(Config4, RTL8139State), + VMSTATE_UINT8(Config5, RTL8139State), + + VMSTATE_UINT8(clock_enabled, RTL8139State), + VMSTATE_UINT8(bChipCmdState, RTL8139State), + + VMSTATE_UINT16(MultiIntr, RTL8139State), + + VMSTATE_UINT16(BasicModeCtrl, RTL8139State), + VMSTATE_UINT16(BasicModeStatus, RTL8139State), + VMSTATE_UINT16(NWayAdvert, RTL8139State), + VMSTATE_UINT16(NWayLPAR, RTL8139State), + VMSTATE_UINT16(NWayExpansion, RTL8139State), + + VMSTATE_UINT16(CpCmd, RTL8139State), + VMSTATE_UINT8(TxThresh, RTL8139State), + + VMSTATE_UNUSED(4), + VMSTATE_MACADDR(conf.macaddr, RTL8139State), + VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State), + + VMSTATE_UINT32(currTxDesc, RTL8139State), + VMSTATE_UINT32(currCPlusRxDesc, RTL8139State), + VMSTATE_UINT32(currCPlusTxDesc, RTL8139State), + VMSTATE_UINT32(RxRingAddrLO, RTL8139State), + VMSTATE_UINT32(RxRingAddrHI, RTL8139State), + + VMSTATE_UINT16_ARRAY(eeprom.contents, RTL8139State, EEPROM_9346_SIZE), + VMSTATE_INT32(eeprom.mode, RTL8139State), + VMSTATE_UINT32(eeprom.tick, RTL8139State), + VMSTATE_UINT8(eeprom.address, RTL8139State), + VMSTATE_UINT16(eeprom.input, RTL8139State), + VMSTATE_UINT16(eeprom.output, RTL8139State), + + VMSTATE_UINT8(eeprom.eecs, RTL8139State), + VMSTATE_UINT8(eeprom.eesk, RTL8139State), + VMSTATE_UINT8(eeprom.eedi, RTL8139State), + VMSTATE_UINT8(eeprom.eedo, RTL8139State), + + VMSTATE_UINT32(TCTR, RTL8139State), + VMSTATE_UINT32(TimerInt, RTL8139State), + VMSTATE_INT64(TCTR_base, RTL8139State), + + VMSTATE_STRUCT(tally_counters, RTL8139State, 0, + vmstate_tally_counters, RTL8139TallyCounters), + + VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_rtl8139_hotplug_ready, + NULL + } +}; + +/***********************************************************/ +/* PCI RTL8139 definitions */ + +static void rtl8139_ioport_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + switch (size) { + case 1: + rtl8139_io_writeb(opaque, addr, val); + break; + case 2: + rtl8139_io_writew(opaque, addr, val); + break; + case 4: + rtl8139_io_writel(opaque, addr, val); + break; + } +} + +static uint64_t rtl8139_ioport_read(void *opaque, hwaddr addr, + unsigned size) +{ + switch (size) { + case 1: + return rtl8139_io_readb(opaque, addr); + case 2: + return rtl8139_io_readw(opaque, addr); + case 4: + return rtl8139_io_readl(opaque, addr); + } + + return -1; +} + +static const MemoryRegionOps rtl8139_io_ops = { + .read = rtl8139_ioport_read, + .write = rtl8139_ioport_write, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static const MemoryRegionOps rtl8139_mmio_ops = { + .old_mmio = { + .read = { + rtl8139_mmio_readb, + rtl8139_mmio_readw, + rtl8139_mmio_readl, + }, + .write = { + rtl8139_mmio_writeb, + rtl8139_mmio_writew, + rtl8139_mmio_writel, + }, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void rtl8139_timer(void *opaque) +{ + RTL8139State *s = opaque; + + if (!s->clock_enabled) + { + DPRINTF(">>> timer: clock is not running\n"); + return; + } + + s->IntrStatus |= PCSTimeout; + rtl8139_update_irq(s); + rtl8139_set_next_tctr_time(s); +} + +static void pci_rtl8139_uninit(PCIDevice *dev) +{ + RTL8139State *s = RTL8139(dev); + + if (s->cplus_txbuffer) { + g_free(s->cplus_txbuffer); + s->cplus_txbuffer = NULL; + } + timer_del(s->timer); + timer_free(s->timer); + qemu_del_nic(s->nic); +} + +static void rtl8139_set_link_status(NetClientState *nc) +{ + RTL8139State *s = qemu_get_nic_opaque(nc); + + if (nc->link_down) { + s->BasicModeStatus &= ~0x04; + } else { + s->BasicModeStatus |= 0x04; + } + + s->IntrStatus |= RxUnderrun; + rtl8139_update_irq(s); +} + +static NetClientInfo net_rtl8139_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = rtl8139_can_receive, + .receive = rtl8139_receive, + .link_status_changed = rtl8139_set_link_status, +}; + +static void pci_rtl8139_realize(PCIDevice *dev, Error **errp) +{ + RTL8139State *s = RTL8139(dev); + DeviceState *d = DEVICE(dev); + uint8_t *pci_conf; + + pci_conf = dev->config; + pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + /* TODO: start of capability list, but no capability + * list bit in status register, and offset 0xdc seems unused. */ + pci_conf[PCI_CAPABILITY_LIST] = 0xdc; + + memory_region_init_io(&s->bar_io, OBJECT(s), &rtl8139_io_ops, s, + "rtl8139", 0x100); + memory_region_init_io(&s->bar_mem, OBJECT(s), &rtl8139_mmio_ops, s, + "rtl8139", 0x100); + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->bar_io); + pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar_mem); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + /* prepare eeprom */ + s->eeprom.contents[0] = 0x8129; +#if 1 + /* PCI vendor and device ID should be mirrored here */ + s->eeprom.contents[1] = PCI_VENDOR_ID_REALTEK; + s->eeprom.contents[2] = PCI_DEVICE_ID_REALTEK_8139; +#endif + s->eeprom.contents[7] = s->conf.macaddr.a[0] | s->conf.macaddr.a[1] << 8; + s->eeprom.contents[8] = s->conf.macaddr.a[2] | s->conf.macaddr.a[3] << 8; + s->eeprom.contents[9] = s->conf.macaddr.a[4] | s->conf.macaddr.a[5] << 8; + + s->nic = qemu_new_nic(&net_rtl8139_info, &s->conf, + object_get_typename(OBJECT(dev)), d->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + s->cplus_txbuffer = NULL; + s->cplus_txbuffer_len = 0; + s->cplus_txbuffer_offset = 0; + + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, rtl8139_timer, s); +} + +static void rtl8139_instance_init(Object *obj) +{ + RTL8139State *s = RTL8139(obj); + + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static Property rtl8139_properties[] = { + DEFINE_NIC_PROPERTIES(RTL8139State, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void rtl8139_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pci_rtl8139_realize; + k->exit = pci_rtl8139_uninit; + k->romfile = "efi-rtl8139.rom"; + k->vendor_id = PCI_VENDOR_ID_REALTEK; + k->device_id = PCI_DEVICE_ID_REALTEK_8139; + k->revision = RTL8139_PCI_REVID; /* >=0x20 is for 8139C+ */ + k->class_id = PCI_CLASS_NETWORK_ETHERNET; + dc->reset = rtl8139_reset; + dc->vmsd = &vmstate_rtl8139; + dc->props = rtl8139_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static const TypeInfo rtl8139_info = { + .name = TYPE_RTL8139, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(RTL8139State), + .class_init = rtl8139_class_init, + .instance_init = rtl8139_instance_init, +}; + +static void rtl8139_register_types(void) +{ + type_register_static(&rtl8139_info); +} + +type_init(rtl8139_register_types) diff --git a/qemu/hw/net/smc91c111.c b/qemu/hw/net/smc91c111.c new file mode 100644 index 000000000..74e06e6c7 --- /dev/null +++ b/qemu/hw/net/smc91c111.c @@ -0,0 +1,807 @@ +/* + * SMSC 91C111 Ethernet interface emulation + * + * Copyright (c) 2005 CodeSourcery, LLC. + * Written by Paul Brook + * + * This code is licensed under the GPL + */ + +#include "hw/sysbus.h" +#include "net/net.h" +#include "hw/devices.h" +/* For crc32 */ +#include <zlib.h> + +/* Number of 2k memory pages available. */ +#define NUM_PACKETS 4 + +#define TYPE_SMC91C111 "smc91c111" +#define SMC91C111(obj) OBJECT_CHECK(smc91c111_state, (obj), TYPE_SMC91C111) + +typedef struct { + SysBusDevice parent_obj; + + NICState *nic; + NICConf conf; + uint16_t tcr; + uint16_t rcr; + uint16_t cr; + uint16_t ctr; + uint16_t gpr; + uint16_t ptr; + uint16_t ercv; + qemu_irq irq; + int bank; + int packet_num; + int tx_alloc; + /* Bitmask of allocated packets. */ + int allocated; + int tx_fifo_len; + int tx_fifo[NUM_PACKETS]; + int rx_fifo_len; + int rx_fifo[NUM_PACKETS]; + int tx_fifo_done_len; + int tx_fifo_done[NUM_PACKETS]; + /* Packet buffer memory. */ + uint8_t data[NUM_PACKETS][2048]; + uint8_t int_level; + uint8_t int_mask; + MemoryRegion mmio; +} smc91c111_state; + +static const VMStateDescription vmstate_smc91c111 = { + .name = "smc91c111", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(tcr, smc91c111_state), + VMSTATE_UINT16(rcr, smc91c111_state), + VMSTATE_UINT16(cr, smc91c111_state), + VMSTATE_UINT16(ctr, smc91c111_state), + VMSTATE_UINT16(gpr, smc91c111_state), + VMSTATE_UINT16(ptr, smc91c111_state), + VMSTATE_UINT16(ercv, smc91c111_state), + VMSTATE_INT32(bank, smc91c111_state), + VMSTATE_INT32(packet_num, smc91c111_state), + VMSTATE_INT32(tx_alloc, smc91c111_state), + VMSTATE_INT32(allocated, smc91c111_state), + VMSTATE_INT32(tx_fifo_len, smc91c111_state), + VMSTATE_INT32_ARRAY(tx_fifo, smc91c111_state, NUM_PACKETS), + VMSTATE_INT32(rx_fifo_len, smc91c111_state), + VMSTATE_INT32_ARRAY(rx_fifo, smc91c111_state, NUM_PACKETS), + VMSTATE_INT32(tx_fifo_done_len, smc91c111_state), + VMSTATE_INT32_ARRAY(tx_fifo_done, smc91c111_state, NUM_PACKETS), + VMSTATE_BUFFER_UNSAFE(data, smc91c111_state, 0, NUM_PACKETS * 2048), + VMSTATE_UINT8(int_level, smc91c111_state), + VMSTATE_UINT8(int_mask, smc91c111_state), + VMSTATE_END_OF_LIST() + } +}; + +#define RCR_SOFT_RST 0x8000 +#define RCR_STRIP_CRC 0x0200 +#define RCR_RXEN 0x0100 + +#define TCR_EPH_LOOP 0x2000 +#define TCR_NOCRC 0x0100 +#define TCR_PAD_EN 0x0080 +#define TCR_FORCOL 0x0004 +#define TCR_LOOP 0x0002 +#define TCR_TXEN 0x0001 + +#define INT_MD 0x80 +#define INT_ERCV 0x40 +#define INT_EPH 0x20 +#define INT_RX_OVRN 0x10 +#define INT_ALLOC 0x08 +#define INT_TX_EMPTY 0x04 +#define INT_TX 0x02 +#define INT_RCV 0x01 + +#define CTR_AUTO_RELEASE 0x0800 +#define CTR_RELOAD 0x0002 +#define CTR_STORE 0x0001 + +#define RS_ALGNERR 0x8000 +#define RS_BRODCAST 0x4000 +#define RS_BADCRC 0x2000 +#define RS_ODDFRAME 0x1000 +#define RS_TOOLONG 0x0800 +#define RS_TOOSHORT 0x0400 +#define RS_MULTICAST 0x0001 + +/* Update interrupt status. */ +static void smc91c111_update(smc91c111_state *s) +{ + int level; + + if (s->tx_fifo_len == 0) + s->int_level |= INT_TX_EMPTY; + if (s->tx_fifo_done_len != 0) + s->int_level |= INT_TX; + level = (s->int_level & s->int_mask) != 0; + qemu_set_irq(s->irq, level); +} + +/* Try to allocate a packet. Returns 0x80 on failure. */ +static int smc91c111_allocate_packet(smc91c111_state *s) +{ + int i; + if (s->allocated == (1 << NUM_PACKETS) - 1) { + return 0x80; + } + + for (i = 0; i < NUM_PACKETS; i++) { + if ((s->allocated & (1 << i)) == 0) + break; + } + s->allocated |= 1 << i; + return i; +} + + +/* Process a pending TX allocate. */ +static void smc91c111_tx_alloc(smc91c111_state *s) +{ + s->tx_alloc = smc91c111_allocate_packet(s); + if (s->tx_alloc == 0x80) + return; + s->int_level |= INT_ALLOC; + smc91c111_update(s); +} + +/* Remove and item from the RX FIFO. */ +static void smc91c111_pop_rx_fifo(smc91c111_state *s) +{ + int i; + + s->rx_fifo_len--; + if (s->rx_fifo_len) { + for (i = 0; i < s->rx_fifo_len; i++) + s->rx_fifo[i] = s->rx_fifo[i + 1]; + s->int_level |= INT_RCV; + } else { + s->int_level &= ~INT_RCV; + } + smc91c111_update(s); +} + +/* Remove an item from the TX completion FIFO. */ +static void smc91c111_pop_tx_fifo_done(smc91c111_state *s) +{ + int i; + + if (s->tx_fifo_done_len == 0) + return; + s->tx_fifo_done_len--; + for (i = 0; i < s->tx_fifo_done_len; i++) + s->tx_fifo_done[i] = s->tx_fifo_done[i + 1]; +} + +/* Release the memory allocated to a packet. */ +static void smc91c111_release_packet(smc91c111_state *s, int packet) +{ + s->allocated &= ~(1 << packet); + if (s->tx_alloc == 0x80) + smc91c111_tx_alloc(s); + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + +/* Flush the TX FIFO. */ +static void smc91c111_do_tx(smc91c111_state *s) +{ + int i; + int len; + int control; + int packetnum; + uint8_t *p; + + if ((s->tcr & TCR_TXEN) == 0) + return; + if (s->tx_fifo_len == 0) + return; + for (i = 0; i < s->tx_fifo_len; i++) { + packetnum = s->tx_fifo[i]; + p = &s->data[packetnum][0]; + /* Set status word. */ + *(p++) = 0x01; + *(p++) = 0x40; + len = *(p++); + len |= ((int)*(p++)) << 8; + len -= 6; + control = p[len + 1]; + if (control & 0x20) + len++; + /* ??? This overwrites the data following the buffer. + Don't know what real hardware does. */ + if (len < 64 && (s->tcr & TCR_PAD_EN)) { + memset(p + len, 0, 64 - len); + len = 64; + } +#if 0 + { + int add_crc; + + /* The card is supposed to append the CRC to the frame. + However none of the other network traffic has the CRC + appended. Suspect this is low level ethernet detail we + don't need to worry about. */ + add_crc = (control & 0x10) || (s->tcr & TCR_NOCRC) == 0; + if (add_crc) { + uint32_t crc; + + crc = crc32(~0, p, len); + memcpy(p + len, &crc, 4); + len += 4; + } + } +#endif + if (s->ctr & CTR_AUTO_RELEASE) + /* Race? */ + smc91c111_release_packet(s, packetnum); + else if (s->tx_fifo_done_len < NUM_PACKETS) + s->tx_fifo_done[s->tx_fifo_done_len++] = packetnum; + qemu_send_packet(qemu_get_queue(s->nic), p, len); + } + s->tx_fifo_len = 0; + smc91c111_update(s); +} + +/* Add a packet to the TX FIFO. */ +static void smc91c111_queue_tx(smc91c111_state *s, int packet) +{ + if (s->tx_fifo_len == NUM_PACKETS) + return; + s->tx_fifo[s->tx_fifo_len++] = packet; + smc91c111_do_tx(s); +} + +static void smc91c111_reset(DeviceState *dev) +{ + smc91c111_state *s = SMC91C111(dev); + + s->bank = 0; + s->tx_fifo_len = 0; + s->tx_fifo_done_len = 0; + s->rx_fifo_len = 0; + s->allocated = 0; + s->packet_num = 0; + s->tx_alloc = 0; + s->tcr = 0; + s->rcr = 0; + s->cr = 0xa0b1; + s->ctr = 0x1210; + s->ptr = 0; + s->ercv = 0x1f; + s->int_level = INT_TX_EMPTY; + s->int_mask = 0; + smc91c111_update(s); +} + +#define SET_LOW(name, val) s->name = (s->name & 0xff00) | val +#define SET_HIGH(name, val) s->name = (s->name & 0xff) | (val << 8) + +static void smc91c111_writeb(void *opaque, hwaddr offset, + uint32_t value) +{ + smc91c111_state *s = (smc91c111_state *)opaque; + + offset = offset & 0xf; + if (offset == 14) { + s->bank = value; + return; + } + if (offset == 15) + return; + switch (s->bank) { + case 0: + switch (offset) { + case 0: /* TCR */ + SET_LOW(tcr, value); + return; + case 1: + SET_HIGH(tcr, value); + return; + case 4: /* RCR */ + SET_LOW(rcr, value); + return; + case 5: + SET_HIGH(rcr, value); + if (s->rcr & RCR_SOFT_RST) { + smc91c111_reset(DEVICE(s)); + } + return; + case 10: case 11: /* RPCR */ + /* Ignored */ + return; + case 12: case 13: /* Reserved */ + return; + } + break; + + case 1: + switch (offset) { + case 0: /* CONFIG */ + SET_LOW(cr, value); + return; + case 1: + SET_HIGH(cr,value); + return; + case 2: case 3: /* BASE */ + case 4: case 5: case 6: case 7: case 8: case 9: /* IA */ + /* Not implemented. */ + return; + case 10: /* Genral Purpose */ + SET_LOW(gpr, value); + return; + case 11: + SET_HIGH(gpr, value); + return; + case 12: /* Control */ + if (value & 1) + fprintf(stderr, "smc91c111:EEPROM store not implemented\n"); + if (value & 2) + fprintf(stderr, "smc91c111:EEPROM reload not implemented\n"); + value &= ~3; + SET_LOW(ctr, value); + return; + case 13: + SET_HIGH(ctr, value); + return; + } + break; + + case 2: + switch (offset) { + case 0: /* MMU Command */ + switch (value >> 5) { + case 0: /* no-op */ + break; + case 1: /* Allocate for TX. */ + s->tx_alloc = 0x80; + s->int_level &= ~INT_ALLOC; + smc91c111_update(s); + smc91c111_tx_alloc(s); + break; + case 2: /* Reset MMU. */ + s->allocated = 0; + s->tx_fifo_len = 0; + s->tx_fifo_done_len = 0; + s->rx_fifo_len = 0; + s->tx_alloc = 0; + break; + case 3: /* Remove from RX FIFO. */ + smc91c111_pop_rx_fifo(s); + break; + case 4: /* Remove from RX FIFO and release. */ + if (s->rx_fifo_len > 0) { + smc91c111_release_packet(s, s->rx_fifo[0]); + } + smc91c111_pop_rx_fifo(s); + break; + case 5: /* Release. */ + smc91c111_release_packet(s, s->packet_num); + break; + case 6: /* Add to TX FIFO. */ + smc91c111_queue_tx(s, s->packet_num); + break; + case 7: /* Reset TX FIFO. */ + s->tx_fifo_len = 0; + s->tx_fifo_done_len = 0; + break; + } + return; + case 1: + /* Ignore. */ + return; + case 2: /* Packet Number Register */ + s->packet_num = value; + return; + case 3: case 4: case 5: + /* Should be readonly, but linux writes to them anyway. Ignore. */ + return; + case 6: /* Pointer */ + SET_LOW(ptr, value); + return; + case 7: + SET_HIGH(ptr, value); + return; + case 8: case 9: case 10: case 11: /* Data */ + { + int p; + int n; + + if (s->ptr & 0x8000) + n = s->rx_fifo[0]; + else + n = s->packet_num; + p = s->ptr & 0x07ff; + if (s->ptr & 0x4000) { + s->ptr = (s->ptr & 0xf800) | ((s->ptr + 1) & 0x7ff); + } else { + p += (offset & 3); + } + s->data[n][p] = value; + } + return; + case 12: /* Interrupt ACK. */ + s->int_level &= ~(value & 0xd6); + if (value & INT_TX) + smc91c111_pop_tx_fifo_done(s); + smc91c111_update(s); + return; + case 13: /* Interrupt mask. */ + s->int_mask = value; + smc91c111_update(s); + return; + } + break; + + case 3: + switch (offset) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + /* Multicast table. */ + /* Not implemented. */ + return; + case 8: case 9: /* Management Interface. */ + /* Not implemented. */ + return; + case 12: /* Early receive. */ + s->ercv = value & 0x1f; + return; + case 13: + /* Ignore. */ + return; + } + break; + } + hw_error("smc91c111_write: Bad reg %d:%x\n", s->bank, (int)offset); +} + +static uint32_t smc91c111_readb(void *opaque, hwaddr offset) +{ + smc91c111_state *s = (smc91c111_state *)opaque; + + offset = offset & 0xf; + if (offset == 14) { + return s->bank; + } + if (offset == 15) + return 0x33; + switch (s->bank) { + case 0: + switch (offset) { + case 0: /* TCR */ + return s->tcr & 0xff; + case 1: + return s->tcr >> 8; + case 2: /* EPH Status */ + return 0; + case 3: + return 0x40; + case 4: /* RCR */ + return s->rcr & 0xff; + case 5: + return s->rcr >> 8; + case 6: /* Counter */ + case 7: + /* Not implemented. */ + return 0; + case 8: /* Memory size. */ + return NUM_PACKETS; + case 9: /* Free memory available. */ + { + int i; + int n; + n = 0; + for (i = 0; i < NUM_PACKETS; i++) { + if (s->allocated & (1 << i)) + n++; + } + return n; + } + case 10: case 11: /* RPCR */ + /* Not implemented. */ + return 0; + case 12: case 13: /* Reserved */ + return 0; + } + break; + + case 1: + switch (offset) { + case 0: /* CONFIG */ + return s->cr & 0xff; + case 1: + return s->cr >> 8; + case 2: case 3: /* BASE */ + /* Not implemented. */ + return 0; + case 4: case 5: case 6: case 7: case 8: case 9: /* IA */ + return s->conf.macaddr.a[offset - 4]; + case 10: /* General Purpose */ + return s->gpr & 0xff; + case 11: + return s->gpr >> 8; + case 12: /* Control */ + return s->ctr & 0xff; + case 13: + return s->ctr >> 8; + } + break; + + case 2: + switch (offset) { + case 0: case 1: /* MMUCR Busy bit. */ + return 0; + case 2: /* Packet Number. */ + return s->packet_num; + case 3: /* Allocation Result. */ + return s->tx_alloc; + case 4: /* TX FIFO */ + if (s->tx_fifo_done_len == 0) + return 0x80; + else + return s->tx_fifo_done[0]; + case 5: /* RX FIFO */ + if (s->rx_fifo_len == 0) + return 0x80; + else + return s->rx_fifo[0]; + case 6: /* Pointer */ + return s->ptr & 0xff; + case 7: + return (s->ptr >> 8) & 0xf7; + case 8: case 9: case 10: case 11: /* Data */ + { + int p; + int n; + + if (s->ptr & 0x8000) + n = s->rx_fifo[0]; + else + n = s->packet_num; + p = s->ptr & 0x07ff; + if (s->ptr & 0x4000) { + s->ptr = (s->ptr & 0xf800) | ((s->ptr + 1) & 0x07ff); + } else { + p += (offset & 3); + } + return s->data[n][p]; + } + case 12: /* Interrupt status. */ + return s->int_level; + case 13: /* Interrupt mask. */ + return s->int_mask; + } + break; + + case 3: + switch (offset) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + /* Multicast table. */ + /* Not implemented. */ + return 0; + case 8: /* Management Interface. */ + /* Not implemented. */ + return 0x30; + case 9: + return 0x33; + case 10: /* Revision. */ + return 0x91; + case 11: + return 0x33; + case 12: + return s->ercv; + case 13: + return 0; + } + break; + } + hw_error("smc91c111_read: Bad reg %d:%x\n", s->bank, (int)offset); + return 0; +} + +static void smc91c111_writew(void *opaque, hwaddr offset, + uint32_t value) +{ + smc91c111_writeb(opaque, offset, value & 0xff); + smc91c111_writeb(opaque, offset + 1, value >> 8); +} + +static void smc91c111_writel(void *opaque, hwaddr offset, + uint32_t value) +{ + /* 32-bit writes to offset 0xc only actually write to the bank select + register (offset 0xe) */ + if (offset != 0xc) + smc91c111_writew(opaque, offset, value & 0xffff); + smc91c111_writew(opaque, offset + 2, value >> 16); +} + +static uint32_t smc91c111_readw(void *opaque, hwaddr offset) +{ + uint32_t val; + val = smc91c111_readb(opaque, offset); + val |= smc91c111_readb(opaque, offset + 1) << 8; + return val; +} + +static uint32_t smc91c111_readl(void *opaque, hwaddr offset) +{ + uint32_t val; + val = smc91c111_readw(opaque, offset); + val |= smc91c111_readw(opaque, offset + 2) << 16; + return val; +} + +static int smc91c111_can_receive(NetClientState *nc) +{ + smc91c111_state *s = qemu_get_nic_opaque(nc); + + if ((s->rcr & RCR_RXEN) == 0 || (s->rcr & RCR_SOFT_RST)) + return 1; + if (s->allocated == (1 << NUM_PACKETS) - 1) + return 0; + return 1; +} + +static ssize_t smc91c111_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + smc91c111_state *s = qemu_get_nic_opaque(nc); + int status; + int packetsize; + uint32_t crc; + int packetnum; + uint8_t *p; + + if ((s->rcr & RCR_RXEN) == 0 || (s->rcr & RCR_SOFT_RST)) + return -1; + /* Short packets are padded with zeros. Receiving a packet + < 64 bytes long is considered an error condition. */ + if (size < 64) + packetsize = 64; + else + packetsize = (size & ~1); + packetsize += 6; + crc = (s->rcr & RCR_STRIP_CRC) == 0; + if (crc) + packetsize += 4; + /* TODO: Flag overrun and receive errors. */ + if (packetsize > 2048) + return -1; + packetnum = smc91c111_allocate_packet(s); + if (packetnum == 0x80) + return -1; + s->rx_fifo[s->rx_fifo_len++] = packetnum; + + p = &s->data[packetnum][0]; + /* ??? Multicast packets? */ + status = 0; + if (size > 1518) + status |= RS_TOOLONG; + if (size & 1) + status |= RS_ODDFRAME; + *(p++) = status & 0xff; + *(p++) = status >> 8; + *(p++) = packetsize & 0xff; + *(p++) = packetsize >> 8; + memcpy(p, buf, size & ~1); + p += (size & ~1); + /* Pad short packets. */ + if (size < 64) { + int pad; + + if (size & 1) + *(p++) = buf[size - 1]; + pad = 64 - size; + memset(p, 0, pad); + p += pad; + size = 64; + } + /* It's not clear if the CRC should go before or after the last byte in + odd sized packets. Linux disables the CRC, so that's no help. + The pictures in the documentation show the CRC aligned on a 16-bit + boundary before the last odd byte, so that's what we do. */ + if (crc) { + crc = crc32(~0, buf, size); + *(p++) = crc & 0xff; crc >>= 8; + *(p++) = crc & 0xff; crc >>= 8; + *(p++) = crc & 0xff; crc >>= 8; + *(p++) = crc & 0xff; + } + if (size & 1) { + *(p++) = buf[size - 1]; + *p = 0x60; + } else { + *(p++) = 0; + *p = 0x40; + } + /* TODO: Raise early RX interrupt? */ + s->int_level |= INT_RCV; + smc91c111_update(s); + + return size; +} + +static const MemoryRegionOps smc91c111_mem_ops = { + /* The special case for 32 bit writes to 0xc means we can't just + * set .impl.min/max_access_size to 1, unfortunately + */ + .old_mmio = { + .read = { smc91c111_readb, smc91c111_readw, smc91c111_readl, }, + .write = { smc91c111_writeb, smc91c111_writew, smc91c111_writel, }, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static NetClientInfo net_smc91c111_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = smc91c111_can_receive, + .receive = smc91c111_receive, +}; + +static int smc91c111_init1(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + smc91c111_state *s = SMC91C111(dev); + + memory_region_init_io(&s->mmio, OBJECT(s), &smc91c111_mem_ops, s, + "smc91c111-mmio", 16); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_smc91c111_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + /* ??? Save/restore. */ + return 0; +} + +static Property smc91c111_properties[] = { + DEFINE_NIC_PROPERTIES(smc91c111_state, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void smc91c111_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = smc91c111_init1; + dc->reset = smc91c111_reset; + dc->vmsd = &vmstate_smc91c111; + dc->props = smc91c111_properties; +} + +static const TypeInfo smc91c111_info = { + .name = TYPE_SMC91C111, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(smc91c111_state), + .class_init = smc91c111_class_init, +}; + +static void smc91c111_register_types(void) +{ + type_register_static(&smc91c111_info); +} + +/* Legacy helper function. Should go away when machine config files are + implemented. */ +void smc91c111_init(NICInfo *nd, uint32_t base, qemu_irq irq) +{ + DeviceState *dev; + SysBusDevice *s; + + qemu_check_nic_model(nd, "smc91c111"); + dev = qdev_create(NULL, TYPE_SMC91C111); + qdev_set_nic_properties(dev, nd); + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(s, 0, base); + sysbus_connect_irq(s, 0, irq); +} + +type_init(smc91c111_register_types) diff --git a/qemu/hw/net/spapr_llan.c b/qemu/hw/net/spapr_llan.c new file mode 100644 index 000000000..1ca5e9ce6 --- /dev/null +++ b/qemu/hw/net/spapr_llan.c @@ -0,0 +1,569 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Inter-VM Logical Lan, aka ibmveth + * + * Copyright (c) 2010,2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#include "hw/hw.h" +#include "net/net.h" +#include "hw/qdev.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "sysemu/sysemu.h" + +#include <libfdt.h> + +#define ETH_ALEN 6 +#define MAX_PACKET_SIZE 65536 + +/*#define DEBUG*/ + +#ifdef DEBUG +#define DPRINTF(fmt...) do { fprintf(stderr, fmt); } while (0) +#else +#define DPRINTF(fmt...) +#endif + +/* + * Virtual LAN device + */ + +typedef uint64_t vlan_bd_t; + +#define VLAN_BD_VALID 0x8000000000000000ULL +#define VLAN_BD_TOGGLE 0x4000000000000000ULL +#define VLAN_BD_NO_CSUM 0x0200000000000000ULL +#define VLAN_BD_CSUM_GOOD 0x0100000000000000ULL +#define VLAN_BD_LEN_MASK 0x00ffffff00000000ULL +#define VLAN_BD_LEN(bd) (((bd) & VLAN_BD_LEN_MASK) >> 32) +#define VLAN_BD_ADDR_MASK 0x00000000ffffffffULL +#define VLAN_BD_ADDR(bd) ((bd) & VLAN_BD_ADDR_MASK) + +#define VLAN_VALID_BD(addr, len) (VLAN_BD_VALID | \ + (((len) << 32) & VLAN_BD_LEN_MASK) | \ + (addr & VLAN_BD_ADDR_MASK)) + +#define VLAN_RXQC_TOGGLE 0x80 +#define VLAN_RXQC_VALID 0x40 +#define VLAN_RXQC_NO_CSUM 0x02 +#define VLAN_RXQC_CSUM_GOOD 0x01 + +#define VLAN_RQ_ALIGNMENT 16 +#define VLAN_RXQ_BD_OFF 0 +#define VLAN_FILTER_BD_OFF 8 +#define VLAN_RX_BDS_OFF 16 +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. We must avoid it, or the operating system will report garbage + * for this statistic. + */ +#define VLAN_RX_BDS_LEN (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8) +#define VLAN_MAX_BUFS (VLAN_RX_BDS_LEN / 8) + +#define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan" +#define VIO_SPAPR_VLAN_DEVICE(obj) \ + OBJECT_CHECK(VIOsPAPRVLANDevice, (obj), TYPE_VIO_SPAPR_VLAN_DEVICE) + +typedef struct VIOsPAPRVLANDevice { + VIOsPAPRDevice sdev; + NICConf nicconf; + NICState *nic; + bool isopen; + target_ulong buf_list; + uint32_t add_buf_ptr, use_buf_ptr, rx_bufs; + target_ulong rxq_ptr; +} VIOsPAPRVLANDevice; + +static int spapr_vlan_can_receive(NetClientState *nc) +{ + VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc); + + return (dev->isopen && dev->rx_bufs > 0); +} + +static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc); + VIOsPAPRDevice *sdev = VIO_SPAPR_DEVICE(dev); + vlan_bd_t rxq_bd = vio_ldq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF); + vlan_bd_t bd; + int buf_ptr = dev->use_buf_ptr; + uint64_t handle; + uint8_t control; + + DPRINTF("spapr_vlan_receive() [%s] rx_bufs=%d\n", sdev->qdev.id, + dev->rx_bufs); + + if (!dev->isopen) { + return -1; + } + + if (!dev->rx_bufs) { + return -1; + } + + do { + buf_ptr += 8; + if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { + buf_ptr = VLAN_RX_BDS_OFF; + } + + bd = vio_ldq(sdev, dev->buf_list + buf_ptr); + DPRINTF("use_buf_ptr=%d bd=0x%016llx\n", + buf_ptr, (unsigned long long)bd); + } while ((!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8))) + && (buf_ptr != dev->use_buf_ptr)); + + if (!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8))) { + /* Failed to find a suitable buffer */ + return -1; + } + + /* Remove the buffer from the pool */ + dev->rx_bufs--; + dev->use_buf_ptr = buf_ptr; + vio_stq(sdev, dev->buf_list + dev->use_buf_ptr, 0); + + DPRINTF("Found buffer: ptr=%d num=%d\n", dev->use_buf_ptr, dev->rx_bufs); + + /* Transfer the packet data */ + if (spapr_vio_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) { + return -1; + } + + DPRINTF("spapr_vlan_receive: DMA write completed\n"); + + /* Update the receive queue */ + control = VLAN_RXQC_TOGGLE | VLAN_RXQC_VALID; + if (rxq_bd & VLAN_BD_TOGGLE) { + control ^= VLAN_RXQC_TOGGLE; + } + + handle = vio_ldq(sdev, VLAN_BD_ADDR(bd)); + vio_stq(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 8, handle); + vio_stl(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 4, size); + vio_sth(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8); + vio_stb(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control); + + DPRINTF("wrote rxq entry (ptr=0x%llx): 0x%016llx 0x%016llx\n", + (unsigned long long)dev->rxq_ptr, + (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + + dev->rxq_ptr), + (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + + dev->rxq_ptr + 8)); + + dev->rxq_ptr += 16; + if (dev->rxq_ptr >= VLAN_BD_LEN(rxq_bd)) { + dev->rxq_ptr = 0; + vio_stq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF, rxq_bd ^ VLAN_BD_TOGGLE); + } + + if (sdev->signal_state & 1) { + qemu_irq_pulse(spapr_vio_qirq(sdev)); + } + + return size; +} + +static NetClientInfo net_spapr_vlan_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = spapr_vlan_can_receive, + .receive = spapr_vlan_receive, +}; + +static void spapr_vlan_reset(VIOsPAPRDevice *sdev) +{ + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + + dev->buf_list = 0; + dev->rx_bufs = 0; + dev->isopen = 0; +} + +static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) +{ + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + + qemu_macaddr_default_if_unset(&dev->nicconf.macaddr); + + dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf, + object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev); + qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a); +} + +static void spapr_vlan_instance_init(Object *obj) +{ + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(obj); + + device_add_bootindex_property(obj, &dev->nicconf.bootindex, + "bootindex", "", + DEVICE(dev), NULL); +} + +void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd) +{ + DeviceState *dev; + + dev = qdev_create(&bus->bus, "spapr-vlan"); + + qdev_set_nic_properties(dev, nd); + + qdev_init_nofail(dev); +} + +static int spapr_vlan_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) +{ + VIOsPAPRVLANDevice *vdev = VIO_SPAPR_VLAN_DEVICE(dev); + uint8_t padded_mac[8] = {0, 0}; + int ret; + + /* Some old phyp versions give the mac address in an 8-byte + * property. The kernel driver has an insane workaround for this; + * rather than doing the obvious thing and checking the property + * length, it checks whether the first byte has 0b10 in the low + * bits. If a correct 6-byte property has a different first byte + * the kernel will get the wrong mac address, overrunning its + * buffer in the process (read only, thank goodness). + * + * Here we workaround the kernel workaround by always supplying an + * 8-byte property, with the mac address in the last six bytes */ + memcpy(&padded_mac[2], &vdev->nicconf.macaddr, ETH_ALEN); + ret = fdt_setprop(fdt, node_off, "local-mac-address", + padded_mac, sizeof(padded_mac)); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, node_off, "ibm,mac-address-filters", 0); + if (ret < 0) { + return ret; + } + + return 0; +} + +static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd, + target_ulong alignment) +{ + if ((VLAN_BD_ADDR(bd) % alignment) + || (VLAN_BD_LEN(bd) % alignment)) { + return -1; + } + + if (!spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd), + VLAN_BD_LEN(bd), DMA_DIRECTION_FROM_DEVICE) + || !spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd), + VLAN_BD_LEN(bd), DMA_DIRECTION_TO_DEVICE)) { + return -1; + } + + return 0; +} + +static target_ulong h_register_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong buf_list = args[1]; + target_ulong rec_queue = args[2]; + target_ulong filter_list = args[3]; + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + vlan_bd_t filter_list_bd; + + if (!dev) { + return H_PARAMETER; + } + + if (dev->isopen) { + hcall_dprintf("H_REGISTER_LOGICAL_LAN called twice without " + "H_FREE_LOGICAL_LAN\n"); + return H_RESOURCE; + } + + if (check_bd(dev, VLAN_VALID_BD(buf_list, SPAPR_TCE_PAGE_SIZE), + SPAPR_TCE_PAGE_SIZE) < 0) { + hcall_dprintf("Bad buf_list 0x" TARGET_FMT_lx "\n", buf_list); + return H_PARAMETER; + } + + filter_list_bd = VLAN_VALID_BD(filter_list, SPAPR_TCE_PAGE_SIZE); + if (check_bd(dev, filter_list_bd, SPAPR_TCE_PAGE_SIZE) < 0) { + hcall_dprintf("Bad filter_list 0x" TARGET_FMT_lx "\n", filter_list); + return H_PARAMETER; + } + + if (!(rec_queue & VLAN_BD_VALID) + || (check_bd(dev, rec_queue, VLAN_RQ_ALIGNMENT) < 0)) { + hcall_dprintf("Bad receive queue\n"); + return H_PARAMETER; + } + + dev->buf_list = buf_list; + sdev->signal_state = 0; + + rec_queue &= ~VLAN_BD_TOGGLE; + + /* Initialize the buffer list */ + vio_stq(sdev, buf_list, rec_queue); + vio_stq(sdev, buf_list + 8, filter_list_bd); + spapr_vio_dma_set(sdev, buf_list + VLAN_RX_BDS_OFF, 0, + SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF); + dev->add_buf_ptr = VLAN_RX_BDS_OFF - 8; + dev->use_buf_ptr = VLAN_RX_BDS_OFF - 8; + dev->rx_bufs = 0; + dev->rxq_ptr = 0; + + /* Initialize the receive queue */ + spapr_vio_dma_set(sdev, VLAN_BD_ADDR(rec_queue), 0, VLAN_BD_LEN(rec_queue)); + + dev->isopen = 1; + qemu_flush_queued_packets(qemu_get_queue(dev->nic)); + + return H_SUCCESS; +} + + +static target_ulong h_free_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong reg = args[0]; + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + + if (!dev) { + return H_PARAMETER; + } + + if (!dev->isopen) { + hcall_dprintf("H_FREE_LOGICAL_LAN called without " + "H_REGISTER_LOGICAL_LAN\n"); + return H_RESOURCE; + } + + spapr_vlan_reset(sdev); + return H_SUCCESS; +} + +static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong buf = args[1]; + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + vlan_bd_t bd; + + DPRINTF("H_ADD_LOGICAL_LAN_BUFFER(0x" TARGET_FMT_lx + ", 0x" TARGET_FMT_lx ")\n", reg, buf); + + if (!sdev) { + hcall_dprintf("Bad device\n"); + return H_PARAMETER; + } + + if ((check_bd(dev, buf, 4) < 0) + || (VLAN_BD_LEN(buf) < 16)) { + hcall_dprintf("Bad buffer enqueued\n"); + return H_PARAMETER; + } + + if (!dev->isopen || dev->rx_bufs >= VLAN_MAX_BUFS) { + return H_RESOURCE; + } + + do { + dev->add_buf_ptr += 8; + if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { + dev->add_buf_ptr = VLAN_RX_BDS_OFF; + } + + bd = vio_ldq(sdev, dev->buf_list + dev->add_buf_ptr); + } while (bd & VLAN_BD_VALID); + + vio_stq(sdev, dev->buf_list + dev->add_buf_ptr, buf); + + dev->rx_bufs++; + + qemu_flush_queued_packets(qemu_get_queue(dev->nic)); + + DPRINTF("h_add_logical_lan_buffer(): Added buf ptr=%d rx_bufs=%d" + " bd=0x%016llx\n", dev->add_buf_ptr, dev->rx_bufs, + (unsigned long long)buf); + + return H_SUCCESS; +} + +static target_ulong h_send_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong *bufs = args + 1; + target_ulong continue_token = args[7]; + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + unsigned total_len; + uint8_t *lbuf, *p; + int i, nbufs; + int ret; + + DPRINTF("H_SEND_LOGICAL_LAN(0x" TARGET_FMT_lx ", <bufs>, 0x" + TARGET_FMT_lx ")\n", reg, continue_token); + + if (!sdev) { + return H_PARAMETER; + } + + DPRINTF("rxbufs = %d\n", dev->rx_bufs); + + if (!dev->isopen) { + return H_DROPPED; + } + + if (continue_token) { + return H_HARDWARE; /* FIXME actually handle this */ + } + + total_len = 0; + for (i = 0; i < 6; i++) { + DPRINTF(" buf desc: 0x" TARGET_FMT_lx "\n", bufs[i]); + if (!(bufs[i] & VLAN_BD_VALID)) { + break; + } + total_len += VLAN_BD_LEN(bufs[i]); + } + + nbufs = i; + DPRINTF("h_send_logical_lan() %d buffers, total length 0x%x\n", + nbufs, total_len); + + if (total_len == 0) { + return H_SUCCESS; + } + + if (total_len > MAX_PACKET_SIZE) { + /* Don't let the guest force too large an allocation */ + return H_RESOURCE; + } + + lbuf = alloca(total_len); + p = lbuf; + for (i = 0; i < nbufs; i++) { + ret = spapr_vio_dma_read(sdev, VLAN_BD_ADDR(bufs[i]), + p, VLAN_BD_LEN(bufs[i])); + if (ret < 0) { + return ret; + } + + p += VLAN_BD_LEN(bufs[i]); + } + + qemu_send_packet(qemu_get_queue(dev->nic), lbuf, total_len); + + return H_SUCCESS; +} + +static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong reg = args[0]; + VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + + if (!dev) { + return H_PARAMETER; + } + + return H_SUCCESS; +} + +static Property spapr_vlan_properties[] = { + DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev), + DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_spapr_llan = { + .name = "spapr_llan", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_SPAPR_VIO(sdev, VIOsPAPRVLANDevice), + /* LLAN state */ + VMSTATE_BOOL(isopen, VIOsPAPRVLANDevice), + VMSTATE_UINTTL(buf_list, VIOsPAPRVLANDevice), + VMSTATE_UINT32(add_buf_ptr, VIOsPAPRVLANDevice), + VMSTATE_UINT32(use_buf_ptr, VIOsPAPRVLANDevice), + VMSTATE_UINT32(rx_bufs, VIOsPAPRVLANDevice), + VMSTATE_UINTTL(rxq_ptr, VIOsPAPRVLANDevice), + + VMSTATE_END_OF_LIST() + }, +}; + +static void spapr_vlan_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VIOsPAPRDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); + + k->realize = spapr_vlan_realize; + k->reset = spapr_vlan_reset; + k->devnode = spapr_vlan_devnode; + k->dt_name = "l-lan"; + k->dt_type = "network"; + k->dt_compatible = "IBM,l-lan"; + k->signal_mask = 0x1; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->props = spapr_vlan_properties; + k->rtce_window_size = 0x10000000; + dc->vmsd = &vmstate_spapr_llan; +} + +static const TypeInfo spapr_vlan_info = { + .name = TYPE_VIO_SPAPR_VLAN_DEVICE, + .parent = TYPE_VIO_SPAPR_DEVICE, + .instance_size = sizeof(VIOsPAPRVLANDevice), + .class_init = spapr_vlan_class_init, + .instance_init = spapr_vlan_instance_init, +}; + +static void spapr_vlan_register_types(void) +{ + spapr_register_hypercall(H_REGISTER_LOGICAL_LAN, h_register_logical_lan); + spapr_register_hypercall(H_FREE_LOGICAL_LAN, h_free_logical_lan); + spapr_register_hypercall(H_SEND_LOGICAL_LAN, h_send_logical_lan); + spapr_register_hypercall(H_ADD_LOGICAL_LAN_BUFFER, + h_add_logical_lan_buffer); + spapr_register_hypercall(H_MULTICAST_CTRL, h_multicast_ctrl); + type_register_static(&spapr_vlan_info); +} + +type_init(spapr_vlan_register_types) diff --git a/qemu/hw/net/stellaris_enet.c b/qemu/hw/net/stellaris_enet.c new file mode 100644 index 000000000..21a47735d --- /dev/null +++ b/qemu/hw/net/stellaris_enet.c @@ -0,0 +1,503 @@ +/* + * Luminary Micro Stellaris Ethernet Controller + * + * Copyright (c) 2007 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ +#include "hw/sysbus.h" +#include "net/net.h" +#include <zlib.h> + +//#define DEBUG_STELLARIS_ENET 1 + +#ifdef DEBUG_STELLARIS_ENET +#define DPRINTF(fmt, ...) \ +do { printf("stellaris_enet: " fmt , ## __VA_ARGS__); } while (0) +#define BADF(fmt, ...) \ +do { fprintf(stderr, "stellaris_enet: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#define BADF(fmt, ...) \ +do { fprintf(stderr, "stellaris_enet: error: " fmt , ## __VA_ARGS__);} while (0) +#endif + +#define SE_INT_RX 0x01 +#define SE_INT_TXER 0x02 +#define SE_INT_TXEMP 0x04 +#define SE_INT_FOV 0x08 +#define SE_INT_RXER 0x10 +#define SE_INT_MD 0x20 +#define SE_INT_PHY 0x40 + +#define SE_RCTL_RXEN 0x01 +#define SE_RCTL_AMUL 0x02 +#define SE_RCTL_PRMS 0x04 +#define SE_RCTL_BADCRC 0x08 +#define SE_RCTL_RSTFIFO 0x10 + +#define SE_TCTL_TXEN 0x01 +#define SE_TCTL_PADEN 0x02 +#define SE_TCTL_CRC 0x04 +#define SE_TCTL_DUPLEX 0x08 + +#define TYPE_STELLARIS_ENET "stellaris_enet" +#define STELLARIS_ENET(obj) \ + OBJECT_CHECK(stellaris_enet_state, (obj), TYPE_STELLARIS_ENET) + +typedef struct { + uint8_t data[2048]; + uint32_t len; +} StellarisEnetRxFrame; + +typedef struct { + SysBusDevice parent_obj; + + uint32_t ris; + uint32_t im; + uint32_t rctl; + uint32_t tctl; + uint32_t thr; + uint32_t mctl; + uint32_t mdv; + uint32_t mtxd; + uint32_t mrxd; + uint32_t np; + uint32_t tx_fifo_len; + uint8_t tx_fifo[2048]; + /* Real hardware has a 2k fifo, which works out to be at most 31 packets. + We implement a full 31 packet fifo. */ + StellarisEnetRxFrame rx[31]; + uint32_t rx_fifo_offset; + uint32_t next_packet; + NICState *nic; + NICConf conf; + qemu_irq irq; + MemoryRegion mmio; +} stellaris_enet_state; + +static const VMStateDescription vmstate_rx_frame = { + .name = "stellaris_enet/rx_frame", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(data, StellarisEnetRxFrame, 2048), + VMSTATE_UINT32(len, StellarisEnetRxFrame), + VMSTATE_END_OF_LIST() + } +}; + +static int stellaris_enet_post_load(void *opaque, int version_id) +{ + stellaris_enet_state *s = opaque; + int i; + + /* Sanitize inbound state. Note that next_packet is an index but + * np is a size; hence their valid upper bounds differ. + */ + if (s->next_packet >= ARRAY_SIZE(s->rx)) { + return -1; + } + + if (s->np > ARRAY_SIZE(s->rx)) { + return -1; + } + + for (i = 0; i < ARRAY_SIZE(s->rx); i++) { + if (s->rx[i].len > ARRAY_SIZE(s->rx[i].data)) { + return -1; + } + } + + if (s->rx_fifo_offset > ARRAY_SIZE(s->rx[0].data) - 4) { + return -1; + } + + if (s->tx_fifo_len > ARRAY_SIZE(s->tx_fifo)) { + return -1; + } + + return 0; +} + +static const VMStateDescription vmstate_stellaris_enet = { + .name = "stellaris_enet", + .version_id = 2, + .minimum_version_id = 2, + .post_load = stellaris_enet_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(ris, stellaris_enet_state), + VMSTATE_UINT32(im, stellaris_enet_state), + VMSTATE_UINT32(rctl, stellaris_enet_state), + VMSTATE_UINT32(tctl, stellaris_enet_state), + VMSTATE_UINT32(thr, stellaris_enet_state), + VMSTATE_UINT32(mctl, stellaris_enet_state), + VMSTATE_UINT32(mdv, stellaris_enet_state), + VMSTATE_UINT32(mtxd, stellaris_enet_state), + VMSTATE_UINT32(mrxd, stellaris_enet_state), + VMSTATE_UINT32(np, stellaris_enet_state), + VMSTATE_UINT32(tx_fifo_len, stellaris_enet_state), + VMSTATE_UINT8_ARRAY(tx_fifo, stellaris_enet_state, 2048), + VMSTATE_STRUCT_ARRAY(rx, stellaris_enet_state, 31, 1, + vmstate_rx_frame, StellarisEnetRxFrame), + VMSTATE_UINT32(rx_fifo_offset, stellaris_enet_state), + VMSTATE_UINT32(next_packet, stellaris_enet_state), + VMSTATE_END_OF_LIST() + } +}; + +static void stellaris_enet_update(stellaris_enet_state *s) +{ + qemu_set_irq(s->irq, (s->ris & s->im) != 0); +} + +/* Return the data length of the packet currently being assembled + * in the TX fifo. + */ +static inline int stellaris_txpacket_datalen(stellaris_enet_state *s) +{ + return s->tx_fifo[0] | (s->tx_fifo[1] << 8); +} + +/* Return true if the packet currently in the TX FIFO is complete, +* ie the FIFO holds enough bytes for the data length, ethernet header, +* payload and optionally CRC. +*/ +static inline bool stellaris_txpacket_complete(stellaris_enet_state *s) +{ + int framelen = stellaris_txpacket_datalen(s); + framelen += 16; + if (!(s->tctl & SE_TCTL_CRC)) { + framelen += 4; + } + /* Cover the corner case of a 2032 byte payload with auto-CRC disabled: + * this requires more bytes than will fit in the FIFO. It's not totally + * clear how the h/w handles this, but if using threshold-based TX + * it will definitely try to transmit something. + */ + framelen = MIN(framelen, ARRAY_SIZE(s->tx_fifo)); + return s->tx_fifo_len >= framelen; +} + +/* Return true if the TX FIFO threshold is enabled and the FIFO + * has filled enough to reach it. + */ +static inline bool stellaris_tx_thr_reached(stellaris_enet_state *s) +{ + return (s->thr < 0x3f && + (s->tx_fifo_len >= 4 * (s->thr * 8 + 1))); +} + +/* Send the packet currently in the TX FIFO */ +static void stellaris_enet_send(stellaris_enet_state *s) +{ + int framelen = stellaris_txpacket_datalen(s); + + /* Ethernet header is in the FIFO but not in the datacount. + * We don't implement explicit CRC, so just ignore any + * CRC value in the FIFO. + */ + framelen += 14; + if ((s->tctl & SE_TCTL_PADEN) && framelen < 60) { + memset(&s->tx_fifo[framelen + 2], 0, 60 - framelen); + framelen = 60; + } + /* This MIN will have no effect unless the FIFO data is corrupt + * (eg bad data from an incoming migration); otherwise the check + * on the datalen at the start of writing the data into the FIFO + * will have caught this. Silently write a corrupt half-packet, + * which is what the hardware does in FIFO underrun situations. + */ + framelen = MIN(framelen, ARRAY_SIZE(s->tx_fifo) - 2); + qemu_send_packet(qemu_get_queue(s->nic), s->tx_fifo + 2, framelen); + s->tx_fifo_len = 0; + s->ris |= SE_INT_TXEMP; + stellaris_enet_update(s); + DPRINTF("Done TX\n"); +} + +/* TODO: Implement MAC address filtering. */ +static ssize_t stellaris_enet_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + stellaris_enet_state *s = qemu_get_nic_opaque(nc); + int n; + uint8_t *p; + uint32_t crc; + + if ((s->rctl & SE_RCTL_RXEN) == 0) + return -1; + if (s->np >= 31) { + return 0; + } + + DPRINTF("Received packet len=%zu\n", size); + n = s->next_packet + s->np; + if (n >= 31) + n -= 31; + s->np++; + + s->rx[n].len = size + 6; + p = s->rx[n].data; + *(p++) = (size + 6); + *(p++) = (size + 6) >> 8; + memcpy (p, buf, size); + p += size; + crc = crc32(~0, buf, size); + *(p++) = crc; + *(p++) = crc >> 8; + *(p++) = crc >> 16; + *(p++) = crc >> 24; + /* Clear the remaining bytes in the last word. */ + if ((size & 3) != 2) { + memset(p, 0, (6 - size) & 3); + } + + s->ris |= SE_INT_RX; + stellaris_enet_update(s); + + return size; +} + +static int stellaris_enet_can_receive(stellaris_enet_state *s) +{ + return (s->np < 31); +} + +static uint64_t stellaris_enet_read(void *opaque, hwaddr offset, + unsigned size) +{ + stellaris_enet_state *s = (stellaris_enet_state *)opaque; + uint32_t val; + + switch (offset) { + case 0x00: /* RIS */ + DPRINTF("IRQ status %02x\n", s->ris); + return s->ris; + case 0x04: /* IM */ + return s->im; + case 0x08: /* RCTL */ + return s->rctl; + case 0x0c: /* TCTL */ + return s->tctl; + case 0x10: /* DATA */ + { + uint8_t *rx_fifo; + + if (s->np == 0) { + BADF("RX underflow\n"); + return 0; + } + + rx_fifo = s->rx[s->next_packet].data + s->rx_fifo_offset; + + val = rx_fifo[0] | (rx_fifo[1] << 8) | (rx_fifo[2] << 16) + | (rx_fifo[3] << 24); + s->rx_fifo_offset += 4; + if (s->rx_fifo_offset >= s->rx[s->next_packet].len) { + s->rx_fifo_offset = 0; + s->next_packet++; + if (s->next_packet >= 31) + s->next_packet = 0; + s->np--; + DPRINTF("RX done np=%d\n", s->np); + if (!s->np && stellaris_enet_can_receive(s)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + } + return val; + } + case 0x14: /* IA0 */ + return s->conf.macaddr.a[0] | (s->conf.macaddr.a[1] << 8) + | (s->conf.macaddr.a[2] << 16) + | ((uint32_t)s->conf.macaddr.a[3] << 24); + case 0x18: /* IA1 */ + return s->conf.macaddr.a[4] | (s->conf.macaddr.a[5] << 8); + case 0x1c: /* THR */ + return s->thr; + case 0x20: /* MCTL */ + return s->mctl; + case 0x24: /* MDV */ + return s->mdv; + case 0x28: /* MADD */ + return 0; + case 0x2c: /* MTXD */ + return s->mtxd; + case 0x30: /* MRXD */ + return s->mrxd; + case 0x34: /* NP */ + return s->np; + case 0x38: /* TR */ + return 0; + case 0x3c: /* Undocuented: Timestamp? */ + return 0; + default: + hw_error("stellaris_enet_read: Bad offset %x\n", (int)offset); + return 0; + } +} + +static void stellaris_enet_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + stellaris_enet_state *s = (stellaris_enet_state *)opaque; + + switch (offset) { + case 0x00: /* IACK */ + s->ris &= ~value; + DPRINTF("IRQ ack %02" PRIx64 "/%02x\n", value, s->ris); + stellaris_enet_update(s); + /* Clearing TXER also resets the TX fifo. */ + if (value & SE_INT_TXER) { + s->tx_fifo_len = 0; + } + break; + case 0x04: /* IM */ + DPRINTF("IRQ mask %02" PRIx64 "/%02x\n", value, s->ris); + s->im = value; + stellaris_enet_update(s); + break; + case 0x08: /* RCTL */ + s->rctl = value; + if (value & SE_RCTL_RSTFIFO) { + s->np = 0; + s->rx_fifo_offset = 0; + stellaris_enet_update(s); + } + break; + case 0x0c: /* TCTL */ + s->tctl = value; + break; + case 0x10: /* DATA */ + if (s->tx_fifo_len == 0) { + /* The first word is special, it contains the data length */ + int framelen = value & 0xffff; + if (framelen > 2032) { + DPRINTF("TX frame too long (%d)\n", framelen); + s->ris |= SE_INT_TXER; + stellaris_enet_update(s); + break; + } + } + + if (s->tx_fifo_len + 4 <= ARRAY_SIZE(s->tx_fifo)) { + s->tx_fifo[s->tx_fifo_len++] = value; + s->tx_fifo[s->tx_fifo_len++] = value >> 8; + s->tx_fifo[s->tx_fifo_len++] = value >> 16; + s->tx_fifo[s->tx_fifo_len++] = value >> 24; + } + + if (stellaris_tx_thr_reached(s) && stellaris_txpacket_complete(s)) { + stellaris_enet_send(s); + } + break; + case 0x14: /* IA0 */ + s->conf.macaddr.a[0] = value; + s->conf.macaddr.a[1] = value >> 8; + s->conf.macaddr.a[2] = value >> 16; + s->conf.macaddr.a[3] = value >> 24; + break; + case 0x18: /* IA1 */ + s->conf.macaddr.a[4] = value; + s->conf.macaddr.a[5] = value >> 8; + break; + case 0x1c: /* THR */ + s->thr = value; + break; + case 0x20: /* MCTL */ + s->mctl = value; + break; + case 0x24: /* MDV */ + s->mdv = value; + break; + case 0x28: /* MADD */ + /* ignored. */ + break; + case 0x2c: /* MTXD */ + s->mtxd = value & 0xff; + break; + case 0x38: /* TR */ + if (value & 1) { + stellaris_enet_send(s); + } + break; + case 0x30: /* MRXD */ + case 0x34: /* NP */ + /* Ignored. */ + case 0x3c: /* Undocuented: Timestamp? */ + /* Ignored. */ + break; + default: + hw_error("stellaris_enet_write: Bad offset %x\n", (int)offset); + } +} + +static const MemoryRegionOps stellaris_enet_ops = { + .read = stellaris_enet_read, + .write = stellaris_enet_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void stellaris_enet_reset(stellaris_enet_state *s) +{ + s->mdv = 0x80; + s->rctl = SE_RCTL_BADCRC; + s->im = SE_INT_PHY | SE_INT_MD | SE_INT_RXER | SE_INT_FOV | SE_INT_TXEMP + | SE_INT_TXER | SE_INT_RX; + s->thr = 0x3f; + s->tx_fifo_len = 0; +} + +static NetClientInfo net_stellaris_enet_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = stellaris_enet_receive, +}; + +static int stellaris_enet_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + stellaris_enet_state *s = STELLARIS_ENET(dev); + + memory_region_init_io(&s->mmio, OBJECT(s), &stellaris_enet_ops, s, + "stellaris_enet", 0x1000); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->nic = qemu_new_nic(&net_stellaris_enet_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + stellaris_enet_reset(s); + return 0; +} + +static Property stellaris_enet_properties[] = { + DEFINE_NIC_PROPERTIES(stellaris_enet_state, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void stellaris_enet_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = stellaris_enet_init; + dc->props = stellaris_enet_properties; + dc->vmsd = &vmstate_stellaris_enet; +} + +static const TypeInfo stellaris_enet_info = { + .name = TYPE_STELLARIS_ENET, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(stellaris_enet_state), + .class_init = stellaris_enet_class_init, +}; + +static void stellaris_enet_register_types(void) +{ + type_register_static(&stellaris_enet_info); +} + +type_init(stellaris_enet_register_types) diff --git a/qemu/hw/net/vhost_net.c b/qemu/hw/net/vhost_net.c new file mode 100644 index 000000000..5c1d11f51 --- /dev/null +++ b/qemu/hw/net/vhost_net.c @@ -0,0 +1,459 @@ +/* + * vhost-net support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "net/net.h" +#include "net/tap.h" +#include "net/vhost-user.h" + +#include "hw/virtio/virtio-net.h" +#include "net/vhost_net.h" +#include "qemu/error-report.h" + +#include "config.h" + +#ifdef CONFIG_VHOST_NET +#include <linux/vhost.h> +#include <sys/socket.h> +#include <linux/kvm.h> +#include <fcntl.h> +#include <netpacket/packet.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> + +#include <stdio.h> + +#include "standard-headers/linux/virtio_ring.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + int backend; + NetClientState *nc; +}; + +/* Features supported by host kernel. */ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_F_VERSION_1, + VHOST_INVALID_FEATURE_BIT +}; + +/* Features supported by others. */ +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_STATUS, + VIRTIO_NET_F_CTRL_VQ, + VIRTIO_NET_F_CTRL_RX, + VIRTIO_NET_F_CTRL_VLAN, + VIRTIO_NET_F_CTRL_RX_EXTRA, + VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; + +static const int *vhost_net_get_feature_bits(struct vhost_net *net) +{ + const int *feature_bits = 0; + + switch (net->nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + feature_bits = kernel_feature_bits; + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + feature_bits = user_feature_bits; + break; + default: + error_report("Feature bits not defined for this type: %d", + net->nc->info->type); + break; + } + + return feature_bits; +} + +uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) +{ + return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), + features); +} + +void vhost_net_ack_features(struct vhost_net *net, uint64_t features) +{ + net->dev.acked_features = net->dev.backend_features; + vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); +} + +static int vhost_net_get_fd(NetClientState *backend) +{ + switch (backend->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + return tap_get_fd(backend); + default: + fprintf(stderr, "vhost-net requires tap backend\n"); + return -EBADFD; + } +} + +struct vhost_net *vhost_net_init(VhostNetOptions *options) +{ + int r; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; + struct vhost_net *net = g_malloc(sizeof *net); + + if (!options->net_backend) { + fprintf(stderr, "vhost-net requires net backend to be setup\n"); + goto fail; + } + + if (backend_kernel) { + r = vhost_net_get_fd(options->net_backend); + if (r < 0) { + goto fail; + } + net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) + ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); + net->backend = r; + } else { + net->dev.backend_features = 0; + net->backend = -1; + } + net->nc = options->net_backend; + + net->dev.nvqs = 2; + net->dev.vqs = net->vqs; + + r = vhost_dev_init(&net->dev, options->opaque, + options->backend_type); + if (r < 0) { + goto fail; + } + if (backend_kernel) { + if (!qemu_has_vnet_hdr_len(options->net_backend, + sizeof(struct virtio_net_hdr_mrg_rxbuf))) { + net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); + } + if (~net->dev.features & net->dev.backend_features) { + fprintf(stderr, "vhost lacks feature mask %" PRIu64 + " for backend\n", + (uint64_t)(~net->dev.features & net->dev.backend_features)); + vhost_dev_cleanup(&net->dev); + goto fail; + } + } + /* Set sane init value. Override when guest acks. */ + vhost_net_ack_features(net, 0); + return net; +fail: + g_free(net); + return NULL; +} + +static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index) +{ + net->dev.vq_index = vq_index; +} + +static int vhost_net_set_vnet_endian(VirtIODevice *dev, NetClientState *peer, + bool set) +{ + int r = 0; + + if (virtio_has_feature(dev, VIRTIO_F_VERSION_1) || + (virtio_legacy_is_cross_endian(dev) && !virtio_is_big_endian(dev))) { + r = qemu_set_vnet_le(peer, set); + if (r) { + error_report("backend does not support LE vnet headers"); + } + } else if (virtio_legacy_is_cross_endian(dev)) { + r = qemu_set_vnet_be(peer, set); + if (r) { + error_report("backend does not support BE vnet headers"); + } + } + + return r; +} + +static int vhost_net_start_one(struct vhost_net *net, + VirtIODevice *dev) +{ + struct vhost_vring_file file = { }; + int r; + + net->dev.nvqs = 2; + net->dev.vqs = net->vqs; + + r = vhost_dev_enable_notifiers(&net->dev, dev); + if (r < 0) { + goto fail_notifiers; + } + + r = vhost_dev_start(&net->dev, dev); + if (r < 0) { + goto fail_start; + } + + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, false); + } + + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + qemu_set_fd_handler(net->backend, NULL, NULL, NULL); + file.fd = net->backend; + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + if (r < 0) { + r = -errno; + goto fail; + } + } + } + return 0; +fail: + file.fd = -1; + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + while (file.index-- > 0) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, true); + } + vhost_dev_stop(&net->dev, dev); +fail_start: + vhost_dev_disable_notifiers(&net->dev, dev); +fail_notifiers: + return r; +} + +static void vhost_net_stop_one(struct vhost_net *net, + VirtIODevice *dev) +{ + struct vhost_vring_file file = { .fd = -1 }; + + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER, + NULL); + assert(r >= 0); + } + } + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, true); + } + vhost_dev_stop(&net->dev, dev); + vhost_dev_disable_notifiers(&net->dev, dev); +} + +int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + int total_queues) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + int r, e, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + r = -ENOSYS; + goto err; + } + + r = vhost_net_set_vnet_endian(dev, ncs[0].peer, true); + if (r < 0) { + goto err; + } + + for (i = 0; i < total_queues; i++) { + vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2); + } + + r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); + if (r < 0) { + error_report("Error binding guest notifier: %d", -r); + goto err_endian; + } + + for (i = 0; i < total_queues; i++) { + r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev); + + if (r < 0) { + goto err_start; + } + } + + return 0; + +err_start: + while (--i >= 0) { + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); + if (e < 0) { + fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); + fflush(stderr); + } +err_endian: + vhost_net_set_vnet_endian(dev, ncs[0].peer, false); +err: + return r; +} + +void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, + int total_queues) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + int i, r; + + for (i = 0; i < total_queues; i++) { + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); + } + + r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); + if (r < 0) { + fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); + fflush(stderr); + } + assert(r >= 0); + + assert(vhost_net_set_vnet_endian(dev, ncs[0].peer, false) >= 0); +} + +void vhost_net_cleanup(struct vhost_net *net) +{ + vhost_dev_cleanup(&net->dev); + g_free(net); +} + +bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) +{ + return vhost_virtqueue_pending(&net->dev, idx); +} + +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask) +{ + vhost_virtqueue_mask(&net->dev, dev, idx, mask); +} + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + VHostNetState *vhost_net = 0; + + if (!nc) { + return 0; + } + + switch (nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + vhost_net = tap_get_vhost_net(nc); + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + vhost_net = vhost_user_get_vhost_net(nc); + break; + default: + break; + } + + return vhost_net; +} +#else +struct vhost_net *vhost_net_init(VhostNetOptions *options) +{ + error_report("vhost-net support is not compiled in"); + return NULL; +} + +int vhost_net_start(VirtIODevice *dev, + NetClientState *ncs, + int total_queues) +{ + return -ENOSYS; +} +void vhost_net_stop(VirtIODevice *dev, + NetClientState *ncs, + int total_queues) +{ +} + +void vhost_net_cleanup(struct vhost_net *net) +{ +} + +uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) +{ + return features; +} +void vhost_net_ack_features(struct vhost_net *net, uint64_t features) +{ +} + +bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) +{ + return false; +} + +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask) +{ +} + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + return 0; +} +#endif diff --git a/qemu/hw/net/virtio-net.c b/qemu/hw/net/virtio-net.c new file mode 100644 index 000000000..151083954 --- /dev/null +++ b/qemu/hw/net/virtio-net.c @@ -0,0 +1,1863 @@ +/* + * Virtio Network Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu/iov.h" +#include "hw/virtio/virtio.h" +#include "net/net.h" +#include "net/checksum.h" +#include "net/tap.h" +#include "qemu/error-report.h" +#include "qemu/timer.h" +#include "hw/virtio/virtio-net.h" +#include "net/vhost_net.h" +#include "hw/virtio/virtio-bus.h" +#include "qapi/qmp/qjson.h" +#include "qapi-event.h" +#include "hw/virtio/virtio-access.h" + +#define VIRTIO_NET_VM_VERSION 11 + +#define MAC_TABLE_ENTRIES 64 +#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + +/* + * Calculate the number of bytes up to and including the given 'field' of + * 'container'. + */ +#define endof(container, field) \ + (offsetof(container, field) + sizeof(((container *)0)->field)) + +typedef struct VirtIOFeature { + uint32_t flags; + size_t end; +} VirtIOFeature; + +static VirtIOFeature feature_sizes[] = { + {.flags = 1 << VIRTIO_NET_F_MAC, + .end = endof(struct virtio_net_config, mac)}, + {.flags = 1 << VIRTIO_NET_F_STATUS, + .end = endof(struct virtio_net_config, status)}, + {.flags = 1 << VIRTIO_NET_F_MQ, + .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, + {} +}; + +static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + + return &n->vqs[nc->queue_index]; +} + +static int vq2q(int queue_index) +{ + return queue_index / 2; +} + +/* TODO + * - we could suppress RX interrupt if we were so inclined. + */ + +static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_config netcfg; + + virtio_stw_p(vdev, &netcfg.status, n->status); + virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); + memcpy(netcfg.mac, n->mac, ETH_ALEN); + memcpy(config, &netcfg, n->config_size); +} + +static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_config netcfg = {}; + + memcpy(&netcfg, config, n->config_size); + + if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && + !virtio_has_feature(vdev, VIRTIO_F_VERSION_1) && + memcmp(netcfg.mac, n->mac, ETH_ALEN)) { + memcpy(n->mac, netcfg.mac, ETH_ALEN); + qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); + } +} + +static bool virtio_net_started(VirtIONet *n, uint8_t status) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + return (status & VIRTIO_CONFIG_S_DRIVER_OK) && + (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; +} + +static void virtio_net_announce_timer(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + n->announce_counter--; + n->status |= VIRTIO_NET_S_ANNOUNCE; + virtio_notify_config(vdev); +} + +static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queues = n->multiqueue ? n->max_queues : 1; + + if (!get_vhost_net(nc->peer)) { + return; + } + + if ((virtio_net_started(n, status) && !nc->peer->link_down) == + !!n->vhost_started) { + return; + } + if (!n->vhost_started) { + int r, i; + + /* Any packets outstanding? Purge them to avoid touching rings + * when vhost is running. + */ + for (i = 0; i < queues; i++) { + NetClientState *qnc = qemu_get_subqueue(n->nic, i); + + /* Purge both directions: TX and RX. */ + qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); + qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); + } + + n->vhost_started = 1; + r = vhost_net_start(vdev, n->nic->ncs, queues); + if (r < 0) { + error_report("unable to start vhost net: %d: " + "falling back on userspace virtio", -r); + n->vhost_started = 0; + } + } else { + vhost_net_stop(vdev, n->nic->ncs, queues); + n->vhost_started = 0; + } +} + +static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +{ + VirtIONet *n = VIRTIO_NET(vdev); + VirtIONetQueue *q; + int i; + uint8_t queue_status; + + virtio_net_vhost_status(n, status); + + for (i = 0; i < n->max_queues; i++) { + NetClientState *ncs = qemu_get_subqueue(n->nic, i); + bool queue_started; + q = &n->vqs[i]; + + if ((!n->multiqueue && i != 0) || i >= n->curr_queues) { + queue_status = 0; + } else { + queue_status = status; + } + queue_started = + virtio_net_started(n, queue_status) && !n->vhost_started; + + if (queue_started) { + qemu_flush_queued_packets(ncs); + } + + if (!q->tx_waiting) { + continue; + } + + if (queue_started) { + if (q->tx_timer) { + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + } else { + qemu_bh_schedule(q->tx_bh); + } + } else { + if (q->tx_timer) { + timer_del(q->tx_timer); + } else { + qemu_bh_cancel(q->tx_bh); + } + } + } +} + +static void virtio_net_set_link_status(NetClientState *nc) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t old_status = n->status; + + if (nc->link_down) + n->status &= ~VIRTIO_NET_S_LINK_UP; + else + n->status |= VIRTIO_NET_S_LINK_UP; + + if (n->status != old_status) + virtio_notify_config(vdev); + + virtio_net_set_status(vdev, vdev->status); +} + +static void rxfilter_notify(NetClientState *nc) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + + if (nc->rxfilter_notify_enabled) { + gchar *path = object_get_canonical_path(OBJECT(n->qdev)); + qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, + n->netclient_name, path, &error_abort); + g_free(path); + + /* disable event notification to avoid events flooding */ + nc->rxfilter_notify_enabled = 0; + } +} + +static intList *get_vlan_table(VirtIONet *n) +{ + intList *list, *entry; + int i, j; + + list = NULL; + for (i = 0; i < MAX_VLAN >> 5; i++) { + for (j = 0; n->vlans[i] && j <= 0x1f; j++) { + if (n->vlans[i] & (1U << j)) { + entry = g_malloc0(sizeof(*entry)); + entry->value = (i << 5) + j; + entry->next = list; + list = entry; + } + } + } + + return list; +} + +static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); + RxFilterInfo *info; + strList *str_list, *entry; + int i; + + info = g_malloc0(sizeof(*info)); + info->name = g_strdup(nc->name); + info->promiscuous = n->promisc; + + if (n->nouni) { + info->unicast = RX_STATE_NONE; + } else if (n->alluni) { + info->unicast = RX_STATE_ALL; + } else { + info->unicast = RX_STATE_NORMAL; + } + + if (n->nomulti) { + info->multicast = RX_STATE_NONE; + } else if (n->allmulti) { + info->multicast = RX_STATE_ALL; + } else { + info->multicast = RX_STATE_NORMAL; + } + + info->broadcast_allowed = n->nobcast; + info->multicast_overflow = n->mac_table.multi_overflow; + info->unicast_overflow = n->mac_table.uni_overflow; + + info->main_mac = qemu_mac_strdup_printf(n->mac); + + str_list = NULL; + for (i = 0; i < n->mac_table.first_multi; i++) { + entry = g_malloc0(sizeof(*entry)); + entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN); + entry->next = str_list; + str_list = entry; + } + info->unicast_table = str_list; + + str_list = NULL; + for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { + entry = g_malloc0(sizeof(*entry)); + entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN); + entry->next = str_list; + str_list = entry; + } + info->multicast_table = str_list; + info->vlan_table = get_vlan_table(n); + + if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { + info->vlan = RX_STATE_ALL; + } else if (!info->vlan_table) { + info->vlan = RX_STATE_NONE; + } else { + info->vlan = RX_STATE_NORMAL; + } + + /* enable event notification after query */ + nc->rxfilter_notify_enabled = 1; + + return info; +} + +static void virtio_net_reset(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + + /* Reset back to compatibility mode */ + n->promisc = 1; + n->allmulti = 0; + n->alluni = 0; + n->nomulti = 0; + n->nouni = 0; + n->nobcast = 0; + /* multiqueue is disabled by default */ + n->curr_queues = 1; + timer_del(n->announce_timer); + n->announce_counter = 0; + n->status &= ~VIRTIO_NET_S_ANNOUNCE; + + /* Flush any MAC and VLAN filter table state */ + n->mac_table.in_use = 0; + n->mac_table.first_multi = 0; + n->mac_table.multi_overflow = 0; + n->mac_table.uni_overflow = 0; + memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); + memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); + qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); + memset(n->vlans, 0, MAX_VLAN >> 3); +} + +static void peer_test_vnet_hdr(VirtIONet *n) +{ + NetClientState *nc = qemu_get_queue(n->nic); + if (!nc->peer) { + return; + } + + n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); +} + +static int peer_has_vnet_hdr(VirtIONet *n) +{ + return n->has_vnet_hdr; +} + +static int peer_has_ufo(VirtIONet *n) +{ + if (!peer_has_vnet_hdr(n)) + return 0; + + n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); + + return n->has_ufo; +} + +static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + int version_1) +{ + int i; + NetClientState *nc; + + n->mergeable_rx_bufs = mergeable_rx_bufs; + + if (version_1) { + n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + n->guest_hdr_len = n->mergeable_rx_bufs ? + sizeof(struct virtio_net_hdr_mrg_rxbuf) : + sizeof(struct virtio_net_hdr); + } + + for (i = 0; i < n->max_queues; i++) { + nc = qemu_get_subqueue(n->nic, i); + + if (peer_has_vnet_hdr(n) && + qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { + qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); + n->host_hdr_len = n->guest_hdr_len; + } + } +} + +static int peer_attach(VirtIONet *n, int index) +{ + NetClientState *nc = qemu_get_subqueue(n->nic, index); + + if (!nc->peer) { + return 0; + } + + if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { + return 0; + } + + return tap_enable(nc->peer); +} + +static int peer_detach(VirtIONet *n, int index) +{ + NetClientState *nc = qemu_get_subqueue(n->nic, index); + + if (!nc->peer) { + return 0; + } + + if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { + return 0; + } + + return tap_disable(nc->peer); +} + +static void virtio_net_set_queues(VirtIONet *n) +{ + int i; + int r; + + for (i = 0; i < n->max_queues; i++) { + if (i < n->curr_queues) { + r = peer_attach(n, i); + assert(!r); + } else { + r = peer_detach(n, i); + assert(!r); + } + } +} + +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); + +static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + + /* Firstly sync all virtio-net possible supported features */ + features |= n->host_features; + + virtio_add_feature(&features, VIRTIO_NET_F_MAC); + + if (!peer_has_vnet_hdr(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + } + + if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); + } + + if (!get_vhost_net(nc->peer)) { + return features; + } + return vhost_net_get_features(get_vhost_net(nc->peer), features); +} + +static uint64_t virtio_net_bad_features(VirtIODevice *vdev) +{ + uint64_t features = 0; + + /* Linux kernel 2.6.25. It understood MAC (as everyone must), + * but also these: */ + virtio_add_feature(&features, VIRTIO_NET_F_MAC); + virtio_add_feature(&features, VIRTIO_NET_F_CSUM); + virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); + virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); + virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); + + return features; +} + +static void virtio_net_apply_guest_offloads(VirtIONet *n) +{ + qemu_set_offload(qemu_get_queue(n->nic)->peer, + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); +} + +static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) +{ + static const uint64_t guest_offloads_mask = + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | + (1ULL << VIRTIO_NET_F_GUEST_ECN) | + (1ULL << VIRTIO_NET_F_GUEST_UFO); + + return guest_offloads_mask & features; +} + +static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + return virtio_net_guest_offloads_by_features(vdev->guest_features); +} + +static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) +{ + VirtIONet *n = VIRTIO_NET(vdev); + int i; + + virtio_net_set_multiqueue(n, + __virtio_has_feature(features, VIRTIO_NET_F_MQ)); + + virtio_net_set_mrg_rx_bufs(n, + __virtio_has_feature(features, + VIRTIO_NET_F_MRG_RXBUF), + __virtio_has_feature(features, + VIRTIO_F_VERSION_1)); + + if (n->has_vnet_hdr) { + n->curr_guest_offloads = + virtio_net_guest_offloads_by_features(features); + virtio_net_apply_guest_offloads(n); + } + + for (i = 0; i < n->max_queues; i++) { + NetClientState *nc = qemu_get_subqueue(n->nic, i); + + if (!get_vhost_net(nc->peer)) { + continue; + } + vhost_net_ack_features(get_vhost_net(nc->peer), features); + } + + if (__virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { + memset(n->vlans, 0, MAX_VLAN >> 3); + } else { + memset(n->vlans, 0xff, MAX_VLAN >> 3); + } +} + +static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + uint8_t on; + size_t s; + NetClientState *nc = qemu_get_queue(n->nic); + + s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); + if (s != sizeof(on)) { + return VIRTIO_NET_ERR; + } + + if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { + n->promisc = on; + } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { + n->allmulti = on; + } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { + n->alluni = on; + } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { + n->nomulti = on; + } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { + n->nouni = on; + } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { + n->nobcast = on; + } else { + return VIRTIO_NET_ERR; + } + + rxfilter_notify(nc); + + return VIRTIO_NET_OK; +} + +static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint64_t offloads; + size_t s; + + if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + return VIRTIO_NET_ERR; + } + + s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); + if (s != sizeof(offloads)) { + return VIRTIO_NET_ERR; + } + + if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { + uint64_t supported_offloads; + + if (!n->has_vnet_hdr) { + return VIRTIO_NET_ERR; + } + + supported_offloads = virtio_net_supported_guest_offloads(n); + if (offloads & ~supported_offloads) { + return VIRTIO_NET_ERR; + } + + n->curr_guest_offloads = offloads; + virtio_net_apply_guest_offloads(n); + + return VIRTIO_NET_OK; + } else { + return VIRTIO_NET_ERR; + } +} + +static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct virtio_net_ctrl_mac mac_data; + size_t s; + NetClientState *nc = qemu_get_queue(n->nic); + + if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { + if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { + return VIRTIO_NET_ERR; + } + s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); + assert(s == sizeof(n->mac)); + qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); + rxfilter_notify(nc); + + return VIRTIO_NET_OK; + } + + if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { + return VIRTIO_NET_ERR; + } + + int in_use = 0; + int first_multi = 0; + uint8_t uni_overflow = 0; + uint8_t multi_overflow = 0; + uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); + + s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, + sizeof(mac_data.entries)); + mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); + if (s != sizeof(mac_data.entries)) { + goto error; + } + iov_discard_front(&iov, &iov_cnt, s); + + if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { + goto error; + } + + if (mac_data.entries <= MAC_TABLE_ENTRIES) { + s = iov_to_buf(iov, iov_cnt, 0, macs, + mac_data.entries * ETH_ALEN); + if (s != mac_data.entries * ETH_ALEN) { + goto error; + } + in_use += mac_data.entries; + } else { + uni_overflow = 1; + } + + iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); + + first_multi = in_use; + + s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, + sizeof(mac_data.entries)); + mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); + if (s != sizeof(mac_data.entries)) { + goto error; + } + + iov_discard_front(&iov, &iov_cnt, s); + + if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { + goto error; + } + + if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { + s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], + mac_data.entries * ETH_ALEN); + if (s != mac_data.entries * ETH_ALEN) { + goto error; + } + in_use += mac_data.entries; + } else { + multi_overflow = 1; + } + + n->mac_table.in_use = in_use; + n->mac_table.first_multi = first_multi; + n->mac_table.uni_overflow = uni_overflow; + n->mac_table.multi_overflow = multi_overflow; + memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); + g_free(macs); + rxfilter_notify(nc); + + return VIRTIO_NET_OK; + +error: + g_free(macs); + return VIRTIO_NET_ERR; +} + +static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t vid; + size_t s; + NetClientState *nc = qemu_get_queue(n->nic); + + s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); + vid = virtio_lduw_p(vdev, &vid); + if (s != sizeof(vid)) { + return VIRTIO_NET_ERR; + } + + if (vid >= MAX_VLAN) + return VIRTIO_NET_ERR; + + if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) + n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); + else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) + n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); + else + return VIRTIO_NET_ERR; + + rxfilter_notify(nc); + + return VIRTIO_NET_OK; +} + +static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && + n->status & VIRTIO_NET_S_ANNOUNCE) { + n->status &= ~VIRTIO_NET_S_ANNOUNCE; + if (n->announce_counter) { + timer_mod(n->announce_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + self_announce_delay(n->announce_counter)); + } + return VIRTIO_NET_OK; + } else { + return VIRTIO_NET_ERR; + } +} + +static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct virtio_net_ctrl_mq mq; + size_t s; + uint16_t queues; + + s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); + if (s != sizeof(mq)) { + return VIRTIO_NET_ERR; + } + + if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + return VIRTIO_NET_ERR; + } + + queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); + + if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || + queues > n->max_queues || + !n->multiqueue) { + return VIRTIO_NET_ERR; + } + + n->curr_queues = queues; + /* stop the backend before changing the number of queues to avoid handling a + * disabled queue */ + virtio_net_set_status(vdev, vdev->status); + virtio_net_set_queues(n); + + return VIRTIO_NET_OK; +} +static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_ctrl_hdr ctrl; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + VirtQueueElement elem; + size_t s; + struct iovec *iov, *iov2; + unsigned int iov_cnt; + + while (virtqueue_pop(vq, &elem)) { + if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) || + iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) { + error_report("virtio-net ctrl missing headers"); + exit(1); + } + + iov_cnt = elem.out_num; + iov2 = iov = g_memdup(elem.out_sg, sizeof(struct iovec) * elem.out_num); + s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); + iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); + if (s != sizeof(ctrl)) { + status = VIRTIO_NET_ERR; + } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { + status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { + status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { + status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { + status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { + status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { + status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); + } + + s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status)); + assert(s == sizeof(status)); + + virtqueue_push(vq, &elem, sizeof(status)); + virtio_notify(vdev, vq); + g_free(iov2); + } +} + +/* RX */ + +static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = VIRTIO_NET(vdev); + int queue_index = vq2q(virtio_get_queue_index(vq)); + + qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); +} + +static int virtio_net_can_receive(NetClientState *nc) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); + VirtIONetQueue *q = virtio_net_get_subqueue(nc); + + if (!vdev->vm_running) { + return 0; + } + + if (nc->queue_index >= n->curr_queues) { + return 0; + } + + if (!virtio_queue_ready(q->rx_vq) || + !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return 0; + } + + return 1; +} + +static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) +{ + VirtIONet *n = q->n; + if (virtio_queue_empty(q->rx_vq) || + (n->mergeable_rx_bufs && + !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { + virtio_queue_set_notification(q->rx_vq, 1); + + /* To avoid a race condition where the guest has made some buffers + * available after the above check but before notification was + * enabled, check for available buffers again. + */ + if (virtio_queue_empty(q->rx_vq) || + (n->mergeable_rx_bufs && + !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { + return 0; + } + } + + virtio_queue_set_notification(q->rx_vq, 0); + return 1; +} + +static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) +{ + virtio_tswap16s(vdev, &hdr->hdr_len); + virtio_tswap16s(vdev, &hdr->gso_size); + virtio_tswap16s(vdev, &hdr->csum_start); + virtio_tswap16s(vdev, &hdr->csum_offset); +} + +/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so + * it never finds out that the packets don't have valid checksums. This + * causes dhclient to get upset. Fedora's carried a patch for ages to + * fix this with Xen but it hasn't appeared in an upstream release of + * dhclient yet. + * + * To avoid breaking existing guests, we catch udp packets and add + * checksums. This is terrible but it's better than hacking the guest + * kernels. + * + * N.B. if we introduce a zero-copy API, this operation is no longer free so + * we should provide a mechanism to disable it to avoid polluting the host + * cache. + */ +static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, + uint8_t *buf, size_t size) +{ + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ + (size > 27 && size < 1500) && /* normal sized MTU */ + (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ + (buf[23] == 17) && /* ip.protocol == UDP */ + (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ + net_checksum_calculate(buf, size); + hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; + } +} + +static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, + const void *buf, size_t size) +{ + if (n->has_vnet_hdr) { + /* FIXME this cast is evil */ + void *wbuf = (void *)buf; + work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, + size - n->host_hdr_len); + virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); + iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); + } else { + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); + } +} + +static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) +{ + static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const uint8_t vlan[] = {0x81, 0x00}; + uint8_t *ptr = (uint8_t *)buf; + int i; + + if (n->promisc) + return 1; + + ptr += n->host_hdr_len; + + if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { + int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff; + if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) + return 0; + } + + if (ptr[0] & 1) { // multicast + if (!memcmp(ptr, bcast, sizeof(bcast))) { + return !n->nobcast; + } else if (n->nomulti) { + return 0; + } else if (n->allmulti || n->mac_table.multi_overflow) { + return 1; + } + + for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { + if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { + return 1; + } + } + } else { // unicast + if (n->nouni) { + return 0; + } else if (n->alluni || n->mac_table.uni_overflow) { + return 1; + } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { + return 1; + } + + for (i = 0; i < n->mac_table.first_multi; i++) { + if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { + return 1; + } + } + } + + return 0; +} + +static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIONetQueue *q = virtio_net_get_subqueue(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; + struct virtio_net_hdr_mrg_rxbuf mhdr; + unsigned mhdr_cnt = 0; + size_t offset, i, guest_offset; + + if (!virtio_net_can_receive(nc)) { + return -1; + } + + /* hdr_len refers to the header we supply to the guest */ + if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { + return 0; + } + + if (!receive_filter(n, buf, size)) + return size; + + offset = i = 0; + + while (offset < size) { + VirtQueueElement elem; + int len, total; + const struct iovec *sg = elem.in_sg; + + total = 0; + + if (virtqueue_pop(q->rx_vq, &elem) == 0) { + if (i == 0) + return -1; + error_report("virtio-net unexpected empty queue: " + "i %zd mergeable %d offset %zd, size %zd, " + "guest hdr len %zd, host hdr len %zd " + "guest features 0x%" PRIx64, + i, n->mergeable_rx_bufs, offset, size, + n->guest_hdr_len, n->host_hdr_len, + vdev->guest_features); + exit(1); + } + + if (elem.in_num < 1) { + error_report("virtio-net receive queue contains no in buffers"); + exit(1); + } + + if (i == 0) { + assert(offset == 0); + if (n->mergeable_rx_bufs) { + mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), + sg, elem.in_num, + offsetof(typeof(mhdr), num_buffers), + sizeof(mhdr.num_buffers)); + } + + receive_header(n, sg, elem.in_num, buf, size); + offset = n->host_hdr_len; + total += n->guest_hdr_len; + guest_offset = n->guest_hdr_len; + } else { + guest_offset = 0; + } + + /* copy in packet. ugh */ + len = iov_from_buf(sg, elem.in_num, guest_offset, + buf + offset, size - offset); + total += len; + offset += len; + /* If buffers can't be merged, at this point we + * must have consumed the complete packet. + * Otherwise, drop it. */ + if (!n->mergeable_rx_bufs && offset < size) { +#if 0 + error_report("virtio-net truncated non-mergeable packet: " + "i %zd mergeable %d offset %zd, size %zd, " + "guest hdr len %zd, host hdr len %zd", + i, n->mergeable_rx_bufs, + offset, size, n->guest_hdr_len, n->host_hdr_len); +#endif + return size; + } + + /* signal other side */ + virtqueue_fill(q->rx_vq, &elem, total, i++); + } + + if (mhdr_cnt) { + virtio_stw_p(vdev, &mhdr.num_buffers, i); + iov_from_buf(mhdr_sg, mhdr_cnt, + 0, + &mhdr.num_buffers, sizeof mhdr.num_buffers); + } + + virtqueue_flush(q->rx_vq, i); + virtio_notify(vdev, q->rx_vq); + + return size; +} + +static int32_t virtio_net_flush_tx(VirtIONetQueue *q); + +static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIONetQueue *q = virtio_net_get_subqueue(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + virtqueue_push(q->tx_vq, &q->async_tx.elem, 0); + virtio_notify(vdev, q->tx_vq); + + q->async_tx.elem.out_num = q->async_tx.len = 0; + + virtio_queue_set_notification(q->tx_vq, 1); + virtio_net_flush_tx(q); +} + +/* TX */ +static int32_t virtio_net_flush_tx(VirtIONetQueue *q) +{ + VirtIONet *n = q->n; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + VirtQueueElement elem; + int32_t num_packets = 0; + int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return num_packets; + } + + if (q->async_tx.elem.out_num) { + virtio_queue_set_notification(q->tx_vq, 0); + return num_packets; + } + + while (virtqueue_pop(q->tx_vq, &elem)) { + ssize_t ret, len; + unsigned int out_num = elem.out_num; + struct iovec *out_sg = &elem.out_sg[0]; + struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1]; + struct virtio_net_hdr_mrg_rxbuf mhdr; + + if (out_num < 1) { + error_report("virtio-net header not in first element"); + exit(1); + } + + if (n->has_vnet_hdr) { + if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < + n->guest_hdr_len) { + error_report("virtio-net header incorrect"); + exit(1); + } + if (virtio_needs_swap(vdev)) { + virtio_net_hdr_swap(vdev, (void *) &mhdr); + sg2[0].iov_base = &mhdr; + sg2[0].iov_len = n->guest_hdr_len; + out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, + out_sg, out_num, + n->guest_hdr_len, -1); + if (out_num == VIRTQUEUE_MAX_SIZE) { + goto drop; + } + out_num += 1; + out_sg = sg2; + } + } + /* + * If host wants to see the guest header as is, we can + * pass it on unchanged. Otherwise, copy just the parts + * that host is interested in. + */ + assert(n->host_hdr_len <= n->guest_hdr_len); + if (n->host_hdr_len != n->guest_hdr_len) { + unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), + out_sg, out_num, + 0, n->host_hdr_len); + sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, + out_sg, out_num, + n->guest_hdr_len, -1); + out_num = sg_num; + out_sg = sg; + } + + len = n->guest_hdr_len; + + ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), + out_sg, out_num, virtio_net_tx_complete); + if (ret == 0) { + virtio_queue_set_notification(q->tx_vq, 0); + q->async_tx.elem = elem; + q->async_tx.len = len; + return -EBUSY; + } + + len += ret; +drop: + virtqueue_push(q->tx_vq, &elem, 0); + virtio_notify(vdev, q->tx_vq); + + if (++num_packets >= n->tx_burst) { + break; + } + } + return num_packets; +} + +static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = VIRTIO_NET(vdev); + VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; + + /* This happens when device was stopped but VCPU wasn't. */ + if (!vdev->vm_running) { + q->tx_waiting = 1; + return; + } + + if (q->tx_waiting) { + virtio_queue_set_notification(vq, 1); + timer_del(q->tx_timer); + q->tx_waiting = 0; + virtio_net_flush_tx(q); + } else { + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + q->tx_waiting = 1; + virtio_queue_set_notification(vq, 0); + } +} + +static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = VIRTIO_NET(vdev); + VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; + + if (unlikely(q->tx_waiting)) { + return; + } + q->tx_waiting = 1; + /* This happens when device was stopped but VCPU wasn't. */ + if (!vdev->vm_running) { + return; + } + virtio_queue_set_notification(vq, 0); + qemu_bh_schedule(q->tx_bh); +} + +static void virtio_net_tx_timer(void *opaque) +{ + VirtIONetQueue *q = opaque; + VirtIONet *n = q->n; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + /* This happens when device was stopped but BH wasn't. */ + if (!vdev->vm_running) { + /* Make sure tx waiting is set, so we'll run when restarted. */ + assert(q->tx_waiting); + return; + } + + q->tx_waiting = 0; + + /* Just in case the driver is not ready on more */ + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + virtio_queue_set_notification(q->tx_vq, 1); + virtio_net_flush_tx(q); +} + +static void virtio_net_tx_bh(void *opaque) +{ + VirtIONetQueue *q = opaque; + VirtIONet *n = q->n; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + int32_t ret; + + /* This happens when device was stopped but BH wasn't. */ + if (!vdev->vm_running) { + /* Make sure tx waiting is set, so we'll run when restarted. */ + assert(q->tx_waiting); + return; + } + + q->tx_waiting = 0; + + /* Just in case the driver is not ready on more */ + if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { + return; + } + + ret = virtio_net_flush_tx(q); + if (ret == -EBUSY) { + return; /* Notification re-enable handled by tx_complete */ + } + + /* If we flush a full burst of packets, assume there are + * more coming and immediately reschedule */ + if (ret >= n->tx_burst) { + qemu_bh_schedule(q->tx_bh); + q->tx_waiting = 1; + return; + } + + /* If less than a full burst, re-enable notification and flush + * anything that may have come in while we weren't looking. If + * we find something, assume the guest is still active and reschedule */ + virtio_queue_set_notification(q->tx_vq, 1); + if (virtio_net_flush_tx(q) > 0) { + virtio_queue_set_notification(q->tx_vq, 0); + qemu_bh_schedule(q->tx_bh); + q->tx_waiting = 1; + } +} + +static void virtio_net_add_queue(VirtIONet *n, int index) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); + if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { + n->vqs[index].tx_vq = + virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); + n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + virtio_net_tx_timer, + &n->vqs[index]); + } else { + n->vqs[index].tx_vq = + virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh); + n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); + } + + n->vqs[index].tx_waiting = 0; + n->vqs[index].n = n; +} + +static void virtio_net_del_queue(VirtIONet *n, int index) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + VirtIONetQueue *q = &n->vqs[index]; + NetClientState *nc = qemu_get_subqueue(n->nic, index); + + qemu_purge_queued_packets(nc); + + virtio_del_queue(vdev, index * 2); + if (q->tx_timer) { + timer_del(q->tx_timer); + timer_free(q->tx_timer); + } else { + qemu_bh_delete(q->tx_bh); + } + virtio_del_queue(vdev, index * 2 + 1); +} + +static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + int old_num_queues = virtio_get_num_queues(vdev); + int new_num_queues = new_max_queues * 2 + 1; + int i; + + assert(old_num_queues >= 3); + assert(old_num_queues % 2 == 1); + + if (old_num_queues == new_num_queues) { + return; + } + + /* + * We always need to remove and add ctrl vq if + * old_num_queues != new_num_queues. Remove ctrl_vq first, + * and then we only enter one of the following too loops. + */ + virtio_del_queue(vdev, old_num_queues - 1); + + for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { + /* new_num_queues < old_num_queues */ + virtio_net_del_queue(n, i / 2); + } + + for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { + /* new_num_queues > old_num_queues */ + virtio_net_add_queue(n, i / 2); + } + + /* add ctrl_vq last */ + n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); +} + +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) +{ + int max = multiqueue ? n->max_queues : 1; + + n->multiqueue = multiqueue; + virtio_net_change_num_queues(n, max); + + virtio_net_set_queues(n); +} + +static void virtio_net_save(QEMUFile *f, void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + /* At this point, backend must be stopped, otherwise + * it might keep writing to memory. */ + assert(!n->vhost_started); + virtio_save(vdev, f); +} + +static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f) +{ + VirtIONet *n = VIRTIO_NET(vdev); + int i; + + qemu_put_buffer(f, n->mac, ETH_ALEN); + qemu_put_be32(f, n->vqs[0].tx_waiting); + qemu_put_be32(f, n->mergeable_rx_bufs); + qemu_put_be16(f, n->status); + qemu_put_byte(f, n->promisc); + qemu_put_byte(f, n->allmulti); + qemu_put_be32(f, n->mac_table.in_use); + qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN); + qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); + qemu_put_be32(f, n->has_vnet_hdr); + qemu_put_byte(f, n->mac_table.multi_overflow); + qemu_put_byte(f, n->mac_table.uni_overflow); + qemu_put_byte(f, n->alluni); + qemu_put_byte(f, n->nomulti); + qemu_put_byte(f, n->nouni); + qemu_put_byte(f, n->nobcast); + qemu_put_byte(f, n->has_ufo); + if (n->max_queues > 1) { + qemu_put_be16(f, n->max_queues); + qemu_put_be16(f, n->curr_queues); + for (i = 1; i < n->curr_queues; i++) { + qemu_put_be32(f, n->vqs[i].tx_waiting); + } + } + + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + qemu_put_be64(f, n->curr_guest_offloads); + } +} + +static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) + return -EINVAL; + + return virtio_load(vdev, f, version_id); +} + +static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, + int version_id) +{ + VirtIONet *n = VIRTIO_NET(vdev); + int i, link_down; + + qemu_get_buffer(f, n->mac, ETH_ALEN); + n->vqs[0].tx_waiting = qemu_get_be32(f); + + virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f), + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)); + + if (version_id >= 3) + n->status = qemu_get_be16(f); + + if (version_id >= 4) { + if (version_id < 8) { + n->promisc = qemu_get_be32(f); + n->allmulti = qemu_get_be32(f); + } else { + n->promisc = qemu_get_byte(f); + n->allmulti = qemu_get_byte(f); + } + } + + if (version_id >= 5) { + n->mac_table.in_use = qemu_get_be32(f); + /* MAC_TABLE_ENTRIES may be different from the saved image */ + if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { + qemu_get_buffer(f, n->mac_table.macs, + n->mac_table.in_use * ETH_ALEN); + } else { + int64_t i; + + /* Overflow detected - can happen if source has a larger MAC table. + * We simply set overflow flag so there's no need to maintain the + * table of addresses, discard them all. + * Note: 64 bit math to avoid integer overflow. + */ + for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { + qemu_get_byte(f); + } + n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; + n->mac_table.in_use = 0; + } + } + + if (version_id >= 6) + qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); + + if (version_id >= 7) { + if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) { + error_report("virtio-net: saved image requires vnet_hdr=on"); + return -1; + } + } + + if (version_id >= 9) { + n->mac_table.multi_overflow = qemu_get_byte(f); + n->mac_table.uni_overflow = qemu_get_byte(f); + } + + if (version_id >= 10) { + n->alluni = qemu_get_byte(f); + n->nomulti = qemu_get_byte(f); + n->nouni = qemu_get_byte(f); + n->nobcast = qemu_get_byte(f); + } + + if (version_id >= 11) { + if (qemu_get_byte(f) && !peer_has_ufo(n)) { + error_report("virtio-net: saved image requires TUN_F_UFO support"); + return -1; + } + } + + if (n->max_queues > 1) { + if (n->max_queues != qemu_get_be16(f)) { + error_report("virtio-net: different max_queues "); + return -1; + } + + n->curr_queues = qemu_get_be16(f); + if (n->curr_queues > n->max_queues) { + error_report("virtio-net: curr_queues %x > max_queues %x", + n->curr_queues, n->max_queues); + return -1; + } + for (i = 1; i < n->curr_queues; i++) { + n->vqs[i].tx_waiting = qemu_get_be32(f); + } + } + + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + n->curr_guest_offloads = qemu_get_be64(f); + } else { + n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); + } + + if (peer_has_vnet_hdr(n)) { + virtio_net_apply_guest_offloads(n); + } + + virtio_net_set_queues(n); + + /* Find the first multicast entry in the saved MAC filter */ + for (i = 0; i < n->mac_table.in_use; i++) { + if (n->mac_table.macs[i * ETH_ALEN] & 1) { + break; + } + } + n->mac_table.first_multi = i; + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in n->status */ + link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; + for (i = 0; i < n->max_queues; i++) { + qemu_get_subqueue(n->nic, i)->link_down = link_down; + } + + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && + virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { + n->announce_counter = SELF_ANNOUNCE_ROUNDS; + timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)); + } + + return 0; +} + +static NetClientInfo net_virtio_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = virtio_net_can_receive, + .receive = virtio_net_receive, + .link_status_changed = virtio_net_set_link_status, + .query_rx_filter = virtio_net_query_rxfilter, +}; + +static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + assert(n->vhost_started); + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); +} + +static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + assert(n->vhost_started); + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); +} + +static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) +{ + int i, config_size = 0; + virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); + for (i = 0; feature_sizes[i].flags != 0; i++) { + if (host_features & feature_sizes[i].flags) { + config_size = MAX(feature_sizes[i].end, config_size); + } + } + n->config_size = config_size; +} + +void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + const char *type) +{ + /* + * The name can be NULL, the netclient name will be type.x. + */ + assert(type != NULL); + + g_free(n->netclient_name); + g_free(n->netclient_type); + n->netclient_name = g_strdup(name); + n->netclient_type = g_strdup(type); +} + +static void virtio_net_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIONet *n = VIRTIO_NET(dev); + NetClientState *nc; + int i; + + virtio_net_set_config_size(n, n->host_features); + virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); + + n->max_queues = MAX(n->nic_conf.peers.queues, 1); + if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { + error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + "must be a positive integer less than %d.", + n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2); + virtio_cleanup(vdev); + return; + } + n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues); + n->curr_queues = 1; + n->tx_timeout = n->net_conf.txtimer; + + if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") + && strcmp(n->net_conf.tx, "bh")) { + error_report("virtio-net: " + "Unknown option tx=%s, valid options: \"timer\" \"bh\"", + n->net_conf.tx); + error_report("Defaulting to \"bh\""); + } + + for (i = 0; i < n->max_queues; i++) { + virtio_net_add_queue(n, i); + } + + n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); + qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); + memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); + n->status = VIRTIO_NET_S_LINK_UP; + n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_net_announce_timer, n); + + if (n->netclient_type) { + /* + * Happen when virtio_net_set_netclient_name has been called. + */ + n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, + n->netclient_type, n->netclient_name, n); + } else { + n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, + object_get_typename(OBJECT(dev)), dev->id, n); + } + + peer_test_vnet_hdr(n); + if (peer_has_vnet_hdr(n)) { + for (i = 0; i < n->max_queues; i++) { + qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); + } + n->host_hdr_len = sizeof(struct virtio_net_hdr); + } else { + n->host_hdr_len = 0; + } + + qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); + + n->vqs[0].tx_waiting = 0; + n->tx_burst = n->net_conf.txburst; + virtio_net_set_mrg_rx_bufs(n, 0, 0); + n->promisc = 1; /* for compatibility */ + + n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); + + n->vlans = g_malloc0(MAX_VLAN >> 3); + + nc = qemu_get_queue(n->nic); + nc->rxfilter_notify_enabled = 1; + + n->qdev = dev; + register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION, + virtio_net_save, virtio_net_load, n); +} + +static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIONet *n = VIRTIO_NET(dev); + int i, max_queues; + + /* This will stop vhost backend if appropriate. */ + virtio_net_set_status(vdev, 0); + + unregister_savevm(dev, "virtio-net", n); + + g_free(n->netclient_name); + n->netclient_name = NULL; + g_free(n->netclient_type); + n->netclient_type = NULL; + + g_free(n->mac_table.macs); + g_free(n->vlans); + + max_queues = n->multiqueue ? n->max_queues : 1; + for (i = 0; i < max_queues; i++) { + virtio_net_del_queue(n, i); + } + + timer_del(n->announce_timer); + timer_free(n->announce_timer); + g_free(n->vqs); + qemu_del_nic(n->nic); + virtio_cleanup(vdev); +} + +static void virtio_net_instance_init(Object *obj) +{ + VirtIONet *n = VIRTIO_NET(obj); + + /* + * The default config_size is sizeof(struct virtio_net_config). + * Can be overriden with virtio_net_set_config_size. + */ + n->config_size = sizeof(struct virtio_net_config); + device_add_bootindex_property(obj, &n->nic_conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(n), NULL); +} + +static Property virtio_net_properties[] = { + DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), + DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_CSUM, true), + DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), + DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_TSO4, true), + DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_TSO6, true), + DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_ECN, true), + DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_UFO, true), + DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_ANNOUNCE, true), + DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features, + VIRTIO_NET_F_HOST_TSO4, true), + DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features, + VIRTIO_NET_F_HOST_TSO6, true), + DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features, + VIRTIO_NET_F_HOST_ECN, true), + DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features, + VIRTIO_NET_F_HOST_UFO, true), + DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features, + VIRTIO_NET_F_MRG_RXBUF, true), + DEFINE_PROP_BIT("status", VirtIONet, host_features, + VIRTIO_NET_F_STATUS, true), + DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_VQ, true), + DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_RX, true), + DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_VLAN, true), + DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_RX_EXTRA, true), + DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_MAC_ADDR, true), + DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), + DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), + DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), + DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, + TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_net_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = virtio_net_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + vdc->realize = virtio_net_device_realize; + vdc->unrealize = virtio_net_device_unrealize; + vdc->get_config = virtio_net_get_config; + vdc->set_config = virtio_net_set_config; + vdc->get_features = virtio_net_get_features; + vdc->set_features = virtio_net_set_features; + vdc->bad_features = virtio_net_bad_features; + vdc->reset = virtio_net_reset; + vdc->set_status = virtio_net_set_status; + vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; + vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; + vdc->load = virtio_net_load_device; + vdc->save = virtio_net_save_device; +} + +static const TypeInfo virtio_net_info = { + .name = TYPE_VIRTIO_NET, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIONet), + .instance_init = virtio_net_instance_init, + .class_init = virtio_net_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_net_info); +} + +type_init(virtio_register_types) diff --git a/qemu/hw/net/vmware_utils.h b/qemu/hw/net/vmware_utils.h new file mode 100644 index 000000000..1099df669 --- /dev/null +++ b/qemu/hw/net/vmware_utils.h @@ -0,0 +1,143 @@ +/* + * QEMU VMWARE paravirtual devices - auxiliary code + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VMWARE_UTILS_H +#define VMWARE_UTILS_H + +#include "qemu/range.h" + +#ifndef VMW_SHPRN +#define VMW_SHPRN(fmt, ...) do {} while (0) +#endif + +/* + * Shared memory access functions with byte swap support + * Each function contains printout for reverse-engineering needs + * + */ +static inline void +vmw_shmem_read(hwaddr addr, void *buf, int len) +{ + VMW_SHPRN("SHMEM r: %" PRIx64 ", len: %d to %p", addr, len, buf); + cpu_physical_memory_read(addr, buf, len); +} + +static inline void +vmw_shmem_write(hwaddr addr, void *buf, int len) +{ + VMW_SHPRN("SHMEM w: %" PRIx64 ", len: %d to %p", addr, len, buf); + cpu_physical_memory_write(addr, buf, len); +} + +static inline void +vmw_shmem_rw(hwaddr addr, void *buf, int len, int is_write) +{ + VMW_SHPRN("SHMEM r/w: %" PRIx64 ", len: %d (to %p), is write: %d", + addr, len, buf, is_write); + + cpu_physical_memory_rw(addr, buf, len, is_write); +} + +static inline void +vmw_shmem_set(hwaddr addr, uint8 val, int len) +{ + int i; + VMW_SHPRN("SHMEM set: %" PRIx64 ", len: %d (value 0x%X)", addr, len, val); + + for (i = 0; i < len; i++) { + cpu_physical_memory_write(addr + i, &val, 1); + } +} + +static inline uint32_t +vmw_shmem_ld8(hwaddr addr) +{ + uint8_t res = ldub_phys(&address_space_memory, addr); + VMW_SHPRN("SHMEM load8: %" PRIx64 " (value 0x%X)", addr, res); + return res; +} + +static inline void +vmw_shmem_st8(hwaddr addr, uint8_t value) +{ + VMW_SHPRN("SHMEM store8: %" PRIx64 " (value 0x%X)", addr, value); + stb_phys(&address_space_memory, addr, value); +} + +static inline uint32_t +vmw_shmem_ld16(hwaddr addr) +{ + uint16_t res = lduw_le_phys(&address_space_memory, addr); + VMW_SHPRN("SHMEM load16: %" PRIx64 " (value 0x%X)", addr, res); + return res; +} + +static inline void +vmw_shmem_st16(hwaddr addr, uint16_t value) +{ + VMW_SHPRN("SHMEM store16: %" PRIx64 " (value 0x%X)", addr, value); + stw_le_phys(&address_space_memory, addr, value); +} + +static inline uint32_t +vmw_shmem_ld32(hwaddr addr) +{ + uint32_t res = ldl_le_phys(&address_space_memory, addr); + VMW_SHPRN("SHMEM load32: %" PRIx64 " (value 0x%X)", addr, res); + return res; +} + +static inline void +vmw_shmem_st32(hwaddr addr, uint32_t value) +{ + VMW_SHPRN("SHMEM store32: %" PRIx64 " (value 0x%X)", addr, value); + stl_le_phys(&address_space_memory, addr, value); +} + +static inline uint64_t +vmw_shmem_ld64(hwaddr addr) +{ + uint64_t res = ldq_le_phys(&address_space_memory, addr); + VMW_SHPRN("SHMEM load64: %" PRIx64 " (value %" PRIx64 ")", addr, res); + return res; +} + +static inline void +vmw_shmem_st64(hwaddr addr, uint64_t value) +{ + VMW_SHPRN("SHMEM store64: %" PRIx64 " (value %" PRIx64 ")", addr, value); + stq_le_phys(&address_space_memory, addr, value); +} + +/* Macros for simplification of operations on array-style registers */ + +/* + * Whether <addr> lies inside of array-style register defined by <base>, + * number of elements (<cnt>) and element size (<regsize>) + * +*/ +#define VMW_IS_MULTIREG_ADDR(addr, base, cnt, regsize) \ + range_covers_byte(base, cnt * regsize, addr) + +/* + * Returns index of given register (<addr>) in array-style register defined by + * <base> and element size (<regsize>) + * +*/ +#define VMW_MULTIREG_IDX_BY_ADDR(addr, base, regsize) \ + (((addr) - (base)) / (regsize)) + +#endif diff --git a/qemu/hw/net/vmxnet3.c b/qemu/hw/net/vmxnet3.c new file mode 100644 index 000000000..59b06b841 --- /dev/null +++ b/qemu/hw/net/vmxnet3.c @@ -0,0 +1,2578 @@ +/* + * QEMU VMWARE VMXNET3 paravirtual NIC + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "net/tap.h" +#include "net/checksum.h" +#include "sysemu/sysemu.h" +#include "qemu-common.h" +#include "qemu/bswap.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" + +#include "vmxnet3.h" +#include "vmxnet_debug.h" +#include "vmware_utils.h" +#include "vmxnet_tx_pkt.h" +#include "vmxnet_rx_pkt.h" + +#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 +#define VMXNET3_MSIX_BAR_SIZE 0x2000 +#define MIN_BUF_SIZE 60 + +#define VMXNET3_BAR0_IDX (0) +#define VMXNET3_BAR1_IDX (1) +#define VMXNET3_MSIX_BAR_IDX (2) + +#define VMXNET3_OFF_MSIX_TABLE (0x000) +#define VMXNET3_OFF_MSIX_PBA (0x800) + +/* Link speed in Mbps should be shifted by 16 */ +#define VMXNET3_LINK_SPEED (1000 << 16) + +/* Link status: 1 - up, 0 - down. */ +#define VMXNET3_LINK_STATUS_UP 0x1 + +/* Least significant bit should be set for revision and version */ +#define VMXNET3_DEVICE_VERSION 0x1 +#define VMXNET3_DEVICE_REVISION 0x1 + +/* Number of interrupt vectors for non-MSIx modes */ +#define VMXNET3_MAX_NMSIX_INTRS (1) + +/* Macros for rings descriptors access */ +#define VMXNET3_READ_TX_QUEUE_DESCR8(dpa, field) \ + (vmw_shmem_ld8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) + +#define VMXNET3_WRITE_TX_QUEUE_DESCR8(dpa, field, value) \ + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field, value))) + +#define VMXNET3_READ_TX_QUEUE_DESCR32(dpa, field) \ + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) + +#define VMXNET3_WRITE_TX_QUEUE_DESCR32(dpa, field, value) \ + (vmw_shmem_st32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value)) + +#define VMXNET3_READ_TX_QUEUE_DESCR64(dpa, field) \ + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field))) + +#define VMXNET3_WRITE_TX_QUEUE_DESCR64(dpa, field, value) \ + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value)) + +#define VMXNET3_READ_RX_QUEUE_DESCR64(dpa, field) \ + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field))) + +#define VMXNET3_READ_RX_QUEUE_DESCR32(dpa, field) \ + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field))) + +#define VMXNET3_WRITE_RX_QUEUE_DESCR64(dpa, field, value) \ + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value)) + +#define VMXNET3_WRITE_RX_QUEUE_DESCR8(dpa, field, value) \ + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value)) + +/* Macros for guest driver shared area access */ +#define VMXNET3_READ_DRV_SHARED64(shpa, field) \ + (vmw_shmem_ld64(shpa + offsetof(struct Vmxnet3_DriverShared, field))) + +#define VMXNET3_READ_DRV_SHARED32(shpa, field) \ + (vmw_shmem_ld32(shpa + offsetof(struct Vmxnet3_DriverShared, field))) + +#define VMXNET3_WRITE_DRV_SHARED32(shpa, field, val) \ + (vmw_shmem_st32(shpa + offsetof(struct Vmxnet3_DriverShared, field), val)) + +#define VMXNET3_READ_DRV_SHARED16(shpa, field) \ + (vmw_shmem_ld16(shpa + offsetof(struct Vmxnet3_DriverShared, field))) + +#define VMXNET3_READ_DRV_SHARED8(shpa, field) \ + (vmw_shmem_ld8(shpa + offsetof(struct Vmxnet3_DriverShared, field))) + +#define VMXNET3_READ_DRV_SHARED(shpa, field, b, l) \ + (vmw_shmem_read(shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l)) + +#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag)) + +#define TYPE_VMXNET3 "vmxnet3" +#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3) + +/* Cyclic ring abstraction */ +typedef struct { + hwaddr pa; + size_t size; + size_t cell_size; + size_t next; + uint8_t gen; +} Vmxnet3Ring; + +static inline void vmxnet3_ring_init(Vmxnet3Ring *ring, + hwaddr pa, + size_t size, + size_t cell_size, + bool zero_region) +{ + ring->pa = pa; + ring->size = size; + ring->cell_size = cell_size; + ring->gen = VMXNET3_INIT_GEN; + ring->next = 0; + + if (zero_region) { + vmw_shmem_set(pa, 0, size * cell_size); + } +} + +#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r) \ + macro("%s#%d: base %" PRIx64 " size %lu cell_size %lu gen %d next %lu", \ + (ring_name), (ridx), \ + (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next) + +static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring) +{ + if (++ring->next >= ring->size) { + ring->next = 0; + ring->gen ^= 1; + } +} + +static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring) +{ + if (ring->next-- == 0) { + ring->next = ring->size - 1; + ring->gen ^= 1; + } +} + +static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring) +{ + return ring->pa + ring->next * ring->cell_size; +} + +static inline void vmxnet3_ring_read_curr_cell(Vmxnet3Ring *ring, void *buff) +{ + vmw_shmem_read(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size); +} + +static inline void vmxnet3_ring_write_curr_cell(Vmxnet3Ring *ring, void *buff) +{ + vmw_shmem_write(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size); +} + +static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring) +{ + return ring->next; +} + +static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring) +{ + return ring->gen; +} + +/* Debug trace-related functions */ +static inline void +vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr) +{ + VMW_PKPRN("TX DESCR: " + "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, " + "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, " + "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d", + le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd, + descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om, + descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci); +} + +static inline void +vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr) +{ + VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, " + "csum_start: %d, csum_offset: %d", + vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size, + vhdr->csum_start, vhdr->csum_offset); +} + +static inline void +vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr) +{ + VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, " + "dtype: %d, ext1: %d, btype: %d", + le64_to_cpu(descr->addr), descr->len, descr->gen, + descr->rsvd, descr->dtype, descr->ext1, descr->btype); +} + +/* Device state and helper functions */ +#define VMXNET3_RX_RINGS_PER_QUEUE (2) + +typedef struct { + Vmxnet3Ring tx_ring; + Vmxnet3Ring comp_ring; + + uint8_t intr_idx; + hwaddr tx_stats_pa; + struct UPT1_TxStats txq_stats; +} Vmxnet3TxqDescr; + +typedef struct { + Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE]; + Vmxnet3Ring comp_ring; + uint8_t intr_idx; + hwaddr rx_stats_pa; + struct UPT1_RxStats rxq_stats; +} Vmxnet3RxqDescr; + +typedef struct { + bool is_masked; + bool is_pending; + bool is_asserted; +} Vmxnet3IntState; + +typedef struct { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + MemoryRegion bar0; + MemoryRegion bar1; + MemoryRegion msix_bar; + + Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES]; + Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES]; + + /* Whether MSI-X support was installed successfully */ + bool msix_used; + /* Whether MSI support was installed successfully */ + bool msi_used; + hwaddr drv_shmem; + hwaddr temp_shared_guest_driver_memory; + + uint8_t txq_num; + + /* This boolean tells whether RX packet being indicated has to */ + /* be split into head and body chunks from different RX rings */ + bool rx_packets_compound; + + bool rx_vlan_stripping; + bool lro_supported; + + uint8_t rxq_num; + + /* Network MTU */ + uint32_t mtu; + + /* Maximum number of fragments for indicated TX packets */ + uint32_t max_tx_frags; + + /* Maximum number of fragments for indicated RX packets */ + uint16_t max_rx_frags; + + /* Index for events interrupt */ + uint8_t event_int_idx; + + /* Whether automatic interrupts masking enabled */ + bool auto_int_masking; + + bool peer_has_vhdr; + + /* TX packets to QEMU interface */ + struct VmxnetTxPkt *tx_pkt; + uint32_t offload_mode; + uint32_t cso_or_gso_size; + uint16_t tci; + bool needs_vlan; + + struct VmxnetRxPkt *rx_pkt; + + bool tx_sop; + bool skip_current_tx_pkt; + + uint32_t device_active; + uint32_t last_command; + + uint32_t link_status_and_speed; + + Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS]; + + uint32_t temp_mac; /* To store the low part first */ + + MACAddr perm_mac; + uint32_t vlan_table[VMXNET3_VFT_SIZE]; + uint32_t rx_mode; + MACAddr *mcast_list; + uint32_t mcast_list_len; + uint32_t mcast_list_buff_size; /* needed for live migration. */ +} VMXNET3State; + +/* Interrupt management */ + +/* + *This function returns sign whether interrupt line is in asserted state + * This depends on the type of interrupt used. For INTX interrupt line will + * be asserted until explicit deassertion, for MSI(X) interrupt line will + * be deasserted automatically due to notification semantics of the MSI(X) + * interrupts + */ +static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx) +{ + PCIDevice *d = PCI_DEVICE(s); + + if (s->msix_used && msix_enabled(d)) { + VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx); + msix_notify(d, int_idx); + return false; + } + if (s->msi_used && msi_enabled(d)) { + VMW_IRPRN("Sending MSI notification for vector %u", int_idx); + msi_notify(d, int_idx); + return false; + } + + VMW_IRPRN("Asserting line for interrupt %u", int_idx); + pci_irq_assert(d); + return true; +} + +static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx) +{ + PCIDevice *d = PCI_DEVICE(s); + + /* + * This function should never be called for MSI(X) interrupts + * because deassertion never required for message interrupts + */ + assert(!s->msix_used || !msix_enabled(d)); + /* + * This function should never be called for MSI(X) interrupts + * because deassertion never required for message interrupts + */ + assert(!s->msi_used || !msi_enabled(d)); + + VMW_IRPRN("Deasserting line for interrupt %u", lidx); + pci_irq_deassert(d); +} + +static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx) +{ + if (!s->interrupt_states[lidx].is_pending && + s->interrupt_states[lidx].is_asserted) { + VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx); + _vmxnet3_deassert_interrupt_line(s, lidx); + s->interrupt_states[lidx].is_asserted = false; + return; + } + + if (s->interrupt_states[lidx].is_pending && + !s->interrupt_states[lidx].is_masked && + !s->interrupt_states[lidx].is_asserted) { + VMW_IRPRN("New interrupt line state for index %d is UP", lidx); + s->interrupt_states[lidx].is_asserted = + _vmxnet3_assert_interrupt_line(s, lidx); + s->interrupt_states[lidx].is_pending = false; + return; + } +} + +static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx) +{ + PCIDevice *d = PCI_DEVICE(s); + s->interrupt_states[lidx].is_pending = true; + vmxnet3_update_interrupt_line_state(s, lidx); + + if (s->msix_used && msix_enabled(d) && s->auto_int_masking) { + goto do_automask; + } + + if (s->msi_used && msi_enabled(d) && s->auto_int_masking) { + goto do_automask; + } + + return; + +do_automask: + s->interrupt_states[lidx].is_masked = true; + vmxnet3_update_interrupt_line_state(s, lidx); +} + +static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx) +{ + return s->interrupt_states[lidx].is_asserted; +} + +static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx) +{ + s->interrupt_states[int_idx].is_pending = false; + if (s->auto_int_masking) { + s->interrupt_states[int_idx].is_masked = true; + } + vmxnet3_update_interrupt_line_state(s, int_idx); +} + +static void +vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked) +{ + s->interrupt_states[lidx].is_masked = is_masked; + vmxnet3_update_interrupt_line_state(s, lidx); +} + +static bool vmxnet3_verify_driver_magic(hwaddr dshmem) +{ + return (VMXNET3_READ_DRV_SHARED32(dshmem, magic) == VMXNET3_REV1_MAGIC); +} + +#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF) +#define VMXNET3_MAKE_BYTE(byte_num, val) \ + (((uint32_t)((val) & 0xFF)) << (byte_num)*8) + +static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l) +{ + s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l, 0); + s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l, 1); + s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l, 2); + s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l, 3); + s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0); + s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1); + + VMW_CFPRN("Variable MAC: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); +} + +static uint64_t vmxnet3_get_mac_low(MACAddr *addr) +{ + return VMXNET3_MAKE_BYTE(0, addr->a[0]) | + VMXNET3_MAKE_BYTE(1, addr->a[1]) | + VMXNET3_MAKE_BYTE(2, addr->a[2]) | + VMXNET3_MAKE_BYTE(3, addr->a[3]); +} + +static uint64_t vmxnet3_get_mac_high(MACAddr *addr) +{ + return VMXNET3_MAKE_BYTE(0, addr->a[4]) | + VMXNET3_MAKE_BYTE(1, addr->a[5]); +} + +static void +vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx) +{ + vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring); +} + +static inline void +vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx) +{ + vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]); +} + +static inline void +vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx) +{ + vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring); +} + +static void +vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx) +{ + vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring); +} + +static void +vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx) +{ + vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring); +} + +static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32 tx_ridx) +{ + struct Vmxnet3_TxCompDesc txcq_descr; + + VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring); + + txcq_descr.txdIdx = tx_ridx; + txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring); + + vmxnet3_ring_write_curr_cell(&s->txq_descr[qidx].comp_ring, &txcq_descr); + + /* Flush changes in TX descriptor before changing the counter value */ + smp_wmb(); + + vmxnet3_inc_tx_completion_counter(s, qidx); + vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx); +} + +static bool +vmxnet3_setup_tx_offloads(VMXNET3State *s) +{ + switch (s->offload_mode) { + case VMXNET3_OM_NONE: + vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); + break; + + case VMXNET3_OM_CSUM: + vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); + VMW_PKPRN("L4 CSO requested\n"); + break; + + case VMXNET3_OM_TSO: + vmxnet_tx_pkt_build_vheader(s->tx_pkt, true, true, + s->cso_or_gso_size); + vmxnet_tx_pkt_update_ip_checksums(s->tx_pkt); + VMW_PKPRN("GSO offload requested."); + break; + + default: + g_assert_not_reached(); + return false; + } + + return true; +} + +static void +vmxnet3_tx_retrieve_metadata(VMXNET3State *s, + const struct Vmxnet3_TxDesc *txd) +{ + s->offload_mode = txd->om; + s->cso_or_gso_size = txd->msscof; + s->tci = txd->tci; + s->needs_vlan = txd->ti; +} + +typedef enum { + VMXNET3_PKT_STATUS_OK, + VMXNET3_PKT_STATUS_ERROR, + VMXNET3_PKT_STATUS_DISCARD,/* only for tx */ + VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */ +} Vmxnet3PktStatus; + +static void +vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx, + Vmxnet3PktStatus status) +{ + size_t tot_len = vmxnet_tx_pkt_get_total_len(s->tx_pkt); + struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats; + + switch (status) { + case VMXNET3_PKT_STATUS_OK: + switch (vmxnet_tx_pkt_get_packet_type(s->tx_pkt)) { + case ETH_PKT_BCAST: + stats->bcastPktsTxOK++; + stats->bcastBytesTxOK += tot_len; + break; + case ETH_PKT_MCAST: + stats->mcastPktsTxOK++; + stats->mcastBytesTxOK += tot_len; + break; + case ETH_PKT_UCAST: + stats->ucastPktsTxOK++; + stats->ucastBytesTxOK += tot_len; + break; + default: + g_assert_not_reached(); + } + + if (s->offload_mode == VMXNET3_OM_TSO) { + /* + * According to VMWARE headers this statistic is a number + * of packets after segmentation but since we don't have + * this information in QEMU model, the best we can do is to + * provide number of non-segmented packets + */ + stats->TSOPktsTxOK++; + stats->TSOBytesTxOK += tot_len; + } + break; + + case VMXNET3_PKT_STATUS_DISCARD: + stats->pktsTxDiscard++; + break; + + case VMXNET3_PKT_STATUS_ERROR: + stats->pktsTxError++; + break; + + default: + g_assert_not_reached(); + } +} + +static void +vmxnet3_on_rx_done_update_stats(VMXNET3State *s, + int qidx, + Vmxnet3PktStatus status) +{ + struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats; + size_t tot_len = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + + switch (status) { + case VMXNET3_PKT_STATUS_OUT_OF_BUF: + stats->pktsRxOutOfBuf++; + break; + + case VMXNET3_PKT_STATUS_ERROR: + stats->pktsRxError++; + break; + case VMXNET3_PKT_STATUS_OK: + switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + case ETH_PKT_BCAST: + stats->bcastPktsRxOK++; + stats->bcastBytesRxOK += tot_len; + break; + case ETH_PKT_MCAST: + stats->mcastPktsRxOK++; + stats->mcastBytesRxOK += tot_len; + break; + case ETH_PKT_UCAST: + stats->ucastPktsRxOK++; + stats->ucastBytesRxOK += tot_len; + break; + default: + g_assert_not_reached(); + } + + if (tot_len > s->mtu) { + stats->LROPktsRxOK++; + stats->LROBytesRxOK += tot_len; + } + break; + default: + g_assert_not_reached(); + } +} + +static inline bool +vmxnet3_pop_next_tx_descr(VMXNET3State *s, + int qidx, + struct Vmxnet3_TxDesc *txd, + uint32_t *descr_idx) +{ + Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring; + + vmxnet3_ring_read_curr_cell(ring, txd); + if (txd->gen == vmxnet3_ring_curr_gen(ring)) { + /* Only read after generation field verification */ + smp_rmb(); + /* Re-read to be sure we got the latest version */ + vmxnet3_ring_read_curr_cell(ring, txd); + VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring); + *descr_idx = vmxnet3_ring_curr_cell_idx(ring); + vmxnet3_inc_tx_consumption_counter(s, qidx); + return true; + } + + return false; +} + +static bool +vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx) +{ + Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK; + + if (!vmxnet3_setup_tx_offloads(s)) { + status = VMXNET3_PKT_STATUS_ERROR; + goto func_exit; + } + + /* debug prints */ + vmxnet3_dump_virt_hdr(vmxnet_tx_pkt_get_vhdr(s->tx_pkt)); + vmxnet_tx_pkt_dump(s->tx_pkt); + + if (!vmxnet_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { + status = VMXNET3_PKT_STATUS_DISCARD; + goto func_exit; + } + +func_exit: + vmxnet3_on_tx_done_update_stats(s, qidx, status); + return (status == VMXNET3_PKT_STATUS_OK); +} + +static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) +{ + struct Vmxnet3_TxDesc txd; + uint32_t txd_idx; + uint32_t data_len; + hwaddr data_pa; + + for (;;) { + if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) { + break; + } + + vmxnet3_dump_tx_descr(&txd); + + if (!s->skip_current_tx_pkt) { + data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE; + data_pa = le64_to_cpu(txd.addr); + + if (!vmxnet_tx_pkt_add_raw_fragment(s->tx_pkt, + data_pa, + data_len)) { + s->skip_current_tx_pkt = true; + } + } + + if (s->tx_sop) { + vmxnet3_tx_retrieve_metadata(s, &txd); + s->tx_sop = false; + } + + if (txd.eop) { + if (!s->skip_current_tx_pkt) { + vmxnet_tx_pkt_parse(s->tx_pkt); + + if (s->needs_vlan) { + vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); + } + + vmxnet3_send_packet(s, qidx); + } else { + vmxnet3_on_tx_done_update_stats(s, qidx, + VMXNET3_PKT_STATUS_ERROR); + } + + vmxnet3_complete_packet(s, qidx, txd_idx); + s->tx_sop = true; + s->skip_current_tx_pkt = false; + vmxnet_tx_pkt_reset(s->tx_pkt); + } + } +} + +static inline void +vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx, + struct Vmxnet3_RxDesc *dbuf, uint32_t *didx) +{ + Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx]; + *didx = vmxnet3_ring_curr_cell_idx(ring); + vmxnet3_ring_read_curr_cell(ring, dbuf); +} + +static inline uint8_t +vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx) +{ + return s->rxq_descr[qidx].rx_ring[ridx].gen; +} + +static inline hwaddr +vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen) +{ + uint8_t ring_gen; + struct Vmxnet3_RxCompDesc rxcd; + + hwaddr daddr = + vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring); + + cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring); + + if (rxcd.gen != ring_gen) { + *descr_gen = ring_gen; + vmxnet3_inc_rx_completion_counter(s, qidx); + return daddr; + } + + return 0; +} + +static inline void +vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx) +{ + vmxnet3_dec_rx_completion_counter(s, qidx); +} + +#define RXQ_IDX (0) +#define RX_HEAD_BODY_RING (0) +#define RX_BODY_ONLY_RING (1) + +static bool +vmxnet3_get_next_head_rx_descr(VMXNET3State *s, + struct Vmxnet3_RxDesc *descr_buf, + uint32_t *descr_idx, + uint32_t *ridx) +{ + for (;;) { + uint32_t ring_gen; + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, + descr_buf, descr_idx); + + /* If no more free descriptors - return */ + ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING); + if (descr_buf->gen != ring_gen) { + return false; + } + + /* Only read after generation field verification */ + smp_rmb(); + /* Re-read to be sure we got the latest version */ + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, + descr_buf, descr_idx); + + /* Mark current descriptor as used/skipped */ + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING); + + /* If this is what we are looking for - return */ + if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) { + *ridx = RX_HEAD_BODY_RING; + return true; + } + } +} + +static bool +vmxnet3_get_next_body_rx_descr(VMXNET3State *s, + struct Vmxnet3_RxDesc *d, + uint32_t *didx, + uint32_t *ridx) +{ + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx); + + /* Try to find corresponding descriptor in head/body ring */ + if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) { + /* Only read after generation field verification */ + smp_rmb(); + /* Re-read to be sure we got the latest version */ + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx); + if (d->btype == VMXNET3_RXD_BTYPE_BODY) { + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING); + *ridx = RX_HEAD_BODY_RING; + return true; + } + } + + /* + * If there is no free descriptors on head/body ring or next free + * descriptor is a head descriptor switch to body only ring + */ + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx); + + /* If no more free descriptors - return */ + if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) { + /* Only read after generation field verification */ + smp_rmb(); + /* Re-read to be sure we got the latest version */ + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx); + assert(d->btype == VMXNET3_RXD_BTYPE_BODY); + *ridx = RX_BODY_ONLY_RING; + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING); + return true; + } + + return false; +} + +static inline bool +vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head, + struct Vmxnet3_RxDesc *descr_buf, + uint32_t *descr_idx, + uint32_t *ridx) +{ + if (is_head || !s->rx_packets_compound) { + return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx); + } else { + return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx); + } +} + +/* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID), + * the implementation always passes an RxCompDesc with a "Checksum + * calculated and found correct" to the OS (cnc=0 and tuc=1, see + * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior. + * + * Therefore, if packet has the NEEDS_CSUM set, we must calculate + * and place a fully computed checksum into the tcp/udp header. + * Otherwise, the OS driver will receive a checksum-correct indication + * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field + * having just the pseudo header csum value. + * + * While this is not a problem if packet is destined for local delivery, + * in the case the host OS performs forwarding, it will forward an + * incorrectly checksummed packet. + */ +static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, + const void *pkt_data, + size_t pkt_len) +{ + struct virtio_net_hdr *vhdr; + bool isip4, isip6, istcp, isudp; + uint8_t *data; + int len; + + if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + return; + } + + vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) { + return; + } + + vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + if (!(isip4 || isip6) || !(istcp || isudp)) { + return; + } + + vmxnet3_dump_virt_hdr(vhdr); + + /* Validate packet len: csum_start + scum_offset + length of csum field */ + if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) { + VMW_PKPRN("packet len:%d < csum_start(%d) + csum_offset(%d) + 2, " + "cannot calculate checksum", + len, vhdr->csum_start, vhdr->csum_offset); + return; + } + + data = (uint8_t *)pkt_data + vhdr->csum_start; + len = pkt_len - vhdr->csum_start; + /* Put the checksum obtained into the packet */ + stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len)); + + vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; + vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; +} + +static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, + struct Vmxnet3_RxCompDesc *rxcd) +{ + int csum_ok, is_gso; + bool isip4, isip6, istcp, isudp; + struct virtio_net_hdr *vhdr; + uint8_t offload_type; + + if (vmxnet_rx_pkt_is_vlan_stripped(pkt)) { + rxcd->ts = 1; + rxcd->tci = vmxnet_rx_pkt_get_vlan_tag(pkt); + } + + if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + goto nocsum; + } + + vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + /* + * Checksum is valid when lower level tell so or when lower level + * requires checksum offload telling that packet produced/bridged + * locally and did travel over network after last checksum calculation + * or production + */ + csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) || + VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM); + + offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0; + + if (!csum_ok && !is_gso) { + goto nocsum; + } + + vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + if ((!istcp && !isudp) || (!isip4 && !isip6)) { + goto nocsum; + } + + rxcd->cnc = 0; + rxcd->v4 = isip4 ? 1 : 0; + rxcd->v6 = isip6 ? 1 : 0; + rxcd->tcp = istcp ? 1 : 0; + rxcd->udp = isudp ? 1 : 0; + rxcd->fcs = rxcd->tuc = rxcd->ipc = 1; + return; + +nocsum: + rxcd->cnc = 1; + return; +} + +static void +vmxnet3_physical_memory_writev(const struct iovec *iov, + size_t start_iov_off, + hwaddr target_addr, + size_t bytes_to_copy) +{ + size_t curr_off = 0; + size_t copied = 0; + + while (bytes_to_copy) { + if (start_iov_off < (curr_off + iov->iov_len)) { + size_t chunk_len = + MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy); + + cpu_physical_memory_write(target_addr + copied, + iov->iov_base + start_iov_off - curr_off, + chunk_len); + + copied += chunk_len; + start_iov_off += chunk_len; + curr_off = start_iov_off; + bytes_to_copy -= chunk_len; + } else { + curr_off += iov->iov_len; + } + iov++; + } +} + +static bool +vmxnet3_indicate_packet(VMXNET3State *s) +{ + struct Vmxnet3_RxDesc rxd; + bool is_head = true; + uint32_t rxd_idx; + uint32_t rx_ridx = 0; + + struct Vmxnet3_RxCompDesc rxcd; + uint32_t new_rxcd_gen = VMXNET3_INIT_GEN; + hwaddr new_rxcd_pa = 0; + hwaddr ready_rxcd_pa = 0; + struct iovec *data = vmxnet_rx_pkt_get_iovec(s->rx_pkt); + size_t bytes_copied = 0; + size_t bytes_left = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + uint16_t num_frags = 0; + size_t chunk_size; + + vmxnet_rx_pkt_dump(s->rx_pkt); + + while (bytes_left > 0) { + + /* cannot add more frags to packet */ + if (num_frags == s->max_rx_frags) { + break; + } + + new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen); + if (!new_rxcd_pa) { + break; + } + + if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) { + break; + } + + chunk_size = MIN(bytes_left, rxd.len); + vmxnet3_physical_memory_writev(data, bytes_copied, + le64_to_cpu(rxd.addr), chunk_size); + bytes_copied += chunk_size; + bytes_left -= chunk_size; + + vmxnet3_dump_rx_descr(&rxd); + + if (ready_rxcd_pa != 0) { + cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + } + + memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc)); + rxcd.rxdIdx = rxd_idx; + rxcd.len = chunk_size; + rxcd.sop = is_head; + rxcd.gen = new_rxcd_gen; + rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num; + + if (bytes_left == 0) { + vmxnet3_rx_update_descr(s->rx_pkt, &rxcd); + } + + VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu " + "sop %d csum_correct %lu", + (unsigned long) rx_ridx, + (unsigned long) rxcd.rxdIdx, + (unsigned long) rxcd.len, + (int) rxcd.sop, + (unsigned long) rxcd.tuc); + + is_head = false; + ready_rxcd_pa = new_rxcd_pa; + new_rxcd_pa = 0; + num_frags++; + } + + if (ready_rxcd_pa != 0) { + rxcd.eop = 1; + rxcd.err = (bytes_left != 0); + cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + + /* Flush RX descriptor changes */ + smp_wmb(); + } + + if (new_rxcd_pa != 0) { + vmxnet3_revert_rxc_descr(s, RXQ_IDX); + } + + vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx); + + if (bytes_left == 0) { + vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK); + return true; + } else if (num_frags == s->max_rx_frags) { + vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR); + return false; + } else { + vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, + VMXNET3_PKT_STATUS_OUT_OF_BUF); + return false; + } +} + +static void +vmxnet3_io_bar0_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + VMXNET3State *s = opaque; + + if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD, + VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) { + int tx_queue_idx = + VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, + VMXNET3_REG_ALIGN); + assert(tx_queue_idx <= s->txq_num); + vmxnet3_process_tx_queue(s, tx_queue_idx); + return; + } + + if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR, + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) { + int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, + VMXNET3_REG_ALIGN); + + VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val); + + vmxnet3_on_interrupt_mask_changed(s, l, val); + return; + } + + if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD, + VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) || + VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2, + VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) { + return; + } + + VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d", + (uint64_t) addr, val, size); +} + +static uint64_t +vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size) +{ + if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR, + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) { + g_assert_not_reached(); + } + + VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size); + return 0; +} + +static void vmxnet3_reset_interrupt_states(VMXNET3State *s) +{ + int i; + for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) { + s->interrupt_states[i].is_asserted = false; + s->interrupt_states[i].is_pending = false; + s->interrupt_states[i].is_masked = true; + } +} + +static void vmxnet3_reset_mac(VMXNET3State *s) +{ + memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a)); + VMW_CFPRN("MAC address set to: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); +} + +static void vmxnet3_deactivate_device(VMXNET3State *s) +{ + VMW_CBPRN("Deactivating vmxnet3..."); + s->device_active = false; +} + +static void vmxnet3_reset(VMXNET3State *s) +{ + VMW_CBPRN("Resetting vmxnet3..."); + + vmxnet3_deactivate_device(s); + vmxnet3_reset_interrupt_states(s); + vmxnet_tx_pkt_reset(s->tx_pkt); + s->drv_shmem = 0; + s->tx_sop = true; + s->skip_current_tx_pkt = false; +} + +static void vmxnet3_update_rx_mode(VMXNET3State *s) +{ + s->rx_mode = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, + devRead.rxFilterConf.rxMode); + VMW_CFPRN("RX mode: 0x%08X", s->rx_mode); +} + +static void vmxnet3_update_vlan_filters(VMXNET3State *s) +{ + int i; + + /* Copy configuration from shared memory */ + VMXNET3_READ_DRV_SHARED(s->drv_shmem, + devRead.rxFilterConf.vfTable, + s->vlan_table, + sizeof(s->vlan_table)); + + /* Invert byte order when needed */ + for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) { + s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]); + } + + /* Dump configuration for debugging purposes */ + VMW_CFPRN("Configured VLANs:"); + for (i = 0; i < sizeof(s->vlan_table) * 8; i++) { + if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) { + VMW_CFPRN("\tVLAN %d is present", i); + } + } +} + +static void vmxnet3_update_mcast_filters(VMXNET3State *s) +{ + uint16_t list_bytes = + VMXNET3_READ_DRV_SHARED16(s->drv_shmem, + devRead.rxFilterConf.mfTableLen); + + s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]); + + s->mcast_list = g_realloc(s->mcast_list, list_bytes); + if (!s->mcast_list) { + if (s->mcast_list_len == 0) { + VMW_CFPRN("Current multicast list is empty"); + } else { + VMW_ERPRN("Failed to allocate multicast list of %d elements", + s->mcast_list_len); + } + s->mcast_list_len = 0; + } else { + int i; + hwaddr mcast_list_pa = + VMXNET3_READ_DRV_SHARED64(s->drv_shmem, + devRead.rxFilterConf.mfTablePA); + + cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes); + VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); + for (i = 0; i < s->mcast_list_len; i++) { + VMW_CFPRN("\t" VMXNET_MF, VMXNET_MA(s->mcast_list[i].a)); + } + } +} + +static void vmxnet3_setup_rx_filtering(VMXNET3State *s) +{ + vmxnet3_update_rx_mode(s); + vmxnet3_update_vlan_filters(s); + vmxnet3_update_mcast_filters(s); +} + +static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s) +{ + uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2); + VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode); + return interrupt_mode; +} + +static void vmxnet3_fill_stats(VMXNET3State *s) +{ + int i; + for (i = 0; i < s->txq_num; i++) { + cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa, + &s->txq_descr[i].txq_stats, + sizeof(s->txq_descr[i].txq_stats)); + } + + for (i = 0; i < s->rxq_num; i++) { + cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa, + &s->rxq_descr[i].rxq_stats, + sizeof(s->rxq_descr[i].rxq_stats)); + } +} + +static void vmxnet3_adjust_by_guest_type(VMXNET3State *s) +{ + struct Vmxnet3_GOSInfo gos; + + VMXNET3_READ_DRV_SHARED(s->drv_shmem, devRead.misc.driverInfo.gos, + &gos, sizeof(gos)); + s->rx_packets_compound = + (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true; + + VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound); +} + +static void +vmxnet3_dump_conf_descr(const char *name, + struct Vmxnet3_VariableLenConfDesc *pm_descr) +{ + VMW_CFPRN("%s descriptor dump: Version %u, Length %u", + name, pm_descr->confVer, pm_descr->confLen); + +}; + +static void vmxnet3_update_pm_state(VMXNET3State *s) +{ + struct Vmxnet3_VariableLenConfDesc pm_descr; + + pm_descr.confLen = + VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confLen); + pm_descr.confVer = + VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confVer); + pm_descr.confPA = + VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.pmConfDesc.confPA); + + vmxnet3_dump_conf_descr("PM State", &pm_descr); +} + +static void vmxnet3_update_features(VMXNET3State *s) +{ + uint32_t guest_features; + int rxcso_supported; + + guest_features = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, + devRead.misc.uptFeatures); + + rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM); + s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN); + s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO); + + VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d", + s->lro_supported, rxcso_supported, + s->rx_vlan_stripping); + if (s->peer_has_vhdr) { + qemu_set_offload(qemu_get_queue(s->nic)->peer, + rxcso_supported, + s->lro_supported, + s->lro_supported, + 0, + 0); + } +} + +static bool vmxnet3_verify_intx(VMXNET3State *s, int intx) +{ + return s->msix_used || s->msi_used || (intx == + (pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1)); +} + +static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx) +{ + int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS; + if (idx >= max_ints) { + hw_error("Bad interrupt index: %d\n", idx); + } +} + +static void vmxnet3_validate_interrupts(VMXNET3State *s) +{ + int i; + + VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx); + vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx); + + for (i = 0; i < s->txq_num; i++) { + int idx = s->txq_descr[i].intr_idx; + VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx); + vmxnet3_validate_interrupt_idx(s->msix_used, idx); + } + + for (i = 0; i < s->rxq_num; i++) { + int idx = s->rxq_descr[i].intr_idx; + VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx); + vmxnet3_validate_interrupt_idx(s->msix_used, idx); + } +} + +static void vmxnet3_validate_queues(VMXNET3State *s) +{ + /* + * txq_num and rxq_num are total number of queues + * configured by guest. These numbers must not + * exceed corresponding maximal values. + */ + + if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) { + hw_error("Bad TX queues number: %d\n", s->txq_num); + } + + if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) { + hw_error("Bad RX queues number: %d\n", s->rxq_num); + } +} + +static void vmxnet3_activate_device(VMXNET3State *s) +{ + int i; + static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1; + hwaddr qdescr_table_pa; + uint64_t pa; + uint32_t size; + + /* Verify configuration consistency */ + if (!vmxnet3_verify_driver_magic(s->drv_shmem)) { + VMW_ERPRN("Device configuration received from driver is invalid"); + return; + } + + vmxnet3_adjust_by_guest_type(s); + vmxnet3_update_features(s); + vmxnet3_update_pm_state(s); + vmxnet3_setup_rx_filtering(s); + /* Cache fields from shared memory */ + s->mtu = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.misc.mtu); + VMW_CFPRN("MTU is %u", s->mtu); + + s->max_rx_frags = + VMXNET3_READ_DRV_SHARED16(s->drv_shmem, devRead.misc.maxNumRxSG); + + if (s->max_rx_frags == 0) { + s->max_rx_frags = 1; + } + + VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags); + + s->event_int_idx = + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.eventIntrIdx); + assert(vmxnet3_verify_intx(s, s->event_int_idx)); + VMW_CFPRN("Events interrupt line is %u", s->event_int_idx); + + s->auto_int_masking = + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.autoMask); + VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking); + + s->txq_num = + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numTxQueues); + s->rxq_num = + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numRxQueues); + + VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); + vmxnet3_validate_queues(s); + + qdescr_table_pa = + VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.misc.queueDescPA); + VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa); + + /* + * Worst-case scenario is a packet that holds all TX rings space so + * we calculate total size of all TX rings for max TX fragments number + */ + s->max_tx_frags = 0; + + /* TX queues */ + for (i = 0; i < s->txq_num; i++) { + hwaddr qdescr_pa = + qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc); + + /* Read interrupt number for this TX queue */ + s->txq_descr[i].intr_idx = + VMXNET3_READ_TX_QUEUE_DESCR8(qdescr_pa, conf.intrIdx); + assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx)); + + VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx); + + /* Read rings memory locations for TX queues */ + pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.txRingBasePA); + size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.txRingSize); + + vmxnet3_ring_init(&s->txq_descr[i].tx_ring, pa, size, + sizeof(struct Vmxnet3_TxDesc), false); + VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring); + + s->max_tx_frags += size; + + /* TXC ring */ + pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.compRingBasePA); + size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.compRingSize); + vmxnet3_ring_init(&s->txq_descr[i].comp_ring, pa, size, + sizeof(struct Vmxnet3_TxCompDesc), true); + VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); + + s->txq_descr[i].tx_stats_pa = + qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats); + + memset(&s->txq_descr[i].txq_stats, 0, + sizeof(s->txq_descr[i].txq_stats)); + + /* Fill device-managed parameters for queues */ + VMXNET3_WRITE_TX_QUEUE_DESCR32(qdescr_pa, + ctrl.txThreshold, + VMXNET3_DEF_TX_THRESHOLD); + } + + /* Preallocate TX packet wrapper */ + VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); + vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + + /* Read rings memory locations for RX queues */ + for (i = 0; i < s->rxq_num; i++) { + int j; + hwaddr qd_pa = + qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) + + i * sizeof(struct Vmxnet3_RxQueueDesc); + + /* Read interrupt number for this RX queue */ + s->rxq_descr[i].intr_idx = + VMXNET3_READ_TX_QUEUE_DESCR8(qd_pa, conf.intrIdx); + assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx)); + + VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx); + + /* Read rings memory locations */ + for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) { + /* RX rings */ + pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.rxRingBasePA[j]); + size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.rxRingSize[j]); + vmxnet3_ring_init(&s->rxq_descr[i].rx_ring[j], pa, size, + sizeof(struct Vmxnet3_RxDesc), false); + VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", + i, j, pa, size); + } + + /* RXC ring */ + pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.compRingBasePA); + size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.compRingSize); + vmxnet3_ring_init(&s->rxq_descr[i].comp_ring, pa, size, + sizeof(struct Vmxnet3_RxCompDesc), true); + VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); + + s->rxq_descr[i].rx_stats_pa = + qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats); + memset(&s->rxq_descr[i].rxq_stats, 0, + sizeof(s->rxq_descr[i].rxq_stats)); + } + + vmxnet3_validate_interrupts(s); + + /* Make sure everything is in place before device activation */ + smp_wmb(); + + vmxnet3_reset_mac(s); + + s->device_active = true; +} + +static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd) +{ + s->last_command = cmd; + + switch (cmd) { + case VMXNET3_CMD_GET_PERM_MAC_HI: + VMW_CBPRN("Set: Get upper part of permanent MAC"); + break; + + case VMXNET3_CMD_GET_PERM_MAC_LO: + VMW_CBPRN("Set: Get lower part of permanent MAC"); + break; + + case VMXNET3_CMD_GET_STATS: + VMW_CBPRN("Set: Get device statistics"); + vmxnet3_fill_stats(s); + break; + + case VMXNET3_CMD_ACTIVATE_DEV: + VMW_CBPRN("Set: Activating vmxnet3 device"); + vmxnet3_activate_device(s); + break; + + case VMXNET3_CMD_UPDATE_RX_MODE: + VMW_CBPRN("Set: Update rx mode"); + vmxnet3_update_rx_mode(s); + break; + + case VMXNET3_CMD_UPDATE_VLAN_FILTERS: + VMW_CBPRN("Set: Update VLAN filters"); + vmxnet3_update_vlan_filters(s); + break; + + case VMXNET3_CMD_UPDATE_MAC_FILTERS: + VMW_CBPRN("Set: Update MAC filters"); + vmxnet3_update_mcast_filters(s); + break; + + case VMXNET3_CMD_UPDATE_FEATURE: + VMW_CBPRN("Set: Update features"); + vmxnet3_update_features(s); + break; + + case VMXNET3_CMD_UPDATE_PMCFG: + VMW_CBPRN("Set: Update power management config"); + vmxnet3_update_pm_state(s); + break; + + case VMXNET3_CMD_GET_LINK: + VMW_CBPRN("Set: Get link"); + break; + + case VMXNET3_CMD_RESET_DEV: + VMW_CBPRN("Set: Reset device"); + vmxnet3_reset(s); + break; + + case VMXNET3_CMD_QUIESCE_DEV: + VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - pause the device"); + vmxnet3_deactivate_device(s); + break; + + case VMXNET3_CMD_GET_CONF_INTR: + VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration"); + break; + + default: + VMW_CBPRN("Received unknown command: %" PRIx64, cmd); + break; + } +} + +static uint64_t vmxnet3_get_command_status(VMXNET3State *s) +{ + uint64_t ret; + + switch (s->last_command) { + case VMXNET3_CMD_ACTIVATE_DEV: + ret = (s->device_active) ? 0 : -1; + VMW_CFPRN("Device active: %" PRIx64, ret); + break; + + case VMXNET3_CMD_RESET_DEV: + case VMXNET3_CMD_QUIESCE_DEV: + case VMXNET3_CMD_GET_QUEUE_STATUS: + ret = 0; + break; + + case VMXNET3_CMD_GET_LINK: + ret = s->link_status_and_speed; + VMW_CFPRN("Link and speed: %" PRIx64, ret); + break; + + case VMXNET3_CMD_GET_PERM_MAC_LO: + ret = vmxnet3_get_mac_low(&s->perm_mac); + break; + + case VMXNET3_CMD_GET_PERM_MAC_HI: + ret = vmxnet3_get_mac_high(&s->perm_mac); + break; + + case VMXNET3_CMD_GET_CONF_INTR: + ret = vmxnet3_get_interrupt_config(s); + break; + + default: + VMW_WRPRN("Received request for unknown command: %x", s->last_command); + ret = -1; + break; + } + + return ret; +} + +static void vmxnet3_set_events(VMXNET3State *s, uint32_t val) +{ + uint32_t events; + + VMW_CBPRN("Setting events: 0x%x", val); + events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) | val; + VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events); +} + +static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val) +{ + uint32_t events; + + VMW_CBPRN("Clearing events: 0x%x", val); + events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) & ~val; + VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events); +} + +static void +vmxnet3_io_bar1_write(void *opaque, + hwaddr addr, + uint64_t val, + unsigned size) +{ + VMXNET3State *s = opaque; + + switch (addr) { + /* Vmxnet3 Revision Report Selection */ + case VMXNET3_REG_VRRS: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d", + val, size); + break; + + /* UPT Version Report Selection */ + case VMXNET3_REG_UVRS: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d", + val, size); + break; + + /* Driver Shared Address Low */ + case VMXNET3_REG_DSAL: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d", + val, size); + /* + * Guest driver will first write the low part of the shared + * memory address. We save it to temp variable and set the + * shared address only after we get the high part + */ + if (val == 0) { + s->device_active = false; + } + s->temp_shared_guest_driver_memory = val; + s->drv_shmem = 0; + break; + + /* Driver Shared Address High */ + case VMXNET3_REG_DSAH: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d", + val, size); + /* + * Set the shared memory between guest driver and device. + * We already should have low address part. + */ + s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32); + break; + + /* Command */ + case VMXNET3_REG_CMD: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d", + val, size); + vmxnet3_handle_command(s, val); + break; + + /* MAC Address Low */ + case VMXNET3_REG_MACL: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d", + val, size); + s->temp_mac = val; + break; + + /* MAC Address High */ + case VMXNET3_REG_MACH: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d", + val, size); + vmxnet3_set_variable_mac(s, val, s->temp_mac); + break; + + /* Interrupt Cause Register */ + case VMXNET3_REG_ICR: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d", + val, size); + g_assert_not_reached(); + break; + + /* Event Cause Register */ + case VMXNET3_REG_ECR: + VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d", + val, size); + vmxnet3_ack_events(s, val); + break; + + default: + VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d", + addr, val, size); + break; + } +} + +static uint64_t +vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size) +{ + VMXNET3State *s = opaque; + uint64_t ret = 0; + + switch (addr) { + /* Vmxnet3 Revision Report Selection */ + case VMXNET3_REG_VRRS: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size); + ret = VMXNET3_DEVICE_REVISION; + break; + + /* UPT Version Report Selection */ + case VMXNET3_REG_UVRS: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size); + ret = VMXNET3_DEVICE_VERSION; + break; + + /* Command */ + case VMXNET3_REG_CMD: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size); + ret = vmxnet3_get_command_status(s); + break; + + /* MAC Address Low */ + case VMXNET3_REG_MACL: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size); + ret = vmxnet3_get_mac_low(&s->conf.macaddr); + break; + + /* MAC Address High */ + case VMXNET3_REG_MACH: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size); + ret = vmxnet3_get_mac_high(&s->conf.macaddr); + break; + + /* + * Interrupt Cause Register + * Used for legacy interrupts only so interrupt index always 0 + */ + case VMXNET3_REG_ICR: + VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size); + if (vmxnet3_interrupt_asserted(s, 0)) { + vmxnet3_clear_interrupt(s, 0); + ret = true; + } else { + ret = false; + } + break; + + default: + VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size); + break; + } + + return ret; +} + +static int +vmxnet3_can_receive(NetClientState *nc) +{ + VMXNET3State *s = qemu_get_nic_opaque(nc); + return s->device_active && + VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP); +} + +static inline bool +vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data) +{ + uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK; + if (IS_SPECIAL_VLAN_ID(vlan_tag)) { + return true; + } + + return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag); +} + +static bool +vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac) +{ + int i; + for (i = 0; i < s->mcast_list_len; i++) { + if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) { + return true; + } + } + return false; +} + +static bool +vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data, + size_t size) +{ + struct eth_header *ehdr = PKT_GET_ETH_HDR(data); + + if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) { + return true; + } + + if (!vmxnet3_is_registered_vlan(s, data)) { + return false; + } + + switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + case ETH_PKT_UCAST: + if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) { + return false; + } + if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) { + return false; + } + break; + + case ETH_PKT_BCAST: + if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) { + return false; + } + break; + + case ETH_PKT_MCAST: + if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) { + return true; + } + if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) { + return false; + } + if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) { + return false; + } + break; + + default: + g_assert_not_reached(); + } + + return true; +} + +static ssize_t +vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + VMXNET3State *s = qemu_get_nic_opaque(nc); + size_t bytes_indicated; + uint8_t min_buf[MIN_BUF_SIZE]; + + if (!vmxnet3_can_receive(nc)) { + VMW_PKPRN("Cannot receive now"); + return -1; + } + + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + memcpy(min_buf, buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + buf = min_buf; + size = sizeof(min_buf); + } + + vmxnet_rx_pkt_set_packet_type(s->rx_pkt, + get_eth_packet_type(PKT_GET_ETH_HDR(buf))); + + if (vmxnet3_rx_filter_may_indicate(s, buf, size)) { + vmxnet_rx_pkt_set_protocols(s->rx_pkt, buf, size); + vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size); + vmxnet_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); + bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1; + if (bytes_indicated < size) { + VMW_PKPRN("RX: %lu of %lu bytes indicated", bytes_indicated, size); + } + } else { + VMW_PKPRN("Packet dropped by RX filter"); + bytes_indicated = size; + } + + assert(size > 0); + assert(bytes_indicated != 0); + return bytes_indicated; +} + +static void vmxnet3_set_link_status(NetClientState *nc) +{ + VMXNET3State *s = qemu_get_nic_opaque(nc); + + if (nc->link_down) { + s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP; + } else { + s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP; + } + + vmxnet3_set_events(s, VMXNET3_ECR_LINK); + vmxnet3_trigger_interrupt(s, s->event_int_idx); +} + +static NetClientInfo net_vmxnet3_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = vmxnet3_can_receive, + .receive = vmxnet3_receive, + .link_status_changed = vmxnet3_set_link_status, +}; + +static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s) +{ + NetClientState *nc = qemu_get_queue(s->nic); + + if (qemu_has_vnet_hdr(nc->peer)) { + return true; + } + + VMW_WRPRN("Peer has no virtio extension. Task offloads will be emulated."); + return false; +} + +static void vmxnet3_net_uninit(VMXNET3State *s) +{ + g_free(s->mcast_list); + vmxnet_tx_pkt_reset(s->tx_pkt); + vmxnet_tx_pkt_uninit(s->tx_pkt); + vmxnet_rx_pkt_uninit(s->rx_pkt); + qemu_del_nic(s->nic); +} + +static void vmxnet3_net_init(VMXNET3State *s) +{ + DeviceState *d = DEVICE(s); + + VMW_CBPRN("vmxnet3_net_init called..."); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + /* Windows guest will query the address that was set on init */ + memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a)); + + s->mcast_list = NULL; + s->mcast_list_len = 0; + + s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP; + + VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a)); + + s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf, + object_get_typename(OBJECT(s)), + d->id, s); + + s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s); + s->tx_sop = true; + s->skip_current_tx_pkt = false; + s->tx_pkt = NULL; + s->rx_pkt = NULL; + s->rx_vlan_stripping = false; + s->lro_supported = false; + + if (s->peer_has_vhdr) { + qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer, + sizeof(struct virtio_net_hdr)); + + qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1); + } + + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); +} + +static void +vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors) +{ + PCIDevice *d = PCI_DEVICE(s); + int i; + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(d, i); + } +} + +static bool +vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors) +{ + PCIDevice *d = PCI_DEVICE(s); + int i; + for (i = 0; i < num_vectors; i++) { + int res = msix_vector_use(d, i); + if (0 > res) { + VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res); + vmxnet3_unuse_msix_vectors(s, i); + return false; + } + } + return true; +} + +static bool +vmxnet3_init_msix(VMXNET3State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int res = msix_init(d, VMXNET3_MAX_INTRS, + &s->msix_bar, + VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE, + &s->msix_bar, + VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA, + 0); + + if (0 > res) { + VMW_WRPRN("Failed to initialize MSI-X, error %d", res); + s->msix_used = false; + } else { + if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { + VMW_WRPRN("Failed to use MSI-X vectors, error %d", res); + msix_uninit(d, &s->msix_bar, &s->msix_bar); + s->msix_used = false; + } else { + s->msix_used = true; + } + } + return s->msix_used; +} + +static void +vmxnet3_cleanup_msix(VMXNET3State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + + if (s->msix_used) { + vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS); + msix_uninit(d, &s->msix_bar, &s->msix_bar); + } +} + +#define VMXNET3_MSI_OFFSET (0x50) +#define VMXNET3_USE_64BIT (true) +#define VMXNET3_PER_VECTOR_MASK (false) + +static bool +vmxnet3_init_msi(VMXNET3State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int res; + + res = msi_init(d, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS, + VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK); + if (0 > res) { + VMW_WRPRN("Failed to initialize MSI, error %d", res); + s->msi_used = false; + } else { + s->msi_used = true; + } + + return s->msi_used; +} + +static void +vmxnet3_cleanup_msi(VMXNET3State *s) +{ + PCIDevice *d = PCI_DEVICE(s); + + if (s->msi_used) { + msi_uninit(d); + } +} + +static void +vmxnet3_msix_save(QEMUFile *f, void *opaque) +{ + PCIDevice *d = PCI_DEVICE(opaque); + msix_save(d, f); +} + +static int +vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) +{ + PCIDevice *d = PCI_DEVICE(opaque); + msix_load(d, f); + return 0; +} + +static const MemoryRegionOps b0_ops = { + .read = vmxnet3_io_bar0_read, + .write = vmxnet3_io_bar0_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps b1_ops = { + .read = vmxnet3_io_bar1_read, + .write = vmxnet3_io_bar1_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) +{ + DeviceState *dev = DEVICE(pci_dev); + VMXNET3State *s = VMXNET3(pci_dev); + + VMW_CBPRN("Starting init..."); + + memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s, + "vmxnet3-b0", VMXNET3_PT_REG_SIZE); + pci_register_bar(pci_dev, VMXNET3_BAR0_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0); + + memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s, + "vmxnet3-b1", VMXNET3_VD_REG_SIZE); + pci_register_bar(pci_dev, VMXNET3_BAR1_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1); + + memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar", + VMXNET3_MSIX_BAR_SIZE); + pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar); + + vmxnet3_reset_interrupt_states(s); + + /* Interrupt pin A */ + pci_dev->config[PCI_INTERRUPT_PIN] = 0x01; + + if (!vmxnet3_init_msix(s)) { + VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent."); + } + + if (!vmxnet3_init_msi(s)) { + VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent."); + } + + vmxnet3_net_init(s); + + register_savevm(dev, "vmxnet3-msix", -1, 1, + vmxnet3_msix_save, vmxnet3_msix_load, s); +} + +static void vmxnet3_instance_init(Object *obj) +{ + VMXNET3State *s = VMXNET3(obj); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static void vmxnet3_pci_uninit(PCIDevice *pci_dev) +{ + DeviceState *dev = DEVICE(pci_dev); + VMXNET3State *s = VMXNET3(pci_dev); + + VMW_CBPRN("Starting uninit..."); + + unregister_savevm(dev, "vmxnet3-msix", s); + + vmxnet3_net_uninit(s); + + vmxnet3_cleanup_msix(s); + + vmxnet3_cleanup_msi(s); +} + +static void vmxnet3_qdev_reset(DeviceState *dev) +{ + PCIDevice *d = PCI_DEVICE(dev); + VMXNET3State *s = VMXNET3(d); + + VMW_CBPRN("Starting QDEV reset..."); + vmxnet3_reset(s); +} + +static bool vmxnet3_mc_list_needed(void *opaque) +{ + return true; +} + +static int vmxnet3_mcast_list_pre_load(void *opaque) +{ + VMXNET3State *s = opaque; + + s->mcast_list = g_malloc(s->mcast_list_buff_size); + + return 0; +} + + +static void vmxnet3_pre_save(void *opaque) +{ + VMXNET3State *s = opaque; + + s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr); +} + +static const VMStateDescription vmxstate_vmxnet3_mcast_list = { + .name = "vmxnet3/mcast_list", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = vmxnet3_mcast_list_pre_load, + .needed = vmxnet3_mc_list_needed, + .fields = (VMStateField[]) { + VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0, + mcast_list_buff_size), + VMSTATE_END_OF_LIST() + } +}; + +static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r) +{ + r->pa = qemu_get_be64(f); + r->size = qemu_get_be32(f); + r->cell_size = qemu_get_be32(f); + r->next = qemu_get_be32(f); + r->gen = qemu_get_byte(f); +} + +static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r) +{ + qemu_put_be64(f, r->pa); + qemu_put_be32(f, r->size); + qemu_put_be32(f, r->cell_size); + qemu_put_be32(f, r->next); + qemu_put_byte(f, r->gen); +} + +static void vmxnet3_get_tx_stats_from_file(QEMUFile *f, + struct UPT1_TxStats *tx_stat) +{ + tx_stat->TSOPktsTxOK = qemu_get_be64(f); + tx_stat->TSOBytesTxOK = qemu_get_be64(f); + tx_stat->ucastPktsTxOK = qemu_get_be64(f); + tx_stat->ucastBytesTxOK = qemu_get_be64(f); + tx_stat->mcastPktsTxOK = qemu_get_be64(f); + tx_stat->mcastBytesTxOK = qemu_get_be64(f); + tx_stat->bcastPktsTxOK = qemu_get_be64(f); + tx_stat->bcastBytesTxOK = qemu_get_be64(f); + tx_stat->pktsTxError = qemu_get_be64(f); + tx_stat->pktsTxDiscard = qemu_get_be64(f); +} + +static void vmxnet3_put_tx_stats_to_file(QEMUFile *f, + struct UPT1_TxStats *tx_stat) +{ + qemu_put_be64(f, tx_stat->TSOPktsTxOK); + qemu_put_be64(f, tx_stat->TSOBytesTxOK); + qemu_put_be64(f, tx_stat->ucastPktsTxOK); + qemu_put_be64(f, tx_stat->ucastBytesTxOK); + qemu_put_be64(f, tx_stat->mcastPktsTxOK); + qemu_put_be64(f, tx_stat->mcastBytesTxOK); + qemu_put_be64(f, tx_stat->bcastPktsTxOK); + qemu_put_be64(f, tx_stat->bcastBytesTxOK); + qemu_put_be64(f, tx_stat->pktsTxError); + qemu_put_be64(f, tx_stat->pktsTxDiscard); +} + +static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3TxqDescr *r = pv; + + vmxnet3_get_ring_from_file(f, &r->tx_ring); + vmxnet3_get_ring_from_file(f, &r->comp_ring); + r->intr_idx = qemu_get_byte(f); + r->tx_stats_pa = qemu_get_be64(f); + + vmxnet3_get_tx_stats_from_file(f, &r->txq_stats); + + return 0; +} + +static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3TxqDescr *r = pv; + + vmxnet3_put_ring_to_file(f, &r->tx_ring); + vmxnet3_put_ring_to_file(f, &r->comp_ring); + qemu_put_byte(f, r->intr_idx); + qemu_put_be64(f, r->tx_stats_pa); + vmxnet3_put_tx_stats_to_file(f, &r->txq_stats); +} + +static const VMStateInfo txq_descr_info = { + .name = "txq_descr", + .get = vmxnet3_get_txq_descr, + .put = vmxnet3_put_txq_descr +}; + +static void vmxnet3_get_rx_stats_from_file(QEMUFile *f, + struct UPT1_RxStats *rx_stat) +{ + rx_stat->LROPktsRxOK = qemu_get_be64(f); + rx_stat->LROBytesRxOK = qemu_get_be64(f); + rx_stat->ucastPktsRxOK = qemu_get_be64(f); + rx_stat->ucastBytesRxOK = qemu_get_be64(f); + rx_stat->mcastPktsRxOK = qemu_get_be64(f); + rx_stat->mcastBytesRxOK = qemu_get_be64(f); + rx_stat->bcastPktsRxOK = qemu_get_be64(f); + rx_stat->bcastBytesRxOK = qemu_get_be64(f); + rx_stat->pktsRxOutOfBuf = qemu_get_be64(f); + rx_stat->pktsRxError = qemu_get_be64(f); +} + +static void vmxnet3_put_rx_stats_to_file(QEMUFile *f, + struct UPT1_RxStats *rx_stat) +{ + qemu_put_be64(f, rx_stat->LROPktsRxOK); + qemu_put_be64(f, rx_stat->LROBytesRxOK); + qemu_put_be64(f, rx_stat->ucastPktsRxOK); + qemu_put_be64(f, rx_stat->ucastBytesRxOK); + qemu_put_be64(f, rx_stat->mcastPktsRxOK); + qemu_put_be64(f, rx_stat->mcastBytesRxOK); + qemu_put_be64(f, rx_stat->bcastPktsRxOK); + qemu_put_be64(f, rx_stat->bcastBytesRxOK); + qemu_put_be64(f, rx_stat->pktsRxOutOfBuf); + qemu_put_be64(f, rx_stat->pktsRxError); +} + +static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3RxqDescr *r = pv; + int i; + + for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) { + vmxnet3_get_ring_from_file(f, &r->rx_ring[i]); + } + + vmxnet3_get_ring_from_file(f, &r->comp_ring); + r->intr_idx = qemu_get_byte(f); + r->rx_stats_pa = qemu_get_be64(f); + + vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats); + + return 0; +} + +static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3RxqDescr *r = pv; + int i; + + for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) { + vmxnet3_put_ring_to_file(f, &r->rx_ring[i]); + } + + vmxnet3_put_ring_to_file(f, &r->comp_ring); + qemu_put_byte(f, r->intr_idx); + qemu_put_be64(f, r->rx_stats_pa); + vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats); +} + +static int vmxnet3_post_load(void *opaque, int version_id) +{ + VMXNET3State *s = opaque; + PCIDevice *d = PCI_DEVICE(s); + + vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + + if (s->msix_used) { + if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { + VMW_WRPRN("Failed to re-use MSI-X vectors"); + msix_uninit(d, &s->msix_bar, &s->msix_bar); + s->msix_used = false; + return -1; + } + } + + vmxnet3_validate_queues(s); + vmxnet3_validate_interrupts(s); + + return 0; +} + +static const VMStateInfo rxq_descr_info = { + .name = "rxq_descr", + .get = vmxnet3_get_rxq_descr, + .put = vmxnet3_put_rxq_descr +}; + +static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3IntState *r = pv; + + r->is_masked = qemu_get_byte(f); + r->is_pending = qemu_get_byte(f); + r->is_asserted = qemu_get_byte(f); + + return 0; +} + +static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size) +{ + Vmxnet3IntState *r = pv; + + qemu_put_byte(f, r->is_masked); + qemu_put_byte(f, r->is_pending); + qemu_put_byte(f, r->is_asserted); +} + +static const VMStateInfo int_state_info = { + .name = "int_state", + .get = vmxnet3_get_int_state, + .put = vmxnet3_put_int_state +}; + +static const VMStateDescription vmstate_vmxnet3 = { + .name = "vmxnet3", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = vmxnet3_pre_save, + .post_load = vmxnet3_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), + VMSTATE_BOOL(rx_packets_compound, VMXNET3State), + VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), + VMSTATE_BOOL(lro_supported, VMXNET3State), + VMSTATE_UINT32(rx_mode, VMXNET3State), + VMSTATE_UINT32(mcast_list_len, VMXNET3State), + VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State), + VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE), + VMSTATE_UINT32(mtu, VMXNET3State), + VMSTATE_UINT16(max_rx_frags, VMXNET3State), + VMSTATE_UINT32(max_tx_frags, VMXNET3State), + VMSTATE_UINT8(event_int_idx, VMXNET3State), + VMSTATE_BOOL(auto_int_masking, VMXNET3State), + VMSTATE_UINT8(txq_num, VMXNET3State), + VMSTATE_UINT8(rxq_num, VMXNET3State), + VMSTATE_UINT32(device_active, VMXNET3State), + VMSTATE_UINT32(last_command, VMXNET3State), + VMSTATE_UINT32(link_status_and_speed, VMXNET3State), + VMSTATE_UINT32(temp_mac, VMXNET3State), + VMSTATE_UINT64(drv_shmem, VMXNET3State), + VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State), + + VMSTATE_ARRAY(txq_descr, VMXNET3State, + VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info, + Vmxnet3TxqDescr), + VMSTATE_ARRAY(rxq_descr, VMXNET3State, + VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info, + Vmxnet3RxqDescr), + VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS, + 0, int_state_info, Vmxnet3IntState), + + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmxstate_vmxnet3_mcast_list, + NULL + } +}; + +static Property vmxnet3_properties[] = { + DEFINE_NIC_PROPERTIES(VMXNET3State, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vmxnet3_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIDeviceClass *c = PCI_DEVICE_CLASS(class); + + c->realize = vmxnet3_pci_realize; + c->exit = vmxnet3_pci_uninit; + c->vendor_id = PCI_VENDOR_ID_VMWARE; + c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3; + c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION; + c->class_id = PCI_CLASS_NETWORK_ETHERNET; + c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; + c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; + dc->desc = "VMWare Paravirtualized Ethernet v3"; + dc->reset = vmxnet3_qdev_reset; + dc->vmsd = &vmstate_vmxnet3; + dc->props = vmxnet3_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static const TypeInfo vmxnet3_info = { + .name = TYPE_VMXNET3, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(VMXNET3State), + .class_init = vmxnet3_class_init, + .instance_init = vmxnet3_instance_init, +}; + +static void vmxnet3_register_types(void) +{ + VMW_CBPRN("vmxnet3_register_types called..."); + type_register_static(&vmxnet3_info); +} + +type_init(vmxnet3_register_types) diff --git a/qemu/hw/net/vmxnet3.h b/qemu/hw/net/vmxnet3.h new file mode 100644 index 000000000..f987d7126 --- /dev/null +++ b/qemu/hw/net/vmxnet3.h @@ -0,0 +1,755 @@ +/* + * QEMU VMWARE VMXNET3 paravirtual NIC interface definitions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VMXNET3_H +#define _QEMU_VMXNET3_H + +#define VMXNET3_DEVICE_MAX_TX_QUEUES 8 +#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */ + +/* + * VMWARE headers we got from Linux kernel do not fully comply QEMU coding + * standards in sense of types and defines used. + * Since we didn't want to change VMWARE code, following set of typedefs + * and defines needed to compile these headers with QEMU introduced. + */ +#define u64 uint64_t +#define u32 uint32_t +#define u16 uint16_t +#define u8 uint8_t +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +#if defined(HOST_WORDS_BIGENDIAN) +#define __BIG_ENDIAN_BITFIELD +#else +#endif + +/* + * Following is an interface definition for + * VMXNET3 device as provided by VMWARE + * See original copyright from Linux kernel v3.2.8 + * header file drivers/net/vmxnet3/vmxnet3_defs.h below. + */ + +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + +struct UPT1_TxStats { + u64 TSOPktsTxOK; /* TSO pkts post-segmentation */ + u64 TSOBytesTxOK; + u64 ucastPktsTxOK; + u64 ucastBytesTxOK; + u64 mcastPktsTxOK; + u64 mcastBytesTxOK; + u64 bcastPktsTxOK; + u64 bcastBytesTxOK; + u64 pktsTxError; + u64 pktsTxDiscard; +}; + +struct UPT1_RxStats { + u64 LROPktsRxOK; /* LRO pkts */ + u64 LROBytesRxOK; /* bytes from LRO pkts */ + /* the following counters are for pkts from the wire, i.e., pre-LRO */ + u64 ucastPktsRxOK; + u64 ucastBytesRxOK; + u64 mcastPktsRxOK; + u64 mcastBytesRxOK; + u64 bcastPktsRxOK; + u64 bcastBytesRxOK; + u64 pktsRxOutOfBuf; + u64 pktsRxError; +}; + +/* interrupt moderation level */ +enum { + UPT1_IML_NONE = 0, /* no interrupt moderation */ + UPT1_IML_HIGHEST = 7, /* least intr generated */ + UPT1_IML_ADAPTIVE = 8, /* adpative intr moderation */ +}; +/* values for UPT1_RSSConf.hashFunc */ +enum { + UPT1_RSS_HASH_TYPE_NONE = 0x0, + UPT1_RSS_HASH_TYPE_IPV4 = 0x01, + UPT1_RSS_HASH_TYPE_TCP_IPV4 = 0x02, + UPT1_RSS_HASH_TYPE_IPV6 = 0x04, + UPT1_RSS_HASH_TYPE_TCP_IPV6 = 0x08, +}; + +enum { + UPT1_RSS_HASH_FUNC_NONE = 0x0, + UPT1_RSS_HASH_FUNC_TOEPLITZ = 0x01, +}; + +#define UPT1_RSS_MAX_KEY_SIZE 40 +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128 + +struct UPT1_RSSConf { + u16 hashType; + u16 hashFunc; + u16 hashKeySize; + u16 indTableSize; + u8 hashKey[UPT1_RSS_MAX_KEY_SIZE]; + u8 indTable[UPT1_RSS_MAX_IND_TABLE_SIZE]; +}; + +/* features */ +enum { + UPT1_F_RXCSUM = 0x0001, /* rx csum verification */ + UPT1_F_RSS = 0x0002, + UPT1_F_RXVLAN = 0x0004, /* VLAN tag stripping */ + UPT1_F_LRO = 0x0008, +}; + +/* all registers are 32 bit wide */ +/* BAR 1 */ +enum { + VMXNET3_REG_VRRS = 0x0, /* Vmxnet3 Revision Report Selection */ + VMXNET3_REG_UVRS = 0x8, /* UPT Version Report Selection */ + VMXNET3_REG_DSAL = 0x10, /* Driver Shared Address Low */ + VMXNET3_REG_DSAH = 0x18, /* Driver Shared Address High */ + VMXNET3_REG_CMD = 0x20, /* Command */ + VMXNET3_REG_MACL = 0x28, /* MAC Address Low */ + VMXNET3_REG_MACH = 0x30, /* MAC Address High */ + VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */ + VMXNET3_REG_ECR = 0x40 /* Event Cause Register */ +}; + +/* BAR 0 */ +enum { + VMXNET3_REG_IMR = 0x0, /* Interrupt Mask Register */ + VMXNET3_REG_TXPROD = 0x600, /* Tx Producer Index */ + VMXNET3_REG_RXPROD = 0x800, /* Rx Producer Index for ring 1 */ + VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */ +}; + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ + +#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */ +#define VMXNET3_REG_ALIGN_MASK 0x7 + +/* I/O Mapped access to registers */ +#define VMXNET3_IO_TYPE_PT 0 +#define VMXNET3_IO_TYPE_VD 1 +#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) & 0xFFFFFF)) +#define VMXNET3_IO_TYPE(addr) ((addr) >> 24) +#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF) + +enum { + VMXNET3_CMD_FIRST_SET = 0xCAFE0000, + VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, /* 0xCAFE0000 */ + VMXNET3_CMD_QUIESCE_DEV, /* 0xCAFE0001 */ + VMXNET3_CMD_RESET_DEV, /* 0xCAFE0002 */ + VMXNET3_CMD_UPDATE_RX_MODE, /* 0xCAFE0003 */ + VMXNET3_CMD_UPDATE_MAC_FILTERS, /* 0xCAFE0004 */ + VMXNET3_CMD_UPDATE_VLAN_FILTERS, /* 0xCAFE0005 */ + VMXNET3_CMD_UPDATE_RSSIDT, /* 0xCAFE0006 */ + VMXNET3_CMD_UPDATE_IML, /* 0xCAFE0007 */ + VMXNET3_CMD_UPDATE_PMCFG, /* 0xCAFE0008 */ + VMXNET3_CMD_UPDATE_FEATURE, /* 0xCAFE0009 */ + VMXNET3_CMD_LOAD_PLUGIN, /* 0xCAFE000A */ + + VMXNET3_CMD_FIRST_GET = 0xF00D0000, + VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, /* 0xF00D0000 */ + VMXNET3_CMD_GET_STATS, /* 0xF00D0001 */ + VMXNET3_CMD_GET_LINK, /* 0xF00D0002 */ + VMXNET3_CMD_GET_PERM_MAC_LO, /* 0xF00D0003 */ + VMXNET3_CMD_GET_PERM_MAC_HI, /* 0xF00D0004 */ + VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005 */ + VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006 */ + VMXNET3_CMD_GET_DEV_EXTRA_INFO, /* 0xF00D0007 */ + VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008 */ +}; + +/* + * Little Endian layout of bitfields - + * Byte 0 : 7.....len.....0 + * Byte 1 : rsvd gen 13.len.8 + * Byte 2 : 5.msscof.0 ext1 dtype + * Byte 3 : 13...msscof...6 + * + * Big Endian layout of bitfields - + * Byte 0: 13...msscof...6 + * Byte 1 : 5.msscof.0 ext1 dtype + * Byte 2 : rsvd gen 13.len.8 + * Byte 3 : 7.....len.....0 + * + * Thus, le32_to_cpu on the dword will allow the big endian driver to read + * the bit fields correctly. And cpu_to_le32 will convert bitfields + * bit fields written by big endian driver to format required by device. + */ + +struct Vmxnet3_TxDesc { + __le64 addr; + +#ifdef __BIG_ENDIAN_BITFIELD + u32 msscof:14; /* MSS, checksum offset, flags */ + u32 ext1:1; + u32 dtype:1; /* descriptor type */ + u32 rsvd:1; + u32 gen:1; /* generation bit */ + u32 len:14; +#else + u32 len:14; + u32 gen:1; /* generation bit */ + u32 rsvd:1; + u32 dtype:1; /* descriptor type */ + u32 ext1:1; + u32 msscof:14; /* MSS, checksum offset, flags */ +#endif /* __BIG_ENDIAN_BITFIELD */ + +#ifdef __BIG_ENDIAN_BITFIELD + u32 tci:16; /* Tag to Insert */ + u32 ti:1; /* VLAN Tag Insertion */ + u32 ext2:1; + u32 cq:1; /* completion request */ + u32 eop:1; /* End Of Packet */ + u32 om:2; /* offload mode */ + u32 hlen:10; /* header len */ +#else + u32 hlen:10; /* header len */ + u32 om:2; /* offload mode */ + u32 eop:1; /* End Of Packet */ + u32 cq:1; /* completion request */ + u32 ext2:1; + u32 ti:1; /* VLAN Tag Insertion */ + u32 tci:16; /* Tag to Insert */ +#endif /* __BIG_ENDIAN_BITFIELD */ +}; + +/* TxDesc.OM values */ +#define VMXNET3_OM_NONE 0 +#define VMXNET3_OM_CSUM 2 +#define VMXNET3_OM_TSO 3 + +/* fields in TxDesc we access w/o using bit fields */ +#define VMXNET3_TXD_EOP_SHIFT 12 +#define VMXNET3_TXD_CQ_SHIFT 13 +#define VMXNET3_TXD_GEN_SHIFT 14 +#define VMXNET3_TXD_EOP_DWORD_SHIFT 3 +#define VMXNET3_TXD_GEN_DWORD_SHIFT 2 + +#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT) +#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT) +#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT) + +#define VMXNET3_HDR_COPY_SIZE 128 + + +struct Vmxnet3_TxDataDesc { + u8 data[VMXNET3_HDR_COPY_SIZE]; +}; + +#define VMXNET3_TCD_GEN_SHIFT 31 +#define VMXNET3_TCD_GEN_SIZE 1 +#define VMXNET3_TCD_TXIDX_SHIFT 0 +#define VMXNET3_TCD_TXIDX_SIZE 12 +#define VMXNET3_TCD_GEN_DWORD_SHIFT 3 + +struct Vmxnet3_TxCompDesc { + u32 txdIdx:12; /* Index of the EOP TxDesc */ + u32 ext1:20; + + __le32 ext2; + __le32 ext3; + + u32 rsvd:24; + u32 type:7; /* completion type */ + u32 gen:1; /* generation bit */ +}; + +struct Vmxnet3_RxDesc { + __le64 addr; + +#ifdef __BIG_ENDIAN_BITFIELD + u32 gen:1; /* Generation bit */ + u32 rsvd:15; + u32 dtype:1; /* Descriptor type */ + u32 btype:1; /* Buffer Type */ + u32 len:14; +#else + u32 len:14; + u32 btype:1; /* Buffer Type */ + u32 dtype:1; /* Descriptor type */ + u32 rsvd:15; + u32 gen:1; /* Generation bit */ +#endif + u32 ext1; +}; + +/* values of RXD.BTYPE */ +#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */ +#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */ + +/* fields in RxDesc we access w/o using bit fields */ +#define VMXNET3_RXD_BTYPE_SHIFT 14 +#define VMXNET3_RXD_GEN_SHIFT 31 + +struct Vmxnet3_RxCompDesc { +#ifdef __BIG_ENDIAN_BITFIELD + u32 ext2:1; + u32 cnc:1; /* Checksum Not Calculated */ + u32 rssType:4; /* RSS hash type used */ + u32 rqID:10; /* rx queue/ring ID */ + u32 sop:1; /* Start of Packet */ + u32 eop:1; /* End of Packet */ + u32 ext1:2; + u32 rxdIdx:12; /* Index of the RxDesc */ +#else + u32 rxdIdx:12; /* Index of the RxDesc */ + u32 ext1:2; + u32 eop:1; /* End of Packet */ + u32 sop:1; /* Start of Packet */ + u32 rqID:10; /* rx queue/ring ID */ + u32 rssType:4; /* RSS hash type used */ + u32 cnc:1; /* Checksum Not Calculated */ + u32 ext2:1; +#endif /* __BIG_ENDIAN_BITFIELD */ + + __le32 rssHash; /* RSS hash value */ + +#ifdef __BIG_ENDIAN_BITFIELD + u32 tci:16; /* Tag stripped */ + u32 ts:1; /* Tag is stripped */ + u32 err:1; /* Error */ + u32 len:14; /* data length */ +#else + u32 len:14; /* data length */ + u32 err:1; /* Error */ + u32 ts:1; /* Tag is stripped */ + u32 tci:16; /* Tag stripped */ +#endif /* __BIG_ENDIAN_BITFIELD */ + + +#ifdef __BIG_ENDIAN_BITFIELD + u32 gen:1; /* generation bit */ + u32 type:7; /* completion type */ + u32 fcs:1; /* Frame CRC correct */ + u32 frg:1; /* IP Fragment */ + u32 v4:1; /* IPv4 */ + u32 v6:1; /* IPv6 */ + u32 ipc:1; /* IP Checksum Correct */ + u32 tcp:1; /* TCP packet */ + u32 udp:1; /* UDP packet */ + u32 tuc:1; /* TCP/UDP Checksum Correct */ + u32 csum:16; +#else + u32 csum:16; + u32 tuc:1; /* TCP/UDP Checksum Correct */ + u32 udp:1; /* UDP packet */ + u32 tcp:1; /* TCP packet */ + u32 ipc:1; /* IP Checksum Correct */ + u32 v6:1; /* IPv6 */ + u32 v4:1; /* IPv4 */ + u32 frg:1; /* IP Fragment */ + u32 fcs:1; /* Frame CRC correct */ + u32 type:7; /* completion type */ + u32 gen:1; /* generation bit */ +#endif /* __BIG_ENDIAN_BITFIELD */ +}; + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */ +#define VMXNET3_RCD_TUC_SHIFT 16 +#define VMXNET3_RCD_IPC_SHIFT 19 + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */ +#define VMXNET3_RCD_TYPE_SHIFT 56 +#define VMXNET3_RCD_GEN_SHIFT 63 + +/* csum OK for TCP/UDP pkts over IP */ +#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | \ + 1 << VMXNET3_RCD_IPC_SHIFT) +#define VMXNET3_TXD_GEN_SIZE 1 +#define VMXNET3_TXD_EOP_SIZE 1 + +/* value of RxCompDesc.rssType */ +enum { + VMXNET3_RCD_RSS_TYPE_NONE = 0, + VMXNET3_RCD_RSS_TYPE_IPV4 = 1, + VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2, + VMXNET3_RCD_RSS_TYPE_IPV6 = 3, + VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4, +}; + + +/* a union for accessing all cmd/completion descriptors */ +union Vmxnet3_GenericDesc { + __le64 qword[2]; + __le32 dword[4]; + __le16 word[8]; + struct Vmxnet3_TxDesc txd; + struct Vmxnet3_RxDesc rxd; + struct Vmxnet3_TxCompDesc tcd; + struct Vmxnet3_RxCompDesc rcd; +}; + +#define VMXNET3_INIT_GEN 1 + +/* Max size of a single tx buffer */ +#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14) + +/* # of tx desc needed for a tx buffer size */ +#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1) / \ + VMXNET3_MAX_TX_BUF_SIZE) + +/* max # of tx descs for a non-tso pkt */ +#define VMXNET3_MAX_TXD_PER_PKT 16 + +/* Max size of a single rx buffer */ +#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) +/* Minimum size of a type 0 buffer */ +#define VMXNET3_MIN_T0_BUF_SIZE 128 +#define VMXNET3_MAX_CSUM_OFFSET 1024 + +/* Ring base address alignment */ +#define VMXNET3_RING_BA_ALIGN 512 +#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1) + +/* Ring size must be a multiple of 32 */ +#define VMXNET3_RING_SIZE_ALIGN 32 +#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1) + +/* Max ring size */ +#define VMXNET3_TX_RING_MAX_SIZE 4096 +#define VMXNET3_TC_RING_MAX_SIZE 4096 +#define VMXNET3_RX_RING_MAX_SIZE 4096 +#define VMXNET3_RC_RING_MAX_SIZE 8192 + +/* a list of reasons for queue stop */ + +enum { + VMXNET3_ERR_NOEOP = 0x80000000, /* cannot find the EOP desc of a pkt */ + VMXNET3_ERR_TXD_REUSE = 0x80000001, /* reuse TxDesc before tx completion */ + VMXNET3_ERR_BIG_PKT = 0x80000002, /* too many TxDesc for a pkt */ + VMXNET3_ERR_DESC_NOT_SPT = 0x80000003, /* descriptor type not supported */ + VMXNET3_ERR_SMALL_BUF = 0x80000004, /* type 0 buffer too small */ + VMXNET3_ERR_STRESS = 0x80000005, /* stress option firing in vmkernel */ + VMXNET3_ERR_SWITCH = 0x80000006, /* mode switch failure */ + VMXNET3_ERR_TXD_INVALID = 0x80000007, /* invalid TxDesc */ +}; + +/* completion descriptor types */ +#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */ +#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */ + +enum { + VMXNET3_GOS_BITS_UNK = 0, /* unknown */ + VMXNET3_GOS_BITS_32 = 1, + VMXNET3_GOS_BITS_64 = 2, +}; + +#define VMXNET3_GOS_TYPE_UNK 0 /* unknown */ +#define VMXNET3_GOS_TYPE_LINUX 1 +#define VMXNET3_GOS_TYPE_WIN 2 +#define VMXNET3_GOS_TYPE_SOLARIS 3 +#define VMXNET3_GOS_TYPE_FREEBSD 4 +#define VMXNET3_GOS_TYPE_PXE 5 + +struct Vmxnet3_GOSInfo { +#ifdef __BIG_ENDIAN_BITFIELD + u32 gosMisc:10; /* other info about gos */ + u32 gosVer:16; /* gos version */ + u32 gosType:4; /* which guest */ + u32 gosBits:2; /* 32-bit or 64-bit? */ +#else + u32 gosBits:2; /* 32-bit or 64-bit? */ + u32 gosType:4; /* which guest */ + u32 gosVer:16; /* gos version */ + u32 gosMisc:10; /* other info about gos */ +#endif /* __BIG_ENDIAN_BITFIELD */ +}; + +struct Vmxnet3_DriverInfo { + __le32 version; + struct Vmxnet3_GOSInfo gos; + __le32 vmxnet3RevSpt; + __le32 uptVerSpt; +}; + + +#define VMXNET3_REV1_MAGIC 0xbabefee1 + +/* + * QueueDescPA must be 128 bytes aligned. It points to an array of + * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc. + * The number of Vmxnet3_TxQueueDesc/Vmxnet3_RxQueueDesc are specified by + * Vmxnet3_MiscConf.numTxQueues/numRxQueues, respectively. + */ +#define VMXNET3_QUEUE_DESC_ALIGN 128 + + +struct Vmxnet3_MiscConf { + struct Vmxnet3_DriverInfo driverInfo; + __le64 uptFeatures; + __le64 ddPA; /* driver data PA */ + __le64 queueDescPA; /* queue descriptor table PA */ + __le32 ddLen; /* driver data len */ + __le32 queueDescLen; /* queue desc. table len in bytes */ + __le32 mtu; + __le16 maxNumRxSG; + u8 numTxQueues; + u8 numRxQueues; + __le32 reserved[4]; +}; + + +struct Vmxnet3_TxQueueConf { + __le64 txRingBasePA; + __le64 dataRingBasePA; + __le64 compRingBasePA; + __le64 ddPA; /* driver data */ + __le64 reserved; + __le32 txRingSize; /* # of tx desc */ + __le32 dataRingSize; /* # of data desc */ + __le32 compRingSize; /* # of comp desc */ + __le32 ddLen; /* size of driver data */ + u8 intrIdx; + u8 _pad[7]; +}; + + +struct Vmxnet3_RxQueueConf { + __le64 rxRingBasePA[2]; + __le64 compRingBasePA; + __le64 ddPA; /* driver data */ + __le64 reserved; + __le32 rxRingSize[2]; /* # of rx desc */ + __le32 compRingSize; /* # of rx comp desc */ + __le32 ddLen; /* size of driver data */ + u8 intrIdx; + u8 _pad[7]; +}; + + +enum vmxnet3_intr_mask_mode { + VMXNET3_IMM_AUTO = 0, + VMXNET3_IMM_ACTIVE = 1, + VMXNET3_IMM_LAZY = 2 +}; + +enum vmxnet3_intr_type { + VMXNET3_IT_AUTO = 0, + VMXNET3_IT_INTX = 1, + VMXNET3_IT_MSI = 2, + VMXNET3_IT_MSIX = 3 +}; + +#define VMXNET3_MAX_TX_QUEUES 8 +#define VMXNET3_MAX_RX_QUEUES 16 +/* addition 1 for events */ +#define VMXNET3_MAX_INTRS 25 + +/* value of intrCtrl */ +#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */ + + +struct Vmxnet3_IntrConf { + bool autoMask; + u8 numIntrs; /* # of interrupts */ + u8 eventIntrIdx; + u8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for + * each intr */ + __le32 intrCtrl; + __le32 reserved[2]; +}; + +/* one bit per VLAN ID, the size is in the units of u32 */ +#define VMXNET3_VFT_SIZE (4096/(sizeof(uint32_t)*8)) + + +struct Vmxnet3_QueueStatus { + bool stopped; + u8 _pad[3]; + __le32 error; +}; + + +struct Vmxnet3_TxQueueCtrl { + __le32 txNumDeferred; + __le32 txThreshold; + __le64 reserved; +}; + + +struct Vmxnet3_RxQueueCtrl { + bool updateRxProd; + u8 _pad[7]; + __le64 reserved; +}; + +enum { + VMXNET3_RXM_UCAST = 0x01, /* unicast only */ + VMXNET3_RXM_MCAST = 0x02, /* multicast passing the filters */ + VMXNET3_RXM_BCAST = 0x04, /* broadcast only */ + VMXNET3_RXM_ALL_MULTI = 0x08, /* all multicast */ + VMXNET3_RXM_PROMISC = 0x10 /* promiscuous */ +}; + +struct Vmxnet3_RxFilterConf { + __le32 rxMode; /* VMXNET3_RXM_xxx */ + __le16 mfTableLen; /* size of the multicast filter table */ + __le16 _pad1; + __le64 mfTablePA; /* PA of the multicast filters table */ + __le32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */ +}; + + +#define VMXNET3_PM_MAX_FILTERS 6 +#define VMXNET3_PM_MAX_PATTERN_SIZE 128 +#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8) + +#define VMXNET3_PM_WAKEUP_MAGIC cpu_to_le16(0x01) /* wake up on magic pkts */ +#define VMXNET3_PM_WAKEUP_FILTER cpu_to_le16(0x02) /* wake up on pkts matching + * filters */ + + +struct Vmxnet3_PM_PktFilter { + u8 maskSize; + u8 patternSize; + u8 mask[VMXNET3_PM_MAX_MASK_SIZE]; + u8 pattern[VMXNET3_PM_MAX_PATTERN_SIZE]; + u8 pad[6]; +}; + + +struct Vmxnet3_PMConf { + __le16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */ + u8 numFilters; + u8 pad[5]; + struct Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_FILTERS]; +}; + + +struct Vmxnet3_VariableLenConfDesc { + __le32 confVer; + __le32 confLen; + __le64 confPA; +}; + + +struct Vmxnet3_TxQueueDesc { + struct Vmxnet3_TxQueueCtrl ctrl; + struct Vmxnet3_TxQueueConf conf; + + /* Driver read after a GET command */ + struct Vmxnet3_QueueStatus status; + struct UPT1_TxStats stats; + u8 _pad[88]; /* 128 aligned */ +}; + + +struct Vmxnet3_RxQueueDesc { + struct Vmxnet3_RxQueueCtrl ctrl; + struct Vmxnet3_RxQueueConf conf; + /* Driver read after a GET commad */ + struct Vmxnet3_QueueStatus status; + struct UPT1_RxStats stats; + u8 __pad[88]; /* 128 aligned */ +}; + + +struct Vmxnet3_DSDevRead { + /* read-only region for device, read by dev in response to a SET cmd */ + struct Vmxnet3_MiscConf misc; + struct Vmxnet3_IntrConf intrConf; + struct Vmxnet3_RxFilterConf rxFilterConf; + struct Vmxnet3_VariableLenConfDesc rssConfDesc; + struct Vmxnet3_VariableLenConfDesc pmConfDesc; + struct Vmxnet3_VariableLenConfDesc pluginConfDesc; +}; + +/* All structures in DriverShared are padded to multiples of 8 bytes */ +struct Vmxnet3_DriverShared { + __le32 magic; + /* make devRead start at 64bit boundaries */ + __le32 pad; + struct Vmxnet3_DSDevRead devRead; + __le32 ecr; + __le32 reserved[5]; +}; + + +#define VMXNET3_ECR_RQERR (1 << 0) +#define VMXNET3_ECR_TQERR (1 << 1) +#define VMXNET3_ECR_LINK (1 << 2) +#define VMXNET3_ECR_DIC (1 << 3) +#define VMXNET3_ECR_DEBUG (1 << 4) + +/* flip the gen bit of a ring */ +#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1) + +/* only use this if moving the idx won't affect the gen bit */ +#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \ + do {\ + (idx)++;\ + if (unlikely((idx) == (ring_size))) {\ + (idx) = 0;\ + } \ + } while (0) + +#define VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid) \ + (vfTable[vid >> 5] |= (1 << (vid & 31))) +#define VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid) \ + (vfTable[vid >> 5] &= ~(1 << (vid & 31))) + +#define VMXNET3_VFTABLE_ENTRY_IS_SET(vfTable, vid) \ + ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0) + +#define VMXNET3_MAX_MTU 9000 +#define VMXNET3_MIN_MTU 60 + +#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ +#define VMXNET3_LINK_DOWN 0 + +#undef u64 +#undef u32 +#undef u16 +#undef u8 +#undef __le16 +#undef __le32 +#undef __le64 +#if defined(HOST_WORDS_BIGENDIAN) +#undef __BIG_ENDIAN_BITFIELD +#endif + +#endif diff --git a/qemu/hw/net/vmxnet_debug.h b/qemu/hw/net/vmxnet_debug.h new file mode 100644 index 000000000..96dae0f91 --- /dev/null +++ b/qemu/hw/net/vmxnet_debug.h @@ -0,0 +1,115 @@ +/* + * QEMU VMWARE VMXNET* paravirtual NICs - debugging facilities + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VMXNET_DEBUG_H +#define _QEMU_VMXNET_DEBUG_H + +#define VMXNET_DEVICE_NAME "vmxnet3" + +/* #define VMXNET_DEBUG_CB */ +#define VMXNET_DEBUG_WARNINGS +#define VMXNET_DEBUG_ERRORS +/* #define VMXNET_DEBUG_INTERRUPTS */ +/* #define VMXNET_DEBUG_CONFIG */ +/* #define VMXNET_DEBUG_RINGS */ +/* #define VMXNET_DEBUG_PACKETS */ +/* #define VMXNET_DEBUG_SHMEM_ACCESS */ + +#ifdef VMXNET_DEBUG_SHMEM_ACCESS +#define VMW_SHPRN(fmt, ...) \ + do { \ + printf("[%s][SH][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_SHPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_CB +#define VMW_CBPRN(fmt, ...) \ + do { \ + printf("[%s][CB][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_CBPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_PACKETS +#define VMW_PKPRN(fmt, ...) \ + do { \ + printf("[%s][PK][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_PKPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_WARNINGS +#define VMW_WRPRN(fmt, ...) \ + do { \ + printf("[%s][WR][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_WRPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_ERRORS +#define VMW_ERPRN(fmt, ...) \ + do { \ + printf("[%s][ER][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_ERPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_INTERRUPTS +#define VMW_IRPRN(fmt, ...) \ + do { \ + printf("[%s][IR][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_IRPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_CONFIG +#define VMW_CFPRN(fmt, ...) \ + do { \ + printf("[%s][CF][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_CFPRN(fmt, ...) do {} while (0) +#endif + +#ifdef VMXNET_DEBUG_RINGS +#define VMW_RIPRN(fmt, ...) \ + do { \ + printf("[%s][RI][%s]: " fmt "\n", VMXNET_DEVICE_NAME, __func__, \ + ## __VA_ARGS__); \ + } while (0) +#else +#define VMW_RIPRN(fmt, ...) do {} while (0) +#endif + +#define VMXNET_MF "%02X:%02X:%02X:%02X:%02X:%02X" +#define VMXNET_MA(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5] + +#endif /* _QEMU_VMXNET3_DEBUG_H */ diff --git a/qemu/hw/net/vmxnet_rx_pkt.c b/qemu/hw/net/vmxnet_rx_pkt.c new file mode 100644 index 000000000..aa5462931 --- /dev/null +++ b/qemu/hw/net/vmxnet_rx_pkt.c @@ -0,0 +1,186 @@ +/* + * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "vmxnet_rx_pkt.h" +#include "net/eth.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "net/checksum.h" +#include "net/tap.h" + +/* + * RX packet may contain up to 2 fragments - rebuilt eth header + * in case of VLAN tag stripping + * and payload received from QEMU - in any case + */ +#define VMXNET_MAX_RX_PACKET_FRAGMENTS (2) + +struct VmxnetRxPkt { + struct virtio_net_hdr virt_hdr; + uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; + struct iovec vec[VMXNET_MAX_RX_PACKET_FRAGMENTS]; + uint16_t vec_len; + uint32_t tot_len; + uint16_t tci; + bool vlan_stripped; + bool has_virt_hdr; + eth_pkt_types_e packet_type; + + /* Analysis results */ + bool isip4; + bool isip6; + bool isudp; + bool istcp; +}; + +void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr) +{ + struct VmxnetRxPkt *p = g_malloc0(sizeof *p); + p->has_virt_hdr = has_virt_hdr; + *pkt = p; +} + +void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt) +{ + g_free(pkt); +} + +struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + uint16_t tci = 0; + uint16_t ploff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); + } + + if (pkt->vlan_stripped) { + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); + pkt->vec[1].iov_base = (uint8_t *) data + ploff; + pkt->vec[1].iov_len = len - ploff; + pkt->vec_len = 2; + pkt->tot_len = len - ploff + sizeof(struct eth_header); + } else { + pkt->vec[0].iov_base = (void *)data; + pkt->vec[0].iov_len = len; + pkt->vec_len = 1; + pkt->tot_len = len; + } + + pkt->tci = tci; +} + +void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt) +{ +#ifdef VMXNET_RX_PKT_DEBUG + VmxnetRxPkt *pkt = (VmxnetRxPkt *)pkt; + assert(pkt); + + printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", + pkt->tot_len, pkt->vlan_stripped, pkt->tci); +#endif +} + +void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, + eth_pkt_types_e packet_type) +{ + assert(pkt); + + pkt->packet_type = packet_type; + +} + +eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tot_len; +} + +void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, + size_t len) +{ + assert(pkt); + + eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp); +} + +void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp) +{ + assert(pkt); + + *isip4 = pkt->isip4; + *isip6 = pkt->isip6; + *isudp = pkt->isudp; + *istcp = pkt->istcp; +} + +struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec; +} + +void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, + struct virtio_net_hdr *vhdr) +{ + assert(pkt); + + memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); +} + +bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vlan_stripped; +} + +bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->has_virt_hdr; +} + +uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tci; +} diff --git a/qemu/hw/net/vmxnet_rx_pkt.h b/qemu/hw/net/vmxnet_rx_pkt.h new file mode 100644 index 000000000..a425846b5 --- /dev/null +++ b/qemu/hw/net/vmxnet_rx_pkt.h @@ -0,0 +1,176 @@ +/* + * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VMXNET_RX_PKT_H +#define VMXNET_RX_PKT_H + +#include "stdint.h" +#include "stdbool.h" +#include "net/eth.h" + +/* defines to enable packet dump functions */ +/*#define VMXNET_RX_PKT_DEBUG*/ + +struct VmxnetRxPkt; + +/** + * Clean all rx packet resources + * + * @pkt: packet + * + */ +void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt); + +/** + * Init function for rx packet functionality + * + * @pkt: packet pointer + * @has_virt_hdr: device uses virtio header + * + */ +void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr); + +/** + * returns total length of data attached to rx context + * + * @pkt: packet + * + * Return: nothing + * + */ +size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt); + +/** + * parse and set packet analysis results + * + * @pkt: packet + * @data: pointer to the data buffer to be parsed + * @len: data length + * + */ +void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, + size_t len); + +/** + * fetches packet analysis results + * + * @pkt: packet + * @isip4: whether the packet given is IPv4 + * @isip6: whether the packet given is IPv6 + * @isudp: whether the packet given is UDP + * @istcp: whether the packet given is TCP + * + */ +void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp); + +/** + * returns virtio header stored in rx context + * + * @pkt: packet + * @ret: virtio header + * + */ +struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt); + +/** + * returns packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt); + +/** + * returns vlan tag + * + * @pkt: packet + * @ret: VLAN tag + * + */ +uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt); + +/** + * tells whether vlan was stripped from the packet + * + * @pkt: packet + * @ret: VLAN stripped sign + * + */ +bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt); + +/** + * notifies caller if the packet has virtio header + * + * @pkt: packet + * @ret: true if packet has virtio header, false otherwize + * + */ +bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt); + +/** + * attach data to rx packet + * + * @pkt: packet + * @data: pointer to the data buffer + * @len: data length + * @strip_vlan: should the module strip vlan from data + * + */ +void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan); + +/** + * returns io vector that holds the attached data + * + * @pkt: packet + * @ret: pointer to IOVec + * + */ +struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt); + +/** + * prints rx packet data if debug is enabled + * + * @pkt: packet + * + */ +void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt); + +/** + * copy passed vhdr data to packet context + * + * @pkt: packet + * @vhdr: VHDR buffer + * + */ +void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, + struct virtio_net_hdr *vhdr); + +/** + * save packet type in packet context + * + * @pkt: packet + * @packet_type: the packet type + * + */ +void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, + eth_pkt_types_e packet_type); + +#endif diff --git a/qemu/hw/net/vmxnet_tx_pkt.c b/qemu/hw/net/vmxnet_tx_pkt.c new file mode 100644 index 000000000..f7344c4cb --- /dev/null +++ b/qemu/hw/net/vmxnet_tx_pkt.c @@ -0,0 +1,567 @@ +/* + * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/hw.h" +#include "vmxnet_tx_pkt.h" +#include "net/eth.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "net/checksum.h" +#include "net/tap.h" +#include "net/net.h" + +enum { + VMXNET_TX_PKT_VHDR_FRAG = 0, + VMXNET_TX_PKT_L2HDR_FRAG, + VMXNET_TX_PKT_L3HDR_FRAG, + VMXNET_TX_PKT_PL_START_FRAG +}; + +/* TX packet private context */ +struct VmxnetTxPkt { + struct virtio_net_hdr virt_hdr; + bool has_virt_hdr; + + struct iovec *raw; + uint32_t raw_frags; + uint32_t max_raw_frags; + + struct iovec *vec; + + uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + + uint32_t payload_len; + + uint32_t payload_frags; + uint32_t max_payload_frags; + + uint16_t hdr_len; + eth_pkt_types_e packet_type; + uint8_t l4proto; +}; + +void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, + bool has_virt_hdr) +{ + struct VmxnetTxPkt *p = g_malloc0(sizeof *p); + + p->vec = g_malloc((sizeof *p->vec) * + (max_frags + VMXNET_TX_PKT_PL_START_FRAG)); + + p->raw = g_malloc((sizeof *p->raw) * max_frags); + + p->max_payload_frags = max_frags; + p->max_raw_frags = max_frags; + p->has_virt_hdr = has_virt_hdr; + p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; + p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len = + p->has_virt_hdr ? sizeof p->virt_hdr : 0; + p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; + p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0; + + *pkt = p; +} + +void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt) +{ + if (pkt) { + g_free(pkt->vec); + g_free(pkt->raw); + g_free(pkt); + } +} + +void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt) +{ + uint16_t csum; + uint32_t ph_raw_csum; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + struct ip_header *ip_hdr; + + if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && + VIRTIO_NET_HDR_GSO_UDP != gso_type) { + return; + } + + ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); + + /* Calculate IP header checksum */ + ip_hdr->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hdr, + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); + ip_hdr->ip_sum = cpu_to_be16(csum); + + /* Calculate IP pseudo header checksum */ + ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); + csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); + iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); +} + +static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt) +{ + pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len + + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; +} + +static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) +{ + struct iovec *l2_hdr, *l3_hdr; + size_t bytes_read; + size_t full_ip6hdr_len; + uint16_t l3_proto; + + assert(pkt); + + l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; + l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG]; + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, + ETH_MAX_L2_HDR_LEN); + if (bytes_read < ETH_MAX_L2_HDR_LEN) { + l2_hdr->iov_len = 0; + return false; + } else { + l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); + } + + l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + + switch (l3_proto) { + case ETH_P_IP: + l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, sizeof(struct ip_header)); + + if (bytes_read < sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + /* copy optional IPv4 header data */ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + break; + + case ETH_P_IPV6: + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + &pkt->l4proto, &full_ip6hdr_len)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, full_ip6hdr_len); + + if (bytes_read < full_ip6hdr_len) { + l3_hdr->iov_len = 0; + return false; + } else { + l3_hdr->iov_len = full_ip6hdr_len; + } + break; + + default: + l3_hdr->iov_len = 0; + break; + } + + vmxnet_tx_pkt_calculate_hdr_len(pkt); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); + return true; +} + +static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt) +{ + size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; + + pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], + pkt->max_payload_frags, + pkt->raw, pkt->raw_frags, + pkt->hdr_len, payload_len); + + if (pkt->payload_frags != (uint32_t) -1) { + pkt->payload_len = payload_len; + return true; + } else { + return false; + } +} + +bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt) +{ + return vmxnet_tx_pkt_parse_headers(pkt) && + vmxnet_tx_pkt_rebuild_payload(pkt); +} + +struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt, + bool tso_enable) +{ + uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; + uint16_t l3_proto; + + l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, + pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len); + + if (!tso_enable) { + goto func_exit; + } + + rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base, + pkt->l4proto); + +func_exit: + return rc; +} + +void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size) +{ + struct tcp_hdr l4hdr; + assert(pkt); + + /* csum has to be enabled if tso is. */ + assert(csum_enable || !tso_enable); + + pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable); + + switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_NONE: + pkt->virt_hdr.hdr_len = 0; + pkt->virt_hdr.gso_size = 0; + break; + + case VIRTIO_NET_HDR_GSO_UDP: + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); + break; + + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + 0, &l4hdr, sizeof(l4hdr)); + pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + break; + + default: + g_assert_not_reached(); + } + + if (csum_enable) { + switch (pkt->l4proto) { + case IP_PROTO_TCP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum); + break; + case IP_PROTO_UDP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum); + break; + default: + break; + } + } +} + +void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan) +{ + bool is_new; + assert(pkt); + + eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, &is_new); + + /* update l2hdrlen */ + if (is_new) { + pkt->hdr_len += sizeof(struct vlan_header); + pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len += + sizeof(struct vlan_header); + } +} + +bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, + size_t len) +{ + hwaddr mapped_len = 0; + struct iovec *ventry; + assert(pkt); + assert(pkt->max_raw_frags > pkt->raw_frags); + + if (!len) { + return true; + } + + ventry = &pkt->raw[pkt->raw_frags]; + mapped_len = len; + + ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); + ventry->iov_len = mapped_len; + pkt->raw_frags += !!ventry->iov_base; + + if ((ventry->iov_base == NULL) || (len != mapped_len)) { + return false; + } + + return true; +} + +eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt) +{ + assert(pkt); + + return pkt->hdr_len + pkt->payload_len; +} + +void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt) +{ +#ifdef VMXNET_TX_PKT_DEBUG + assert(pkt); + + printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, " + "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type, + pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len, + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); +#endif +} + +void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt) +{ + int i; + + /* no assert, as reset can be called before tx_pkt_init */ + if (!pkt) { + return; + } + + memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); + + g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base); + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + + assert(pkt->vec); + for (i = VMXNET_TX_PKT_L2HDR_FRAG; + i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) { + pkt->vec[i].iov_len = 0; + } + pkt->payload_len = 0; + pkt->payload_frags = 0; + + assert(pkt->raw); + for (i = 0; i < pkt->raw_frags; i++) { + assert(pkt->raw[i].iov_base); + cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, + false, pkt->raw[i].iov_len); + pkt->raw[i].iov_len = 0; + } + pkt->raw_frags = 0; + + pkt->hdr_len = 0; + pkt->packet_type = 0; + pkt->l4proto = 0; +} + +static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt) +{ + struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; + uint32_t csum_cntr; + uint16_t csum = 0; + /* num of iovec without vhdr */ + uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1; + uint16_t csl; + struct ip_header *iphdr; + size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; + + /* Put zero to checksum field */ + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); + + /* Calculate L4 TCP/UDP checksum */ + csl = pkt->payload_len; + + /* data checksum */ + csum_cntr = + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); + /* add pseudo header to csum */ + iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; + csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + + /* Put the checksum obtained into the packet */ + csum = cpu_to_be16(net_checksum_finish(csum_cntr)); + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); +} + +enum { + VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, + VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS, + VMXNET_TX_PKT_FRAGMENT_HEADER_NUM +}; + +#define VMXNET_MAX_FRAG_SG_LIST (64) + +static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt, + int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) +{ + size_t fetched = 0; + struct iovec *src = pkt->vec; + + *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM; + + while (fetched < pkt->virt_hdr.gso_size) { + + /* no more place in fragment iov */ + if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) { + break; + } + + /* no more data in iovec */ + if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) { + break; + } + + + dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; + dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, + pkt->virt_hdr.gso_size - fetched); + + *src_offset += dst[*dst_idx].iov_len; + fetched += dst[*dst_idx].iov_len; + + if (*src_offset == src[*src_idx].iov_len) { + *src_offset = 0; + (*src_idx)++; + } + + (*dst_idx)++; + } + + return fetched; +} + +static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt, + NetClientState *nc) +{ + struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST]; + size_t fragment_len = 0; + bool more_frags = false; + + /* some pointers for shorter code */ + void *l2_iov_base, *l3_iov_base; + size_t l2_iov_len, l3_iov_len; + int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx; + size_t src_offset = 0; + size_t fragment_offset = 0; + + l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base; + l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len; + l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; + l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; + + /* Copy headers */ + fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; + fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; + fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; + fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; + + + /* Put as much data as possible and send */ + do { + fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, + fragment, &dst_idx); + + more_frags = (fragment_offset + fragment_len < pkt->payload_len); + + eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base, + l3_iov_len, fragment_len, fragment_offset, more_frags); + + eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); + + qemu_sendv_packet(nc, fragment, dst_idx); + + fragment_offset += fragment_len; + + } while (more_frags); + + return true; +} + +bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc) +{ + assert(pkt); + + if (!pkt->has_virt_hdr && + pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + vmxnet_tx_pkt_do_sw_csum(pkt); + } + + /* + * Since underlying infrastructure does not support IP datagrams longer + * than 64K we should drop such packets and don't even try to send + */ + if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { + if (pkt->payload_len > + ETH_MAX_IP_DGRAM_LEN - + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) { + return false; + } + } + + if (pkt->has_virt_hdr || + pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { + qemu_sendv_packet(nc, pkt->vec, + pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG); + return true; + } + + return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc); +} diff --git a/qemu/hw/net/vmxnet_tx_pkt.h b/qemu/hw/net/vmxnet_tx_pkt.h new file mode 100644 index 000000000..57121a6fe --- /dev/null +++ b/qemu/hw/net/vmxnet_tx_pkt.h @@ -0,0 +1,148 @@ +/* + * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VMXNET_TX_PKT_H +#define VMXNET_TX_PKT_H + +#include "stdint.h" +#include "stdbool.h" +#include "net/eth.h" +#include "exec/hwaddr.h" + +/* define to enable packet dump functions */ +/*#define VMXNET_TX_PKT_DEBUG*/ + +struct VmxnetTxPkt; + +/** + * Init function for tx packet functionality + * + * @pkt: packet pointer + * @max_frags: max tx ip fragments + * @has_virt_hdr: device uses virtio header. + */ +void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, + bool has_virt_hdr); + +/** + * Clean all tx packet resources. + * + * @pkt: packet. + */ +void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt); + +/** + * get virtio header + * + * @pkt: packet + * @ret: virtio header + */ +struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt); + +/** + * build virtio header (will be stored in module context) + * + * @pkt: packet + * @tso_enable: TSO enabled + * @csum_enable: CSO enabled + * @gso_size: MSS size for TSO + * + */ +void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size); + +/** + * updates vlan tag, and adds vlan header in case it is missing + * + * @pkt: packet + * @vlan: VLAN tag + * + */ +void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan); + +/** + * populate data fragment into pkt context. + * + * @pkt: packet + * @pa: physical address of fragment + * @len: length of fragment + * + */ +bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, + size_t len); + +/** + * fix ip header fields and calculate checksums needed. + * + * @pkt: packet + * + */ +void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt); + +/** + * get length of all populated data. + * + * @pkt: packet + * @ret: total data length + * + */ +size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt); + +/** + * get packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt); + +/** + * prints packet data if debug is enabled + * + * @pkt: packet + * + */ +void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt); + +/** + * reset tx packet private context (needed to be called between packets) + * + * @pkt: packet + * + */ +void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt); + +/** + * Send packet to qemu. handles sw offloads if vhdr is not supported. + * + * @pkt: packet + * @nc: NetClientState + * @ret: operation result + * + */ +bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc); + +/** + * parse raw packet data and analyze offload requirements. + * + * @pkt: packet + * + */ +bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt); + +#endif diff --git a/qemu/hw/net/xen_nic.c b/qemu/hw/net/xen_nic.c new file mode 100644 index 000000000..d7cbfc103 --- /dev/null +++ b/qemu/hw/net/xen_nic.c @@ -0,0 +1,422 @@ +/* + * xen paravirt network card backend + * + * (c) Gerd Hoffmann <kraxel@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "hw/hw.h" +#include "net/net.h" +#include "net/checksum.h" +#include "net/util.h" +#include "hw/xen/xen_backend.h" + +#include <xen/io/netif.h> + +/* ------------------------------------------------------------- */ + +struct XenNetDev { + struct XenDevice xendev; /* must be first */ + char *mac; + int tx_work; + int tx_ring_ref; + int rx_ring_ref; + struct netif_tx_sring *txs; + struct netif_rx_sring *rxs; + netif_tx_back_ring_t tx_ring; + netif_rx_back_ring_t rx_ring; + NICConf conf; + NICState *nic; +}; + +/* ------------------------------------------------------------- */ + +static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st) +{ + RING_IDX i = netdev->tx_ring.rsp_prod_pvt; + netif_tx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netdev->tx_ring, i); + resp->id = txp->id; + resp->status = st; + +#if 0 + if (txp->flags & NETTXF_extra_info) { + RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL; + } +#endif + + netdev->tx_ring.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify); + if (notify) { + xen_be_send_notify(&netdev->xendev); + } + + if (i == netdev->tx_ring.req_cons) { + int more_to_do; + RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do); + if (more_to_do) { + netdev->tx_work++; + } + } +} + +static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end) +{ +#if 0 + /* + * Hmm, why netback fails everything in the ring? + * Should we do that even when not supporting SG and TSO? + */ + RING_IDX cons = netdev->tx_ring.req_cons; + + do { + make_tx_response(netif, txp, NETIF_RSP_ERROR); + if (cons >= end) { + break; + } + txp = RING_GET_REQUEST(&netdev->tx_ring, cons++); + } while (1); + netdev->tx_ring.req_cons = cons; + netif_schedule_work(netif); + netif_put(netif); +#else + net_tx_response(netdev, txp, NETIF_RSP_ERROR); +#endif +} + +static void net_tx_packets(struct XenNetDev *netdev) +{ + netif_tx_request_t txreq; + RING_IDX rc, rp; + void *page; + void *tmpbuf = NULL; + + for (;;) { + rc = netdev->tx_ring.req_cons; + rp = netdev->tx_ring.sring->req_prod; + xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ + + while ((rc != rp)) { + if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) { + break; + } + memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq)); + netdev->tx_ring.req_cons = ++rc; + +#if 1 + /* should not happen in theory, we don't announce the * + * feature-{sg,gso,whatelse} flags in xenstore (yet?) */ + if (txreq.flags & NETTXF_extra_info) { + xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } + if (txreq.flags & NETTXF_more_data) { + xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } +#endif + + if (txreq.size < 14) { + xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size); + net_tx_error(netdev, &txreq, rc); + continue; + } + + if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { + xen_be_printf(&netdev->xendev, 0, "error: page crossing\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } + + xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n", + txreq.gref, txreq.offset, txreq.size, txreq.flags, + (txreq.flags & NETTXF_csum_blank) ? " csum_blank" : "", + (txreq.flags & NETTXF_data_validated) ? " data_validated" : "", + (txreq.flags & NETTXF_more_data) ? " more_data" : "", + (txreq.flags & NETTXF_extra_info) ? " extra_info" : ""); + + page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, + netdev->xendev.dom, + txreq.gref, PROT_READ); + if (page == NULL) { + xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n", + txreq.gref); + net_tx_error(netdev, &txreq, rc); + continue; + } + if (txreq.flags & NETTXF_csum_blank) { + /* have read-only mapping -> can't fill checksum in-place */ + if (!tmpbuf) { + tmpbuf = g_malloc(XC_PAGE_SIZE); + } + memcpy(tmpbuf, page + txreq.offset, txreq.size); + net_checksum_calculate(tmpbuf, txreq.size); + qemu_send_packet(qemu_get_queue(netdev->nic), tmpbuf, + txreq.size); + } else { + qemu_send_packet(qemu_get_queue(netdev->nic), + page + txreq.offset, txreq.size); + } + xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1); + net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); + } + if (!netdev->tx_work) { + break; + } + netdev->tx_work = 0; + } + g_free(tmpbuf); +} + +/* ------------------------------------------------------------- */ + +static void net_rx_response(struct XenNetDev *netdev, + netif_rx_request_t *req, int8_t st, + uint16_t offset, uint16_t size, + uint16_t flags) +{ + RING_IDX i = netdev->rx_ring.rsp_prod_pvt; + netif_rx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netdev->rx_ring, i); + resp->offset = offset; + resp->flags = flags; + resp->id = req->id; + resp->status = (int16_t)size; + if (st < 0) { + resp->status = (int16_t)st; + } + + xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n", + i, resp->status, resp->flags); + + netdev->rx_ring.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify); + if (notify) { + xen_be_send_notify(&netdev->xendev); + } +} + +#define NET_IP_ALIGN 2 + +static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size) +{ + struct XenNetDev *netdev = qemu_get_nic_opaque(nc); + netif_rx_request_t rxreq; + RING_IDX rc, rp; + void *page; + + if (netdev->xendev.be_state != XenbusStateConnected) { + return -1; + } + + rc = netdev->rx_ring.req_cons; + rp = netdev->rx_ring.sring->req_prod; + xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ + + if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { + return 0; + } + if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { + xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", + (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); + return -1; + } + + memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq)); + netdev->rx_ring.req_cons = ++rc; + + page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, + netdev->xendev.dom, + rxreq.gref, PROT_WRITE); + if (page == NULL) { + xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n", + rxreq.gref); + net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0); + return -1; + } + memcpy(page + NET_IP_ALIGN, buf, size); + xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1); + net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0); + + return size; +} + +/* ------------------------------------------------------------- */ + +static NetClientInfo net_xen_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = net_rx_packet, +}; + +static int net_init(struct XenDevice *xendev) +{ + struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + + /* read xenstore entries */ + if (netdev->mac == NULL) { + netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac"); + } + + /* do we have all we need? */ + if (netdev->mac == NULL) { + return -1; + } + + if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) { + return -1; + } + + netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf, + "xen", NULL, netdev); + + snprintf(qemu_get_queue(netdev->nic)->info_str, + sizeof(qemu_get_queue(netdev->nic)->info_str), + "nic: xenbus vif macaddr=%s", netdev->mac); + + /* fill info */ + xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1); + xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0); + + return 0; +} + +static int net_connect(struct XenDevice *xendev) +{ + struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + int rx_copy; + + if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref", + &netdev->tx_ring_ref) == -1) { + return -1; + } + if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref", + &netdev->rx_ring_ref) == -1) { + return 1; + } + if (xenstore_read_fe_int(&netdev->xendev, "event-channel", + &netdev->xendev.remote_port) == -1) { + return -1; + } + + if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) { + rx_copy = 0; + } + if (rx_copy == 0) { + xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n"); + return -1; + } + + netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, + netdev->xendev.dom, + netdev->tx_ring_ref, + PROT_READ | PROT_WRITE); + if (!netdev->txs) { + return -1; + } + netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, + netdev->xendev.dom, + netdev->rx_ring_ref, + PROT_READ | PROT_WRITE); + if (!netdev->rxs) { + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1); + netdev->txs = NULL; + return -1; + } + BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); + BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE); + + xen_be_bind_evtchn(&netdev->xendev); + + xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, " + "remote port %d, local port %d\n", + netdev->tx_ring_ref, netdev->rx_ring_ref, + netdev->xendev.remote_port, netdev->xendev.local_port); + + net_tx_packets(netdev); + return 0; +} + +static void net_disconnect(struct XenDevice *xendev) +{ + struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + + xen_be_unbind_evtchn(&netdev->xendev); + + if (netdev->txs) { + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1); + netdev->txs = NULL; + } + if (netdev->rxs) { + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1); + netdev->rxs = NULL; + } +} + +static void net_event(struct XenDevice *xendev) +{ + struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + net_tx_packets(netdev); + qemu_flush_queued_packets(qemu_get_queue(netdev->nic)); +} + +static int net_free(struct XenDevice *xendev) +{ + struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + + if (netdev->nic) { + qemu_del_nic(netdev->nic); + netdev->nic = NULL; + } + g_free(netdev->mac); + netdev->mac = NULL; + return 0; +} + +/* ------------------------------------------------------------- */ + +struct XenDevOps xen_netdev_ops = { + .size = sizeof(struct XenNetDev), + .flags = DEVOPS_FLAG_NEED_GNTDEV, + .init = net_init, + .initialise = net_connect, + .event = net_event, + .disconnect = net_disconnect, + .free = net_free, +}; diff --git a/qemu/hw/net/xgmac.c b/qemu/hw/net/xgmac.c new file mode 100644 index 000000000..15fb68194 --- /dev/null +++ b/qemu/hw/net/xgmac.c @@ -0,0 +1,432 @@ +/* + * QEMU model of XGMAC Ethernet. + * + * derived from the Xilinx AXI-Ethernet by Edgar E. Iglesias. + * + * Copyright (c) 2011 Calxeda, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/sysbus.h" +#include "sysemu/char.h" +#include "qemu/log.h" +#include "net/net.h" +#include "net/checksum.h" + +#ifdef DEBUG_XGMAC +#define DEBUGF_BRK(message, args...) do { \ + fprintf(stderr, (message), ## args); \ + } while (0) +#else +#define DEBUGF_BRK(message, args...) do { } while (0) +#endif + +#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ +#define XGMAC_FRAME_FILTER 0x00000001 /* MAC Frame Filter */ +#define XGMAC_FLOW_CTRL 0x00000006 /* MAC Flow Control */ +#define XGMAC_VLAN_TAG 0x00000007 /* VLAN Tags */ +#define XGMAC_VERSION 0x00000008 /* Version */ +/* VLAN tag for insertion or replacement into tx frames */ +#define XGMAC_VLAN_INCL 0x00000009 +#define XGMAC_LPI_CTRL 0x0000000a /* LPI Control and Status */ +#define XGMAC_LPI_TIMER 0x0000000b /* LPI Timers Control */ +#define XGMAC_TX_PACE 0x0000000c /* Transmit Pace and Stretch */ +#define XGMAC_VLAN_HASH 0x0000000d /* VLAN Hash Table */ +#define XGMAC_DEBUG 0x0000000e /* Debug */ +#define XGMAC_INT_STATUS 0x0000000f /* Interrupt and Control */ +/* HASH table registers */ +#define XGMAC_HASH(n) ((0x00000300/4) + (n)) +#define XGMAC_NUM_HASH 16 +/* Operation Mode */ +#define XGMAC_OPMODE (0x00000400/4) +/* Remote Wake-Up Frame Filter */ +#define XGMAC_REMOTE_WAKE (0x00000700/4) +/* PMT Control and Status */ +#define XGMAC_PMT (0x00000704/4) + +#define XGMAC_ADDR_HIGH(reg) (0x00000010+((reg) * 2)) +#define XGMAC_ADDR_LOW(reg) (0x00000011+((reg) * 2)) + +#define DMA_BUS_MODE 0x000003c0 /* Bus Mode */ +#define DMA_XMT_POLL_DEMAND 0x000003c1 /* Transmit Poll Demand */ +#define DMA_RCV_POLL_DEMAND 0x000003c2 /* Received Poll Demand */ +#define DMA_RCV_BASE_ADDR 0x000003c3 /* Receive List Base */ +#define DMA_TX_BASE_ADDR 0x000003c4 /* Transmit List Base */ +#define DMA_STATUS 0x000003c5 /* Status Register */ +#define DMA_CONTROL 0x000003c6 /* Ctrl (Operational Mode) */ +#define DMA_INTR_ENA 0x000003c7 /* Interrupt Enable */ +#define DMA_MISSED_FRAME_CTR 0x000003c8 /* Missed Frame Counter */ +/* Receive Interrupt Watchdog Timer */ +#define DMA_RI_WATCHDOG_TIMER 0x000003c9 +#define DMA_AXI_BUS 0x000003ca /* AXI Bus Mode */ +#define DMA_AXI_STATUS 0x000003cb /* AXI Status */ +#define DMA_CUR_TX_DESC_ADDR 0x000003d2 /* Current Host Tx Descriptor */ +#define DMA_CUR_RX_DESC_ADDR 0x000003d3 /* Current Host Rx Descriptor */ +#define DMA_CUR_TX_BUF_ADDR 0x000003d4 /* Current Host Tx Buffer */ +#define DMA_CUR_RX_BUF_ADDR 0x000003d5 /* Current Host Rx Buffer */ +#define DMA_HW_FEATURE 0x000003d6 /* Enabled Hardware Features */ + +/* DMA Status register defines */ +#define DMA_STATUS_GMI 0x08000000 /* MMC interrupt */ +#define DMA_STATUS_GLI 0x04000000 /* GMAC Line interface int */ +#define DMA_STATUS_EB_MASK 0x00380000 /* Error Bits Mask */ +#define DMA_STATUS_EB_TX_ABORT 0x00080000 /* Error Bits - TX Abort */ +#define DMA_STATUS_EB_RX_ABORT 0x00100000 /* Error Bits - RX Abort */ +#define DMA_STATUS_TS_MASK 0x00700000 /* Transmit Process State */ +#define DMA_STATUS_TS_SHIFT 20 +#define DMA_STATUS_RS_MASK 0x000e0000 /* Receive Process State */ +#define DMA_STATUS_RS_SHIFT 17 +#define DMA_STATUS_NIS 0x00010000 /* Normal Interrupt Summary */ +#define DMA_STATUS_AIS 0x00008000 /* Abnormal Interrupt Summary */ +#define DMA_STATUS_ERI 0x00004000 /* Early Receive Interrupt */ +#define DMA_STATUS_FBI 0x00002000 /* Fatal Bus Error Interrupt */ +#define DMA_STATUS_ETI 0x00000400 /* Early Transmit Interrupt */ +#define DMA_STATUS_RWT 0x00000200 /* Receive Watchdog Timeout */ +#define DMA_STATUS_RPS 0x00000100 /* Receive Process Stopped */ +#define DMA_STATUS_RU 0x00000080 /* Receive Buffer Unavailable */ +#define DMA_STATUS_RI 0x00000040 /* Receive Interrupt */ +#define DMA_STATUS_UNF 0x00000020 /* Transmit Underflow */ +#define DMA_STATUS_OVF 0x00000010 /* Receive Overflow */ +#define DMA_STATUS_TJT 0x00000008 /* Transmit Jabber Timeout */ +#define DMA_STATUS_TU 0x00000004 /* Transmit Buffer Unavailable */ +#define DMA_STATUS_TPS 0x00000002 /* Transmit Process Stopped */ +#define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ + +/* DMA Control register defines */ +#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ +#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ +#define DMA_CONTROL_DFF 0x01000000 /* Disable flush of rx frames */ + +struct desc { + uint32_t ctl_stat; + uint16_t buffer1_size; + uint16_t buffer2_size; + uint32_t buffer1_addr; + uint32_t buffer2_addr; + uint32_t ext_stat; + uint32_t res[3]; +}; + +#define R_MAX 0x400 + +typedef struct RxTxStats { + uint64_t rx_bytes; + uint64_t tx_bytes; + + uint64_t rx; + uint64_t rx_bcast; + uint64_t rx_mcast; +} RxTxStats; + +#define TYPE_XGMAC "xgmac" +#define XGMAC(obj) OBJECT_CHECK(XgmacState, (obj), TYPE_XGMAC) + +typedef struct XgmacState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + qemu_irq sbd_irq; + qemu_irq pmt_irq; + qemu_irq mci_irq; + NICState *nic; + NICConf conf; + + struct RxTxStats stats; + uint32_t regs[R_MAX]; +} XgmacState; + +static const VMStateDescription vmstate_rxtx_stats = { + .name = "xgmac_stats", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(rx_bytes, RxTxStats), + VMSTATE_UINT64(tx_bytes, RxTxStats), + VMSTATE_UINT64(rx, RxTxStats), + VMSTATE_UINT64(rx_bcast, RxTxStats), + VMSTATE_UINT64(rx_mcast, RxTxStats), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_xgmac = { + .name = "xgmac", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(stats, XgmacState, 0, vmstate_rxtx_stats, RxTxStats), + VMSTATE_UINT32_ARRAY(regs, XgmacState, R_MAX), + VMSTATE_END_OF_LIST() + } +}; + +static void xgmac_read_desc(XgmacState *s, struct desc *d, int rx) +{ + uint32_t addr = rx ? s->regs[DMA_CUR_RX_DESC_ADDR] : + s->regs[DMA_CUR_TX_DESC_ADDR]; + cpu_physical_memory_read(addr, d, sizeof(*d)); +} + +static void xgmac_write_desc(XgmacState *s, struct desc *d, int rx) +{ + int reg = rx ? DMA_CUR_RX_DESC_ADDR : DMA_CUR_TX_DESC_ADDR; + uint32_t addr = s->regs[reg]; + + if (!rx && (d->ctl_stat & 0x00200000)) { + s->regs[reg] = s->regs[DMA_TX_BASE_ADDR]; + } else if (rx && (d->buffer1_size & 0x8000)) { + s->regs[reg] = s->regs[DMA_RCV_BASE_ADDR]; + } else { + s->regs[reg] += sizeof(*d); + } + cpu_physical_memory_write(addr, d, sizeof(*d)); +} + +static void xgmac_enet_send(XgmacState *s) +{ + struct desc bd; + int frame_size; + int len; + uint8_t frame[8192]; + uint8_t *ptr; + + ptr = frame; + frame_size = 0; + while (1) { + xgmac_read_desc(s, &bd, 0); + if ((bd.ctl_stat & 0x80000000) == 0) { + /* Run out of descriptors to transmit. */ + break; + } + len = (bd.buffer1_size & 0xfff) + (bd.buffer2_size & 0xfff); + + if ((bd.buffer1_size & 0xfff) > 2048) { + DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... -- " + "xgmac buffer 1 len on send > 2048 (0x%x)\n", + __func__, bd.buffer1_size & 0xfff); + } + if ((bd.buffer2_size & 0xfff) != 0) { + DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... -- " + "xgmac buffer 2 len on send != 0 (0x%x)\n", + __func__, bd.buffer2_size & 0xfff); + } + if (len >= sizeof(frame)) { + DEBUGF_BRK("qemu:%s: buffer overflow %d read into %zu " + "buffer\n" , __func__, len, sizeof(frame)); + DEBUGF_BRK("qemu:%s: buffer1.size=%d; buffer2.size=%d\n", + __func__, bd.buffer1_size, bd.buffer2_size); + } + + cpu_physical_memory_read(bd.buffer1_addr, ptr, len); + ptr += len; + frame_size += len; + if (bd.ctl_stat & 0x20000000) { + /* Last buffer in frame. */ + qemu_send_packet(qemu_get_queue(s->nic), frame, len); + ptr = frame; + frame_size = 0; + s->regs[DMA_STATUS] |= DMA_STATUS_TI | DMA_STATUS_NIS; + } + bd.ctl_stat &= ~0x80000000; + /* Write back the modified descriptor. */ + xgmac_write_desc(s, &bd, 0); + } +} + +static void enet_update_irq(XgmacState *s) +{ + int stat = s->regs[DMA_STATUS] & s->regs[DMA_INTR_ENA]; + qemu_set_irq(s->sbd_irq, !!stat); +} + +static uint64_t enet_read(void *opaque, hwaddr addr, unsigned size) +{ + XgmacState *s = opaque; + uint64_t r = 0; + addr >>= 2; + + switch (addr) { + case XGMAC_VERSION: + r = 0x1012; + break; + default: + if (addr < ARRAY_SIZE(s->regs)) { + r = s->regs[addr]; + } + break; + } + return r; +} + +static void enet_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + XgmacState *s = opaque; + + addr >>= 2; + switch (addr) { + case DMA_BUS_MODE: + s->regs[DMA_BUS_MODE] = value & ~0x1; + break; + case DMA_XMT_POLL_DEMAND: + xgmac_enet_send(s); + break; + case DMA_STATUS: + s->regs[DMA_STATUS] = s->regs[DMA_STATUS] & ~value; + break; + case DMA_RCV_BASE_ADDR: + s->regs[DMA_RCV_BASE_ADDR] = s->regs[DMA_CUR_RX_DESC_ADDR] = value; + break; + case DMA_TX_BASE_ADDR: + s->regs[DMA_TX_BASE_ADDR] = s->regs[DMA_CUR_TX_DESC_ADDR] = value; + break; + default: + if (addr < ARRAY_SIZE(s->regs)) { + s->regs[addr] = value; + } + break; + } + enet_update_irq(s); +} + +static const MemoryRegionOps enet_mem_ops = { + .read = enet_read, + .write = enet_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static int eth_can_rx(XgmacState *s) +{ + /* RX enabled? */ + return s->regs[DMA_CONTROL] & DMA_CONTROL_SR; +} + +static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) +{ + XgmacState *s = qemu_get_nic_opaque(nc); + static const unsigned char sa_bcast[6] = {0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; + int unicast, broadcast, multicast; + struct desc bd; + ssize_t ret; + + if (!eth_can_rx(s)) { + return -1; + } + unicast = ~buf[0] & 0x1; + broadcast = memcmp(buf, sa_bcast, 6) == 0; + multicast = !unicast && !broadcast; + if (size < 12) { + s->regs[DMA_STATUS] |= DMA_STATUS_RI | DMA_STATUS_NIS; + ret = -1; + goto out; + } + + xgmac_read_desc(s, &bd, 1); + if ((bd.ctl_stat & 0x80000000) == 0) { + s->regs[DMA_STATUS] |= DMA_STATUS_RU | DMA_STATUS_AIS; + ret = size; + goto out; + } + + cpu_physical_memory_write(bd.buffer1_addr, buf, size); + + /* Add in the 4 bytes for crc (the real hw returns length incl crc) */ + size += 4; + bd.ctl_stat = (size << 16) | 0x300; + xgmac_write_desc(s, &bd, 1); + + s->stats.rx_bytes += size; + s->stats.rx++; + if (multicast) { + s->stats.rx_mcast++; + } else if (broadcast) { + s->stats.rx_bcast++; + } + + s->regs[DMA_STATUS] |= DMA_STATUS_RI | DMA_STATUS_NIS; + ret = size; + +out: + enet_update_irq(s); + return ret; +} + +static NetClientInfo net_xgmac_enet_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = eth_rx, +}; + +static int xgmac_enet_init(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + XgmacState *s = XGMAC(dev); + + memory_region_init_io(&s->iomem, OBJECT(s), &enet_mem_ops, s, + "xgmac", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->sbd_irq); + sysbus_init_irq(sbd, &s->pmt_irq); + sysbus_init_irq(sbd, &s->mci_irq); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_xgmac_enet_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + s->regs[XGMAC_ADDR_HIGH(0)] = (s->conf.macaddr.a[5] << 8) | + s->conf.macaddr.a[4]; + s->regs[XGMAC_ADDR_LOW(0)] = (s->conf.macaddr.a[3] << 24) | + (s->conf.macaddr.a[2] << 16) | + (s->conf.macaddr.a[1] << 8) | + s->conf.macaddr.a[0]; + + return 0; +} + +static Property xgmac_properties[] = { + DEFINE_NIC_PROPERTIES(XgmacState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xgmac_enet_class_init(ObjectClass *klass, void *data) +{ + SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + sbc->init = xgmac_enet_init; + dc->vmsd = &vmstate_xgmac; + dc->props = xgmac_properties; +} + +static const TypeInfo xgmac_enet_info = { + .name = TYPE_XGMAC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XgmacState), + .class_init = xgmac_enet_class_init, +}; + +static void xgmac_enet_register_types(void) +{ + type_register_static(&xgmac_enet_info); +} + +type_init(xgmac_enet_register_types) diff --git a/qemu/hw/net/xilinx_axienet.c b/qemu/hw/net/xilinx_axienet.c new file mode 100644 index 000000000..d63c42324 --- /dev/null +++ b/qemu/hw/net/xilinx_axienet.c @@ -0,0 +1,1082 @@ +/* + * QEMU model of Xilinx AXI-Ethernet. + * + * Copyright (c) 2011 Edgar E. Iglesias. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/sysbus.h" +#include "qemu/log.h" +#include "net/net.h" +#include "net/checksum.h" + +#include "hw/stream.h" + +#define DPHY(x) + +#define TYPE_XILINX_AXI_ENET "xlnx.axi-ethernet" +#define TYPE_XILINX_AXI_ENET_DATA_STREAM "xilinx-axienet-data-stream" +#define TYPE_XILINX_AXI_ENET_CONTROL_STREAM "xilinx-axienet-control-stream" + +#define XILINX_AXI_ENET(obj) \ + OBJECT_CHECK(XilinxAXIEnet, (obj), TYPE_XILINX_AXI_ENET) + +#define XILINX_AXI_ENET_DATA_STREAM(obj) \ + OBJECT_CHECK(XilinxAXIEnetStreamSlave, (obj),\ + TYPE_XILINX_AXI_ENET_DATA_STREAM) + +#define XILINX_AXI_ENET_CONTROL_STREAM(obj) \ + OBJECT_CHECK(XilinxAXIEnetStreamSlave, (obj),\ + TYPE_XILINX_AXI_ENET_CONTROL_STREAM) + +/* Advertisement control register. */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ + +#define CONTROL_PAYLOAD_WORDS 5 +#define CONTROL_PAYLOAD_SIZE (CONTROL_PAYLOAD_WORDS * (sizeof(uint32_t))) + +struct PHY { + uint32_t regs[32]; + + int link; + + unsigned int (*read)(struct PHY *phy, unsigned int req); + void (*write)(struct PHY *phy, unsigned int req, + unsigned int data); +}; + +static unsigned int tdk_read(struct PHY *phy, unsigned int req) +{ + int regnum; + unsigned r = 0; + + regnum = req & 0x1f; + + switch (regnum) { + case 1: + if (!phy->link) { + break; + } + /* MR1. */ + /* Speeds and modes. */ + r |= (1 << 13) | (1 << 14); + r |= (1 << 11) | (1 << 12); + r |= (1 << 5); /* Autoneg complete. */ + r |= (1 << 3); /* Autoneg able. */ + r |= (1 << 2); /* link. */ + r |= (1 << 1); /* link. */ + break; + case 5: + /* Link partner ability. + We are kind; always agree with whatever best mode + the guest advertises. */ + r = 1 << 14; /* Success. */ + /* Copy advertised modes. */ + r |= phy->regs[4] & (15 << 5); + /* Autoneg support. */ + r |= 1; + break; + case 17: + /* Marvell PHY on many xilinx boards. */ + r = 0x8000; /* 1000Mb */ + break; + case 18: + { + /* Diagnostics reg. */ + int duplex = 0; + int speed_100 = 0; + + if (!phy->link) { + break; + } + + /* Are we advertising 100 half or 100 duplex ? */ + speed_100 = !!(phy->regs[4] & ADVERTISE_100HALF); + speed_100 |= !!(phy->regs[4] & ADVERTISE_100FULL); + + /* Are we advertising 10 duplex or 100 duplex ? */ + duplex = !!(phy->regs[4] & ADVERTISE_100FULL); + duplex |= !!(phy->regs[4] & ADVERTISE_10FULL); + r = (speed_100 << 10) | (duplex << 11); + } + break; + + default: + r = phy->regs[regnum]; + break; + } + DPHY(qemu_log("\n%s %x = reg[%d]\n", __func__, r, regnum)); + return r; +} + +static void +tdk_write(struct PHY *phy, unsigned int req, unsigned int data) +{ + int regnum; + + regnum = req & 0x1f; + DPHY(qemu_log("%s reg[%d] = %x\n", __func__, regnum, data)); + switch (regnum) { + default: + phy->regs[regnum] = data; + break; + } + + /* Unconditionally clear regs[BMCR][BMCR_RESET] */ + phy->regs[0] &= ~0x8000; +} + +static void +tdk_init(struct PHY *phy) +{ + phy->regs[0] = 0x3100; + /* PHY Id. */ + phy->regs[2] = 0x0300; + phy->regs[3] = 0xe400; + /* Autonegotiation advertisement reg. */ + phy->regs[4] = 0x01E1; + phy->link = 1; + + phy->read = tdk_read; + phy->write = tdk_write; +} + +struct MDIOBus { + /* bus. */ + int mdc; + int mdio; + + /* decoder. */ + enum { + PREAMBLE, + SOF, + OPC, + ADDR, + REQ, + TURNAROUND, + DATA + } state; + unsigned int drive; + + unsigned int cnt; + unsigned int addr; + unsigned int opc; + unsigned int req; + unsigned int data; + + struct PHY *devs[32]; +}; + +static void +mdio_attach(struct MDIOBus *bus, struct PHY *phy, unsigned int addr) +{ + bus->devs[addr & 0x1f] = phy; +} + +#ifdef USE_THIS_DEAD_CODE +static void +mdio_detach(struct MDIOBus *bus, struct PHY *phy, unsigned int addr) +{ + bus->devs[addr & 0x1f] = NULL; +} +#endif + +static uint16_t mdio_read_req(struct MDIOBus *bus, unsigned int addr, + unsigned int reg) +{ + struct PHY *phy; + uint16_t data; + + phy = bus->devs[addr]; + if (phy && phy->read) { + data = phy->read(phy, reg); + } else { + data = 0xffff; + } + DPHY(qemu_log("%s addr=%d reg=%d data=%x\n", __func__, addr, reg, data)); + return data; +} + +static void mdio_write_req(struct MDIOBus *bus, unsigned int addr, + unsigned int reg, uint16_t data) +{ + struct PHY *phy; + + DPHY(qemu_log("%s addr=%d reg=%d data=%x\n", __func__, addr, reg, data)); + phy = bus->devs[addr]; + if (phy && phy->write) { + phy->write(phy, reg, data); + } +} + +#define DENET(x) + +#define R_RAF (0x000 / 4) +enum { + RAF_MCAST_REJ = (1 << 1), + RAF_BCAST_REJ = (1 << 2), + RAF_EMCF_EN = (1 << 12), + RAF_NEWFUNC_EN = (1 << 11) +}; + +#define R_IS (0x00C / 4) +enum { + IS_HARD_ACCESS_COMPLETE = 1, + IS_AUTONEG = (1 << 1), + IS_RX_COMPLETE = (1 << 2), + IS_RX_REJECT = (1 << 3), + IS_TX_COMPLETE = (1 << 5), + IS_RX_DCM_LOCK = (1 << 6), + IS_MGM_RDY = (1 << 7), + IS_PHY_RST_DONE = (1 << 8), +}; + +#define R_IP (0x010 / 4) +#define R_IE (0x014 / 4) +#define R_UAWL (0x020 / 4) +#define R_UAWU (0x024 / 4) +#define R_PPST (0x030 / 4) +enum { + PPST_LINKSTATUS = (1 << 0), + PPST_PHY_LINKSTATUS = (1 << 7), +}; + +#define R_STATS_RX_BYTESL (0x200 / 4) +#define R_STATS_RX_BYTESH (0x204 / 4) +#define R_STATS_TX_BYTESL (0x208 / 4) +#define R_STATS_TX_BYTESH (0x20C / 4) +#define R_STATS_RXL (0x290 / 4) +#define R_STATS_RXH (0x294 / 4) +#define R_STATS_RX_BCASTL (0x2a0 / 4) +#define R_STATS_RX_BCASTH (0x2a4 / 4) +#define R_STATS_RX_MCASTL (0x2a8 / 4) +#define R_STATS_RX_MCASTH (0x2ac / 4) + +#define R_RCW0 (0x400 / 4) +#define R_RCW1 (0x404 / 4) +enum { + RCW1_VLAN = (1 << 27), + RCW1_RX = (1 << 28), + RCW1_FCS = (1 << 29), + RCW1_JUM = (1 << 30), + RCW1_RST = (1 << 31), +}; + +#define R_TC (0x408 / 4) +enum { + TC_VLAN = (1 << 27), + TC_TX = (1 << 28), + TC_FCS = (1 << 29), + TC_JUM = (1 << 30), + TC_RST = (1 << 31), +}; + +#define R_EMMC (0x410 / 4) +enum { + EMMC_LINKSPEED_10MB = (0 << 30), + EMMC_LINKSPEED_100MB = (1 << 30), + EMMC_LINKSPEED_1000MB = (2 << 30), +}; + +#define R_PHYC (0x414 / 4) + +#define R_MC (0x500 / 4) +#define MC_EN (1 << 6) + +#define R_MCR (0x504 / 4) +#define R_MWD (0x508 / 4) +#define R_MRD (0x50c / 4) +#define R_MIS (0x600 / 4) +#define R_MIP (0x620 / 4) +#define R_MIE (0x640 / 4) +#define R_MIC (0x640 / 4) + +#define R_UAW0 (0x700 / 4) +#define R_UAW1 (0x704 / 4) +#define R_FMI (0x708 / 4) +#define R_AF0 (0x710 / 4) +#define R_AF1 (0x714 / 4) +#define R_MAX (0x34 / 4) + +/* Indirect registers. */ +struct TEMAC { + struct MDIOBus mdio_bus; + struct PHY phy; + + void *parent; +}; + +typedef struct XilinxAXIEnetStreamSlave XilinxAXIEnetStreamSlave; +typedef struct XilinxAXIEnet XilinxAXIEnet; + +struct XilinxAXIEnetStreamSlave { + Object parent; + + struct XilinxAXIEnet *enet; +} ; + +struct XilinxAXIEnet { + SysBusDevice busdev; + MemoryRegion iomem; + qemu_irq irq; + StreamSlave *tx_data_dev; + StreamSlave *tx_control_dev; + XilinxAXIEnetStreamSlave rx_data_dev; + XilinxAXIEnetStreamSlave rx_control_dev; + NICState *nic; + NICConf conf; + + + uint32_t c_rxmem; + uint32_t c_txmem; + uint32_t c_phyaddr; + + struct TEMAC TEMAC; + + /* MII regs. */ + union { + uint32_t regs[4]; + struct { + uint32_t mc; + uint32_t mcr; + uint32_t mwd; + uint32_t mrd; + }; + } mii; + + struct { + uint64_t rx_bytes; + uint64_t tx_bytes; + + uint64_t rx; + uint64_t rx_bcast; + uint64_t rx_mcast; + } stats; + + /* Receive configuration words. */ + uint32_t rcw[2]; + /* Transmit config. */ + uint32_t tc; + uint32_t emmc; + uint32_t phyc; + + /* Unicast Address Word. */ + uint32_t uaw[2]; + /* Unicast address filter used with extended mcast. */ + uint32_t ext_uaw[2]; + uint32_t fmi; + + uint32_t regs[R_MAX]; + + /* Multicast filter addrs. */ + uint32_t maddr[4][2]; + /* 32K x 1 lookup filter. */ + uint32_t ext_mtable[1024]; + + uint32_t hdr[CONTROL_PAYLOAD_WORDS]; + + uint8_t *rxmem; + uint32_t rxsize; + uint32_t rxpos; + + uint8_t rxapp[CONTROL_PAYLOAD_SIZE]; + uint32_t rxappsize; + + /* Whether axienet_eth_rx_notify should flush incoming queue. */ + bool need_flush; +}; + +static void axienet_rx_reset(XilinxAXIEnet *s) +{ + s->rcw[1] = RCW1_JUM | RCW1_FCS | RCW1_RX | RCW1_VLAN; +} + +static void axienet_tx_reset(XilinxAXIEnet *s) +{ + s->tc = TC_JUM | TC_TX | TC_VLAN; +} + +static inline int axienet_rx_resetting(XilinxAXIEnet *s) +{ + return s->rcw[1] & RCW1_RST; +} + +static inline int axienet_rx_enabled(XilinxAXIEnet *s) +{ + return s->rcw[1] & RCW1_RX; +} + +static inline int axienet_extmcf_enabled(XilinxAXIEnet *s) +{ + return !!(s->regs[R_RAF] & RAF_EMCF_EN); +} + +static inline int axienet_newfunc_enabled(XilinxAXIEnet *s) +{ + return !!(s->regs[R_RAF] & RAF_NEWFUNC_EN); +} + +static void xilinx_axienet_reset(DeviceState *d) +{ + XilinxAXIEnet *s = XILINX_AXI_ENET(d); + + axienet_rx_reset(s); + axienet_tx_reset(s); + + s->regs[R_PPST] = PPST_LINKSTATUS | PPST_PHY_LINKSTATUS; + s->regs[R_IS] = IS_AUTONEG | IS_RX_DCM_LOCK | IS_MGM_RDY | IS_PHY_RST_DONE; + + s->emmc = EMMC_LINKSPEED_100MB; +} + +static void enet_update_irq(XilinxAXIEnet *s) +{ + s->regs[R_IP] = s->regs[R_IS] & s->regs[R_IE]; + qemu_set_irq(s->irq, !!s->regs[R_IP]); +} + +static uint64_t enet_read(void *opaque, hwaddr addr, unsigned size) +{ + XilinxAXIEnet *s = opaque; + uint32_t r = 0; + addr >>= 2; + + switch (addr) { + case R_RCW0: + case R_RCW1: + r = s->rcw[addr & 1]; + break; + + case R_TC: + r = s->tc; + break; + + case R_EMMC: + r = s->emmc; + break; + + case R_PHYC: + r = s->phyc; + break; + + case R_MCR: + r = s->mii.regs[addr & 3] | (1 << 7); /* Always ready. */ + break; + + case R_STATS_RX_BYTESL: + case R_STATS_RX_BYTESH: + r = s->stats.rx_bytes >> (32 * (addr & 1)); + break; + + case R_STATS_TX_BYTESL: + case R_STATS_TX_BYTESH: + r = s->stats.tx_bytes >> (32 * (addr & 1)); + break; + + case R_STATS_RXL: + case R_STATS_RXH: + r = s->stats.rx >> (32 * (addr & 1)); + break; + case R_STATS_RX_BCASTL: + case R_STATS_RX_BCASTH: + r = s->stats.rx_bcast >> (32 * (addr & 1)); + break; + case R_STATS_RX_MCASTL: + case R_STATS_RX_MCASTH: + r = s->stats.rx_mcast >> (32 * (addr & 1)); + break; + + case R_MC: + case R_MWD: + case R_MRD: + r = s->mii.regs[addr & 3]; + break; + + case R_UAW0: + case R_UAW1: + r = s->uaw[addr & 1]; + break; + + case R_UAWU: + case R_UAWL: + r = s->ext_uaw[addr & 1]; + break; + + case R_FMI: + r = s->fmi; + break; + + case R_AF0: + case R_AF1: + r = s->maddr[s->fmi & 3][addr & 1]; + break; + + case 0x8000 ... 0x83ff: + r = s->ext_mtable[addr - 0x8000]; + break; + + default: + if (addr < ARRAY_SIZE(s->regs)) { + r = s->regs[addr]; + } + DENET(qemu_log("%s addr=" TARGET_FMT_plx " v=%x\n", + __func__, addr * 4, r)); + break; + } + return r; +} + +static void enet_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + XilinxAXIEnet *s = opaque; + struct TEMAC *t = &s->TEMAC; + + addr >>= 2; + switch (addr) { + case R_RCW0: + case R_RCW1: + s->rcw[addr & 1] = value; + if ((addr & 1) && value & RCW1_RST) { + axienet_rx_reset(s); + } else { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + + case R_TC: + s->tc = value; + if (value & TC_RST) { + axienet_tx_reset(s); + } + break; + + case R_EMMC: + s->emmc = value; + break; + + case R_PHYC: + s->phyc = value; + break; + + case R_MC: + value &= ((1 << 7) - 1); + + /* Enable the MII. */ + if (value & MC_EN) { + unsigned int miiclkdiv = value & ((1 << 6) - 1); + if (!miiclkdiv) { + qemu_log("AXIENET: MDIO enabled but MDIOCLK is zero!\n"); + } + } + s->mii.mc = value; + break; + + case R_MCR: { + unsigned int phyaddr = (value >> 24) & 0x1f; + unsigned int regaddr = (value >> 16) & 0x1f; + unsigned int op = (value >> 14) & 3; + unsigned int initiate = (value >> 11) & 1; + + if (initiate) { + if (op == 1) { + mdio_write_req(&t->mdio_bus, phyaddr, regaddr, s->mii.mwd); + } else if (op == 2) { + s->mii.mrd = mdio_read_req(&t->mdio_bus, phyaddr, regaddr); + } else { + qemu_log("AXIENET: invalid MDIOBus OP=%d\n", op); + } + } + s->mii.mcr = value; + break; + } + + case R_MWD: + case R_MRD: + s->mii.regs[addr & 3] = value; + break; + + + case R_UAW0: + case R_UAW1: + s->uaw[addr & 1] = value; + break; + + case R_UAWL: + case R_UAWU: + s->ext_uaw[addr & 1] = value; + break; + + case R_FMI: + s->fmi = value; + break; + + case R_AF0: + case R_AF1: + s->maddr[s->fmi & 3][addr & 1] = value; + break; + + case R_IS: + s->regs[addr] &= ~value; + break; + + case 0x8000 ... 0x83ff: + s->ext_mtable[addr - 0x8000] = value; + break; + + default: + DENET(qemu_log("%s addr=" TARGET_FMT_plx " v=%x\n", + __func__, addr * 4, (unsigned)value)); + if (addr < ARRAY_SIZE(s->regs)) { + s->regs[addr] = value; + } + break; + } + enet_update_irq(s); +} + +static const MemoryRegionOps enet_ops = { + .read = enet_read, + .write = enet_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static int eth_can_rx(XilinxAXIEnet *s) +{ + /* RX enabled? */ + return !s->rxsize && !axienet_rx_resetting(s) && axienet_rx_enabled(s); +} + +static int enet_match_addr(const uint8_t *buf, uint32_t f0, uint32_t f1) +{ + int match = 1; + + if (memcmp(buf, &f0, 4)) { + match = 0; + } + + if (buf[4] != (f1 & 0xff) || buf[5] != ((f1 >> 8) & 0xff)) { + match = 0; + } + + return match; +} + +static void axienet_eth_rx_notify(void *opaque) +{ + XilinxAXIEnet *s = XILINX_AXI_ENET(opaque); + + while (s->rxappsize && stream_can_push(s->tx_control_dev, + axienet_eth_rx_notify, s)) { + size_t ret = stream_push(s->tx_control_dev, + (void *)s->rxapp + CONTROL_PAYLOAD_SIZE + - s->rxappsize, s->rxappsize); + s->rxappsize -= ret; + } + + while (s->rxsize && stream_can_push(s->tx_data_dev, + axienet_eth_rx_notify, s)) { + size_t ret = stream_push(s->tx_data_dev, (void *)s->rxmem + s->rxpos, + s->rxsize); + s->rxsize -= ret; + s->rxpos += ret; + if (!s->rxsize) { + s->regs[R_IS] |= IS_RX_COMPLETE; + if (s->need_flush) { + s->need_flush = false; + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + } + } + enet_update_irq(s); +} + +static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) +{ + XilinxAXIEnet *s = qemu_get_nic_opaque(nc); + static const unsigned char sa_bcast[6] = {0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; + static const unsigned char sa_ipmcast[3] = {0x01, 0x00, 0x52}; + uint32_t app[CONTROL_PAYLOAD_WORDS] = {0}; + int promisc = s->fmi & (1 << 31); + int unicast, broadcast, multicast, ip_multicast = 0; + uint32_t csum32; + uint16_t csum16; + int i; + + DENET(qemu_log("%s: %zd bytes\n", __func__, size)); + + if (!eth_can_rx(s)) { + s->need_flush = true; + return 0; + } + + unicast = ~buf[0] & 0x1; + broadcast = memcmp(buf, sa_bcast, 6) == 0; + multicast = !unicast && !broadcast; + if (multicast && (memcmp(sa_ipmcast, buf, sizeof sa_ipmcast) == 0)) { + ip_multicast = 1; + } + + /* Jumbo or vlan sizes ? */ + if (!(s->rcw[1] & RCW1_JUM)) { + if (size > 1518 && size <= 1522 && !(s->rcw[1] & RCW1_VLAN)) { + return size; + } + } + + /* Basic Address filters. If you want to use the extended filters + you'll generally have to place the ethernet mac into promiscuous mode + to avoid the basic filtering from dropping most frames. */ + if (!promisc) { + if (unicast) { + if (!enet_match_addr(buf, s->uaw[0], s->uaw[1])) { + return size; + } + } else { + if (broadcast) { + /* Broadcast. */ + if (s->regs[R_RAF] & RAF_BCAST_REJ) { + return size; + } + } else { + int drop = 1; + + /* Multicast. */ + if (s->regs[R_RAF] & RAF_MCAST_REJ) { + return size; + } + + for (i = 0; i < 4; i++) { + if (enet_match_addr(buf, s->maddr[i][0], s->maddr[i][1])) { + drop = 0; + break; + } + } + + if (drop) { + return size; + } + } + } + } + + /* Extended mcast filtering enabled? */ + if (axienet_newfunc_enabled(s) && axienet_extmcf_enabled(s)) { + if (unicast) { + if (!enet_match_addr(buf, s->ext_uaw[0], s->ext_uaw[1])) { + return size; + } + } else { + if (broadcast) { + /* Broadcast. ??? */ + if (s->regs[R_RAF] & RAF_BCAST_REJ) { + return size; + } + } else { + int idx, bit; + + /* Multicast. */ + if (!memcmp(buf, sa_ipmcast, 3)) { + return size; + } + + idx = (buf[4] & 0x7f) << 8; + idx |= buf[5]; + + bit = 1 << (idx & 0x1f); + idx >>= 5; + + if (!(s->ext_mtable[idx] & bit)) { + return size; + } + } + } + } + + if (size < 12) { + s->regs[R_IS] |= IS_RX_REJECT; + enet_update_irq(s); + return -1; + } + + if (size > (s->c_rxmem - 4)) { + size = s->c_rxmem - 4; + } + + memcpy(s->rxmem, buf, size); + memset(s->rxmem + size, 0, 4); /* Clear the FCS. */ + + if (s->rcw[1] & RCW1_FCS) { + size += 4; /* fcs is inband. */ + } + + app[0] = 5 << 28; + csum32 = net_checksum_add(size - 14, (uint8_t *)s->rxmem + 14); + /* Fold it once. */ + csum32 = (csum32 & 0xffff) + (csum32 >> 16); + /* And twice to get rid of possible carries. */ + csum16 = (csum32 & 0xffff) + (csum32 >> 16); + app[3] = csum16; + app[4] = size & 0xffff; + + s->stats.rx_bytes += size; + s->stats.rx++; + if (multicast) { + s->stats.rx_mcast++; + app[2] |= 1 | (ip_multicast << 1); + } else if (broadcast) { + s->stats.rx_bcast++; + app[2] |= 1 << 3; + } + + /* Good frame. */ + app[2] |= 1 << 6; + + s->rxsize = size; + s->rxpos = 0; + for (i = 0; i < ARRAY_SIZE(app); ++i) { + app[i] = cpu_to_le32(app[i]); + } + s->rxappsize = CONTROL_PAYLOAD_SIZE; + memcpy(s->rxapp, app, s->rxappsize); + axienet_eth_rx_notify(s); + + enet_update_irq(s); + return size; +} + +static size_t +xilinx_axienet_control_stream_push(StreamSlave *obj, uint8_t *buf, size_t len) +{ + int i; + XilinxAXIEnetStreamSlave *cs = XILINX_AXI_ENET_CONTROL_STREAM(obj); + XilinxAXIEnet *s = cs->enet; + + if (len != CONTROL_PAYLOAD_SIZE) { + hw_error("AXI Enet requires %d byte control stream payload\n", + (int)CONTROL_PAYLOAD_SIZE); + } + + memcpy(s->hdr, buf, len); + + for (i = 0; i < ARRAY_SIZE(s->hdr); ++i) { + s->hdr[i] = le32_to_cpu(s->hdr[i]); + } + return len; +} + +static size_t +xilinx_axienet_data_stream_push(StreamSlave *obj, uint8_t *buf, size_t size) +{ + XilinxAXIEnetStreamSlave *ds = XILINX_AXI_ENET_DATA_STREAM(obj); + XilinxAXIEnet *s = ds->enet; + + /* TX enable ? */ + if (!(s->tc & TC_TX)) { + return size; + } + + /* Jumbo or vlan sizes ? */ + if (!(s->tc & TC_JUM)) { + if (size > 1518 && size <= 1522 && !(s->tc & TC_VLAN)) { + return size; + } + } + + if (s->hdr[0] & 1) { + unsigned int start_off = s->hdr[1] >> 16; + unsigned int write_off = s->hdr[1] & 0xffff; + uint32_t tmp_csum; + uint16_t csum; + + tmp_csum = net_checksum_add(size - start_off, + (uint8_t *)buf + start_off); + /* Accumulate the seed. */ + tmp_csum += s->hdr[2] & 0xffff; + + /* Fold the 32bit partial checksum. */ + csum = net_checksum_finish(tmp_csum); + + /* Writeback. */ + buf[write_off] = csum >> 8; + buf[write_off + 1] = csum & 0xff; + } + + qemu_send_packet(qemu_get_queue(s->nic), buf, size); + + s->stats.tx_bytes += size; + s->regs[R_IS] |= IS_TX_COMPLETE; + enet_update_irq(s); + + return size; +} + +static NetClientInfo net_xilinx_enet_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .receive = eth_rx, +}; + +static void xilinx_enet_realize(DeviceState *dev, Error **errp) +{ + XilinxAXIEnet *s = XILINX_AXI_ENET(dev); + XilinxAXIEnetStreamSlave *ds = XILINX_AXI_ENET_DATA_STREAM(&s->rx_data_dev); + XilinxAXIEnetStreamSlave *cs = XILINX_AXI_ENET_CONTROL_STREAM( + &s->rx_control_dev); + Error *local_err = NULL; + + object_property_add_link(OBJECT(ds), "enet", "xlnx.axi-ethernet", + (Object **) &ds->enet, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &local_err); + object_property_add_link(OBJECT(cs), "enet", "xlnx.axi-ethernet", + (Object **) &cs->enet, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &local_err); + if (local_err) { + goto xilinx_enet_realize_fail; + } + object_property_set_link(OBJECT(ds), OBJECT(s), "enet", &local_err); + object_property_set_link(OBJECT(cs), OBJECT(s), "enet", &local_err); + if (local_err) { + goto xilinx_enet_realize_fail; + } + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_xilinx_enet_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + tdk_init(&s->TEMAC.phy); + mdio_attach(&s->TEMAC.mdio_bus, &s->TEMAC.phy, s->c_phyaddr); + + s->TEMAC.parent = s; + + s->rxmem = g_malloc(s->c_rxmem); + return; + +xilinx_enet_realize_fail: + if (!*errp) { + *errp = local_err; + } +} + +static void xilinx_enet_init(Object *obj) +{ + XilinxAXIEnet *s = XILINX_AXI_ENET(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + object_property_add_link(obj, "axistream-connected", TYPE_STREAM_SLAVE, + (Object **) &s->tx_data_dev, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + object_property_add_link(obj, "axistream-control-connected", + TYPE_STREAM_SLAVE, + (Object **) &s->tx_control_dev, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + + object_initialize(&s->rx_data_dev, sizeof(s->rx_data_dev), + TYPE_XILINX_AXI_ENET_DATA_STREAM); + object_initialize(&s->rx_control_dev, sizeof(s->rx_control_dev), + TYPE_XILINX_AXI_ENET_CONTROL_STREAM); + object_property_add_child(OBJECT(s), "axistream-connected-target", + (Object *)&s->rx_data_dev, &error_abort); + object_property_add_child(OBJECT(s), "axistream-control-connected-target", + (Object *)&s->rx_control_dev, &error_abort); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init_io(&s->iomem, OBJECT(s), &enet_ops, s, "enet", 0x40000); + sysbus_init_mmio(sbd, &s->iomem); +} + +static Property xilinx_enet_properties[] = { + DEFINE_PROP_UINT32("phyaddr", XilinxAXIEnet, c_phyaddr, 7), + DEFINE_PROP_UINT32("rxmem", XilinxAXIEnet, c_rxmem, 0x1000), + DEFINE_PROP_UINT32("txmem", XilinxAXIEnet, c_txmem, 0x1000), + DEFINE_NIC_PROPERTIES(XilinxAXIEnet, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xilinx_enet_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xilinx_enet_realize; + dc->props = xilinx_enet_properties; + dc->reset = xilinx_axienet_reset; +} + +static void xilinx_enet_stream_class_init(ObjectClass *klass, void *data) +{ + StreamSlaveClass *ssc = STREAM_SLAVE_CLASS(klass); + + ssc->push = data; +} + +static const TypeInfo xilinx_enet_info = { + .name = TYPE_XILINX_AXI_ENET, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XilinxAXIEnet), + .class_init = xilinx_enet_class_init, + .instance_init = xilinx_enet_init, +}; + +static const TypeInfo xilinx_enet_data_stream_info = { + .name = TYPE_XILINX_AXI_ENET_DATA_STREAM, + .parent = TYPE_OBJECT, + .instance_size = sizeof(struct XilinxAXIEnetStreamSlave), + .class_init = xilinx_enet_stream_class_init, + .class_data = xilinx_axienet_data_stream_push, + .interfaces = (InterfaceInfo[]) { + { TYPE_STREAM_SLAVE }, + { } + } +}; + +static const TypeInfo xilinx_enet_control_stream_info = { + .name = TYPE_XILINX_AXI_ENET_CONTROL_STREAM, + .parent = TYPE_OBJECT, + .instance_size = sizeof(struct XilinxAXIEnetStreamSlave), + .class_init = xilinx_enet_stream_class_init, + .class_data = xilinx_axienet_control_stream_push, + .interfaces = (InterfaceInfo[]) { + { TYPE_STREAM_SLAVE }, + { } + } +}; + +static void xilinx_enet_register_types(void) +{ + type_register_static(&xilinx_enet_info); + type_register_static(&xilinx_enet_data_stream_info); + type_register_static(&xilinx_enet_control_stream_info); +} + +type_init(xilinx_enet_register_types) diff --git a/qemu/hw/net/xilinx_ethlite.c b/qemu/hw/net/xilinx_ethlite.c new file mode 100644 index 000000000..ad6b55306 --- /dev/null +++ b/qemu/hw/net/xilinx_ethlite.c @@ -0,0 +1,273 @@ +/* + * QEMU model of the Xilinx Ethernet Lite MAC. + * + * Copyright (c) 2009 Edgar E. Iglesias. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "net/net.h" + +#define D(x) +#define R_TX_BUF0 0 +#define R_TX_LEN0 (0x07f4 / 4) +#define R_TX_GIE0 (0x07f8 / 4) +#define R_TX_CTRL0 (0x07fc / 4) +#define R_TX_BUF1 (0x0800 / 4) +#define R_TX_LEN1 (0x0ff4 / 4) +#define R_TX_CTRL1 (0x0ffc / 4) + +#define R_RX_BUF0 (0x1000 / 4) +#define R_RX_CTRL0 (0x17fc / 4) +#define R_RX_BUF1 (0x1800 / 4) +#define R_RX_CTRL1 (0x1ffc / 4) +#define R_MAX (0x2000 / 4) + +#define GIE_GIE 0x80000000 + +#define CTRL_I 0x8 +#define CTRL_P 0x2 +#define CTRL_S 0x1 + +#define TYPE_XILINX_ETHLITE "xlnx.xps-ethernetlite" +#define XILINX_ETHLITE(obj) \ + OBJECT_CHECK(struct xlx_ethlite, (obj), TYPE_XILINX_ETHLITE) + +struct xlx_ethlite +{ + SysBusDevice parent_obj; + + MemoryRegion mmio; + qemu_irq irq; + NICState *nic; + NICConf conf; + + uint32_t c_tx_pingpong; + uint32_t c_rx_pingpong; + unsigned int txbuf; + unsigned int rxbuf; + + uint32_t regs[R_MAX]; +}; + +static inline void eth_pulse_irq(struct xlx_ethlite *s) +{ + /* Only the first gie reg is active. */ + if (s->regs[R_TX_GIE0] & GIE_GIE) { + qemu_irq_pulse(s->irq); + } +} + +static uint64_t +eth_read(void *opaque, hwaddr addr, unsigned int size) +{ + struct xlx_ethlite *s = opaque; + uint32_t r = 0; + + addr >>= 2; + + switch (addr) + { + case R_TX_GIE0: + case R_TX_LEN0: + case R_TX_LEN1: + case R_TX_CTRL1: + case R_TX_CTRL0: + case R_RX_CTRL1: + case R_RX_CTRL0: + r = s->regs[addr]; + D(qemu_log("%s " TARGET_FMT_plx "=%x\n", __func__, addr * 4, r)); + break; + + default: + r = tswap32(s->regs[addr]); + break; + } + return r; +} + +static void +eth_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + struct xlx_ethlite *s = opaque; + unsigned int base = 0; + uint32_t value = val64; + + addr >>= 2; + switch (addr) + { + case R_TX_CTRL0: + case R_TX_CTRL1: + if (addr == R_TX_CTRL1) + base = 0x800 / 4; + + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); + if ((value & (CTRL_P | CTRL_S)) == CTRL_S) { + qemu_send_packet(qemu_get_queue(s->nic), + (void *) &s->regs[base], + s->regs[base + R_TX_LEN0]); + D(qemu_log("eth_tx %d\n", s->regs[base + R_TX_LEN0])); + if (s->regs[base + R_TX_CTRL0] & CTRL_I) + eth_pulse_irq(s); + } else if ((value & (CTRL_P | CTRL_S)) == (CTRL_P | CTRL_S)) { + memcpy(&s->conf.macaddr.a[0], &s->regs[base], 6); + if (s->regs[base + R_TX_CTRL0] & CTRL_I) + eth_pulse_irq(s); + } + + /* We are fast and get ready pretty much immediately so + we actually never flip the S nor P bits to one. */ + s->regs[addr] = value & ~(CTRL_P | CTRL_S); + break; + + /* Keep these native. */ + case R_RX_CTRL0: + case R_RX_CTRL1: + if (!(value & CTRL_S)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + /* fall through */ + case R_TX_LEN0: + case R_TX_LEN1: + case R_TX_GIE0: + D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n", + __func__, addr * 4, value)); + s->regs[addr] = value; + break; + + default: + s->regs[addr] = tswap32(value); + break; + } +} + +static const MemoryRegionOps eth_ops = { + .read = eth_read, + .write = eth_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static int eth_can_rx(NetClientState *nc) +{ + struct xlx_ethlite *s = qemu_get_nic_opaque(nc); + unsigned int rxbase = s->rxbuf * (0x800 / 4); + + return !(s->regs[rxbase + R_RX_CTRL0] & CTRL_S); +} + +static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) +{ + struct xlx_ethlite *s = qemu_get_nic_opaque(nc); + unsigned int rxbase = s->rxbuf * (0x800 / 4); + + /* DA filter. */ + if (!(buf[0] & 0x80) && memcmp(&s->conf.macaddr.a[0], buf, 6)) + return size; + + if (s->regs[rxbase + R_RX_CTRL0] & CTRL_S) { + D(qemu_log("ethlite lost packet %x\n", s->regs[R_RX_CTRL0])); + return -1; + } + + D(qemu_log("%s %zd rxbase=%x\n", __func__, size, rxbase)); + memcpy(&s->regs[rxbase + R_RX_BUF0], buf, size); + + s->regs[rxbase + R_RX_CTRL0] |= CTRL_S; + if (s->regs[R_RX_CTRL0] & CTRL_I) { + eth_pulse_irq(s); + } + + /* If c_rx_pingpong was set flip buffers. */ + s->rxbuf ^= s->c_rx_pingpong; + return size; +} + +static void xilinx_ethlite_reset(DeviceState *dev) +{ + struct xlx_ethlite *s = XILINX_ETHLITE(dev); + + s->rxbuf = 0; +} + +static NetClientInfo net_xilinx_ethlite_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = eth_can_rx, + .receive = eth_rx, +}; + +static void xilinx_ethlite_realize(DeviceState *dev, Error **errp) +{ + struct xlx_ethlite *s = XILINX_ETHLITE(dev); + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + s->nic = qemu_new_nic(&net_xilinx_ethlite_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); +} + +static void xilinx_ethlite_init(Object *obj) +{ + struct xlx_ethlite *s = XILINX_ETHLITE(obj); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, ð_ops, s, + "xlnx.xps-ethernetlite", R_MAX * 4); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static Property xilinx_ethlite_properties[] = { + DEFINE_PROP_UINT32("tx-ping-pong", struct xlx_ethlite, c_tx_pingpong, 1), + DEFINE_PROP_UINT32("rx-ping-pong", struct xlx_ethlite, c_rx_pingpong, 1), + DEFINE_NIC_PROPERTIES(struct xlx_ethlite, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xilinx_ethlite_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xilinx_ethlite_realize; + dc->reset = xilinx_ethlite_reset; + dc->props = xilinx_ethlite_properties; +} + +static const TypeInfo xilinx_ethlite_info = { + .name = TYPE_XILINX_ETHLITE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(struct xlx_ethlite), + .instance_init = xilinx_ethlite_init, + .class_init = xilinx_ethlite_class_init, +}; + +static void xilinx_ethlite_register_types(void) +{ + type_register_static(&xilinx_ethlite_info); +} + +type_init(xilinx_ethlite_register_types) |