diff options
author | Yang Zhang <yang.z.zhang@intel.com> | 2015-08-28 09:58:54 +0800 |
---|---|---|
committer | Yang Zhang <yang.z.zhang@intel.com> | 2015-09-01 12:44:00 +0800 |
commit | e44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch) | |
tree | 66b09f592c55df2878107a468a91d21506104d3f /qemu/hw/pci | |
parent | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff) |
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/hw/pci')
-rw-r--r-- | qemu/hw/pci/Makefile.objs | 9 | ||||
-rw-r--r-- | qemu/hw/pci/msi.c | 400 | ||||
-rw-r--r-- | qemu/hw/pci/msix.c | 608 | ||||
-rw-r--r-- | qemu/hw/pci/pci-stub.c | 36 | ||||
-rw-r--r-- | qemu/hw/pci/pci.c | 2472 | ||||
-rw-r--r-- | qemu/hw/pci/pci_bridge.c | 419 | ||||
-rw-r--r-- | qemu/hw/pci/pci_host.c | 191 | ||||
-rw-r--r-- | qemu/hw/pci/pcie.c | 634 | ||||
-rw-r--r-- | qemu/hw/pci/pcie_aer.c | 1042 | ||||
-rw-r--r-- | qemu/hw/pci/pcie_host.c | 146 | ||||
-rw-r--r-- | qemu/hw/pci/pcie_port.c | 178 | ||||
-rw-r--r-- | qemu/hw/pci/shpc.c | 721 | ||||
-rw-r--r-- | qemu/hw/pci/slotid_cap.c | 45 |
13 files changed, 6901 insertions, 0 deletions
diff --git a/qemu/hw/pci/Makefile.objs b/qemu/hw/pci/Makefile.objs new file mode 100644 index 000000000..9f905e634 --- /dev/null +++ b/qemu/hw/pci/Makefile.objs @@ -0,0 +1,9 @@ +common-obj-$(CONFIG_PCI) += pci.o pci_bridge.o +common-obj-$(CONFIG_PCI) += msix.o msi.o +common-obj-$(CONFIG_PCI) += shpc.o +common-obj-$(CONFIG_PCI) += slotid_cap.o +common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o +common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o + +common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o +common-obj-$(CONFIG_ALL) += pci-stub.o diff --git a/qemu/hw/pci/msi.c b/qemu/hw/pci/msi.c new file mode 100644 index 000000000..f9c048442 --- /dev/null +++ b/qemu/hw/pci/msi.c @@ -0,0 +1,400 @@ +/* + * msi.c + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/pci/msi.h" +#include "qemu/range.h" + +/* PCI_MSI_ADDRESS_LO */ +#define PCI_MSI_ADDRESS_LO_MASK (~0x3) + +/* If we get rid of cap allocator, we won't need those. */ +#define PCI_MSI_32_SIZEOF 0x0a +#define PCI_MSI_64_SIZEOF 0x0e +#define PCI_MSI_32M_SIZEOF 0x14 +#define PCI_MSI_64M_SIZEOF 0x18 + +#define PCI_MSI_VECTORS_MAX 32 + +/* Flag for interrupt controller to declare MSI/MSI-X support */ +bool msi_supported; + +/* If we get rid of cap allocator, we won't need this. */ +static inline uint8_t msi_cap_sizeof(uint16_t flags) +{ + switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) { + case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT: + return PCI_MSI_64M_SIZEOF; + case PCI_MSI_FLAGS_64BIT: + return PCI_MSI_64_SIZEOF; + case PCI_MSI_FLAGS_MASKBIT: + return PCI_MSI_32M_SIZEOF; + case 0: + return PCI_MSI_32_SIZEOF; + default: + abort(); + break; + } + return 0; +} + +//#define MSI_DEBUG + +#ifdef MSI_DEBUG +# define MSI_DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) +#else +# define MSI_DPRINTF(fmt, ...) do { } while (0) +#endif +#define MSI_DEV_PRINTF(dev, fmt, ...) \ + MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) + +static inline unsigned int msi_nr_vectors(uint16_t flags) +{ + return 1U << + ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE)); +} + +static inline uint8_t msi_flags_off(const PCIDevice* dev) +{ + return dev->msi_cap + PCI_MSI_FLAGS; +} + +static inline uint8_t msi_address_lo_off(const PCIDevice* dev) +{ + return dev->msi_cap + PCI_MSI_ADDRESS_LO; +} + +static inline uint8_t msi_address_hi_off(const PCIDevice* dev) +{ + return dev->msi_cap + PCI_MSI_ADDRESS_HI; +} + +static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit) +{ + return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32); +} + +static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit) +{ + return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32); +} + +static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit) +{ + return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32); +} + +/* + * Special API for POWER to configure the vectors through + * a side channel. Should never be used by devices. + */ +void msi_set_message(PCIDevice *dev, MSIMessage msg) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + + if (msi64bit) { + pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address); + } else { + pci_set_long(dev->config + msi_address_lo_off(dev), msg.address); + } + pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data); +} + +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + unsigned int nr_vectors = msi_nr_vectors(flags); + MSIMessage msg; + + assert(vector < nr_vectors); + + if (msi64bit) { + msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev)); + } else { + msg.address = pci_get_long(dev->config + msi_address_lo_off(dev)); + } + + /* upper bit 31:16 is zero */ + msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit)); + if (nr_vectors > 1) { + msg.data &= ~(nr_vectors - 1); + msg.data |= vector; + } + + return msg; +} + +bool msi_enabled(const PCIDevice *dev) +{ + return msi_present(dev) && + (pci_get_word(dev->config + msi_flags_off(dev)) & + PCI_MSI_FLAGS_ENABLE); +} + +int msi_init(struct PCIDevice *dev, uint8_t offset, + unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask) +{ + unsigned int vectors_order; + uint16_t flags; + uint8_t cap_size; + int config_offset; + + if (!msi_supported) { + return -ENOTSUP; + } + + MSI_DEV_PRINTF(dev, + "init offset: 0x%"PRIx8" vector: %"PRId8 + " 64bit %d mask %d\n", + offset, nr_vectors, msi64bit, msi_per_vector_mask); + + assert(!(nr_vectors & (nr_vectors - 1))); /* power of 2 */ + assert(nr_vectors > 0); + assert(nr_vectors <= PCI_MSI_VECTORS_MAX); + /* the nr of MSI vectors is up to 32 */ + vectors_order = ctz32(nr_vectors); + + flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK); + if (msi64bit) { + flags |= PCI_MSI_FLAGS_64BIT; + } + if (msi_per_vector_mask) { + flags |= PCI_MSI_FLAGS_MASKBIT; + } + + cap_size = msi_cap_sizeof(flags); + config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, cap_size); + if (config_offset < 0) { + return config_offset; + } + + dev->msi_cap = config_offset; + dev->cap_present |= QEMU_PCI_CAP_MSI; + + pci_set_word(dev->config + msi_flags_off(dev), flags); + pci_set_word(dev->wmask + msi_flags_off(dev), + PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); + pci_set_long(dev->wmask + msi_address_lo_off(dev), + PCI_MSI_ADDRESS_LO_MASK); + if (msi64bit) { + pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff); + } + pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff); + + if (msi_per_vector_mask) { + /* Make mask bits 0 to nr_vectors - 1 writable. */ + pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit), + 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); + } + return config_offset; +} + +void msi_uninit(struct PCIDevice *dev) +{ + uint16_t flags; + uint8_t cap_size; + + if (!msi_present(dev)) { + return; + } + flags = pci_get_word(dev->config + msi_flags_off(dev)); + cap_size = msi_cap_sizeof(flags); + pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); + dev->cap_present &= ~QEMU_PCI_CAP_MSI; + + MSI_DEV_PRINTF(dev, "uninit\n"); +} + +void msi_reset(PCIDevice *dev) +{ + uint16_t flags; + bool msi64bit; + + if (!msi_present(dev)) { + return; + } + + flags = pci_get_word(dev->config + msi_flags_off(dev)); + flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); + msi64bit = flags & PCI_MSI_FLAGS_64BIT; + + pci_set_word(dev->config + msi_flags_off(dev), flags); + pci_set_long(dev->config + msi_address_lo_off(dev), 0); + if (msi64bit) { + pci_set_long(dev->config + msi_address_hi_off(dev), 0); + } + pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0); + if (flags & PCI_MSI_FLAGS_MASKBIT) { + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0); + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0); + } + MSI_DEV_PRINTF(dev, "reset\n"); +} + +static bool msi_is_masked(const PCIDevice *dev, unsigned int vector) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + uint32_t mask; + assert(vector < PCI_MSI_VECTORS_MAX); + + if (!(flags & PCI_MSI_FLAGS_MASKBIT)) { + return false; + } + + mask = pci_get_long(dev->config + + msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT)); + return mask & (1U << vector); +} + +void msi_notify(PCIDevice *dev, unsigned int vector) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + unsigned int nr_vectors = msi_nr_vectors(flags); + MSIMessage msg; + + assert(vector < nr_vectors); + if (msi_is_masked(dev, vector)) { + assert(flags & PCI_MSI_FLAGS_MASKBIT); + pci_long_test_and_set_mask( + dev->config + msi_pending_off(dev, msi64bit), 1U << vector); + MSI_DEV_PRINTF(dev, "pending vector 0x%x\n", vector); + return; + } + + msg = msi_get_message(dev, vector); + + MSI_DEV_PRINTF(dev, + "notify vector 0x%x" + " address: 0x%"PRIx64" data: 0x%"PRIx32"\n", + vector, msg.address, msg.data); + msi_send_message(dev, msg); +} + +void msi_send_message(PCIDevice *dev, MSIMessage msg) +{ + MemTxAttrs attrs = {}; + + attrs.stream_id = (pci_bus_num(dev->bus) << 8) | dev->devfn; + address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, + attrs, NULL); +} + +/* Normally called by pci_default_write_config(). */ +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT; + unsigned int nr_vectors; + uint8_t log_num_vecs; + uint8_t log_max_vecs; + unsigned int vector; + uint32_t pending; + + if (!msi_present(dev) || + !ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) { + return; + } + +#ifdef MSI_DEBUG + MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n", + addr, val, len); + MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32, + flags, + pci_get_long(dev->config + msi_address_lo_off(dev))); + if (msi64bit) { + fprintf(stderr, " address-hi: 0x%"PRIx32, + pci_get_long(dev->config + msi_address_hi_off(dev))); + } + fprintf(stderr, " data: 0x%"PRIx16, + pci_get_word(dev->config + msi_data_off(dev, msi64bit))); + if (flags & PCI_MSI_FLAGS_MASKBIT) { + fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32, + pci_get_long(dev->config + msi_mask_off(dev, msi64bit)), + pci_get_long(dev->config + msi_pending_off(dev, msi64bit))); + } + fprintf(stderr, "\n"); +#endif + + if (!(flags & PCI_MSI_FLAGS_ENABLE)) { + return; + } + + /* + * Now MSI is enabled, clear INTx# interrupts. + * the driver is prohibited from writing enable bit to mask + * a service request. But the guest OS could do this. + * So we just discard the interrupts as moderate fallback. + * + * 6.8.3.3. Enabling Operation + * While enabled for MSI or MSI-X operation, a function is prohibited + * from using its INTx# pin (if implemented) to request + * service (MSI, MSI-X, and INTx# are mutually exclusive). + */ + pci_device_deassert_intx(dev); + + /* + * nr_vectors might be set bigger than capable. So clamp it. + * This is not legal by spec, so we can do anything we like, + * just don't crash the host + */ + log_num_vecs = + (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE); + log_max_vecs = + (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK); + if (log_num_vecs > log_max_vecs) { + flags &= ~PCI_MSI_FLAGS_QSIZE; + flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE); + pci_set_word(dev->config + msi_flags_off(dev), flags); + } + + if (!msi_per_vector_mask) { + /* if per vector masking isn't supported, + there is no pending interrupt. */ + return; + } + + nr_vectors = msi_nr_vectors(flags); + + /* This will discard pending interrupts, if any. */ + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); + pending &= 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors); + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); + + /* deliver pending interrupts which are unmasked */ + for (vector = 0; vector < nr_vectors; ++vector) { + if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) { + continue; + } + + pci_long_test_and_clear_mask( + dev->config + msi_pending_off(dev, msi64bit), 1U << vector); + msi_notify(dev, vector); + } +} + +unsigned int msi_nr_vectors_allocated(const PCIDevice *dev) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + return msi_nr_vectors(flags); +} diff --git a/qemu/hw/pci/msix.c b/qemu/hw/pci/msix.c new file mode 100644 index 000000000..7716bf364 --- /dev/null +++ b/qemu/hw/pci/msix.c @@ -0,0 +1,608 @@ +/* + * MSI-X device support + * + * This module includes support for MSI-X in pci devices. + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "hw/hw.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/pci/pci.h" +#include "qemu/range.h" + +#define MSIX_CAP_LENGTH 12 + +/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ +#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) +#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) +#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) + +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; + MSIMessage msg; + + msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + return msg; +} + +/* + * Special API for POWER to configure the vectors through + * a side channel. Should never be used by devices. + */ +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg) +{ + uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; + + pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address); + pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data); + table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; +} + +static uint8_t msix_pending_mask(int vector) +{ + return 1 << (vector % 8); +} + +static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) +{ + return dev->msix_pba + vector / 8; +} + +static int msix_is_pending(PCIDevice *dev, int vector) +{ + return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); +} + +void msix_set_pending(PCIDevice *dev, unsigned int vector) +{ + *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); +} + +static void msix_clr_pending(PCIDevice *dev, int vector) +{ + *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); +} + +static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask) +{ + unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; +} + +bool msix_is_masked(PCIDevice *dev, unsigned int vector) +{ + return msix_vector_masked(dev, vector, dev->msix_function_masked); +} + +static void msix_fire_vector_notifier(PCIDevice *dev, + unsigned int vector, bool is_masked) +{ + MSIMessage msg; + int ret; + + if (!dev->msix_vector_use_notifier) { + return; + } + if (is_masked) { + dev->msix_vector_release_notifier(dev, vector); + } else { + msg = msix_get_message(dev, vector); + ret = dev->msix_vector_use_notifier(dev, vector, msg); + assert(ret >= 0); + } +} + +static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) +{ + bool is_masked = msix_is_masked(dev, vector); + + if (is_masked == was_masked) { + return; + } + + msix_fire_vector_notifier(dev, vector, is_masked); + + if (!is_masked && msix_is_pending(dev, vector)) { + msix_clr_pending(dev, vector); + msix_notify(dev, vector); + } +} + +static void msix_update_function_masked(PCIDevice *dev) +{ + dev->msix_function_masked = !msix_enabled(dev) || + (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); +} + +/* Handle MSI-X capability config write. */ +void msix_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; + int vector; + bool was_masked; + + if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) { + return; + } + + was_masked = dev->msix_function_masked; + msix_update_function_masked(dev); + + if (!msix_enabled(dev)) { + return; + } + + pci_device_deassert_intx(dev); + + if (dev->msix_function_masked == was_masked) { + return; + } + + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { + msix_handle_mask_update(dev, vector, + msix_vector_masked(dev, vector, was_masked)); + } +} + +static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + PCIDevice *dev = opaque; + + return pci_get_long(dev->msix_table + addr); +} + +static void msix_table_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PCIDevice *dev = opaque; + int vector = addr / PCI_MSIX_ENTRY_SIZE; + bool was_masked; + + was_masked = msix_is_masked(dev, vector); + pci_set_long(dev->msix_table + addr, val); + msix_handle_mask_update(dev, vector, was_masked); +} + +static const MemoryRegionOps msix_table_mmio_ops = { + .read = msix_table_mmio_read, + .write = msix_table_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + PCIDevice *dev = opaque; + if (dev->msix_vector_poll_notifier) { + unsigned vector_start = addr * 8; + unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); + dev->msix_vector_poll_notifier(dev, vector_start, vector_end); + } + + return pci_get_long(dev->msix_pba + addr); +} + +static const MemoryRegionOps msix_pba_mmio_ops = { + .read = msix_pba_mmio_read, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) +{ + int vector; + + for (vector = 0; vector < nentries; ++vector) { + unsigned offset = + vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + bool was_masked = msix_is_masked(dev, vector); + + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + msix_handle_mask_update(dev, vector, was_masked); + } +} + +/* Initialize the MSI-X structures */ +int msix_init(struct PCIDevice *dev, unsigned short nentries, + MemoryRegion *table_bar, uint8_t table_bar_nr, + unsigned table_offset, MemoryRegion *pba_bar, + uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos) +{ + int cap; + unsigned table_size, pba_size; + uint8_t *config; + + /* Nothing to do if MSI is not supported by interrupt controller */ + if (!msi_supported) { + return -ENOTSUP; + } + + if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) { + return -EINVAL; + } + + table_size = nentries * PCI_MSIX_ENTRY_SIZE; + pba_size = QEMU_ALIGN_UP(nentries, 64) / 8; + + /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */ + if ((table_bar_nr == pba_bar_nr && + ranges_overlap(table_offset, table_size, pba_offset, pba_size)) || + table_offset + table_size > memory_region_size(table_bar) || + pba_offset + pba_size > memory_region_size(pba_bar) || + (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) { + return -EINVAL; + } + + cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH); + if (cap < 0) { + return cap; + } + + dev->msix_cap = cap; + dev->cap_present |= QEMU_PCI_CAP_MSIX; + config = dev->config + cap; + + pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); + dev->msix_entries_nr = nentries; + dev->msix_function_masked = true; + + pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr); + pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr); + + /* Make flags bit writable. */ + dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | + MSIX_MASKALL_MASK; + + dev->msix_table = g_malloc0(table_size); + dev->msix_pba = g_malloc0(pba_size); + dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used); + + msix_mask_all(dev, nentries); + + memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev, + "msix-table", table_size); + memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio); + memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev, + "msix-pba", pba_size); + memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); + + return 0; +} + +int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries, + uint8_t bar_nr) +{ + int ret; + char *name; + uint32_t bar_size = 4096; + uint32_t bar_pba_offset = bar_size / 2; + uint32_t bar_pba_size = (nentries / 8 + 1) * 8; + + /* + * Migration compatibility dictates that this remains a 4k + * BAR with the vector table in the lower half and PBA in + * the upper half for nentries which is lower or equal to 128. + * No need to care about using more than 65 entries for legacy + * machine types who has at most 64 queues. + */ + if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) { + bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE; + } + + if (bar_pba_offset + bar_pba_size > 4096) { + bar_size = bar_pba_offset + bar_pba_size; + } + + if (bar_size & (bar_size - 1)) { + bar_size = 1 << qemu_fls(bar_size); + } + + name = g_strdup_printf("%s-msix", dev->name); + memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size); + g_free(name); + + ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, + 0, &dev->msix_exclusive_bar, + bar_nr, bar_pba_offset, + 0); + if (ret) { + return ret; + } + + pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, + &dev->msix_exclusive_bar); + + return 0; +} + +static void msix_free_irq_entries(PCIDevice *dev) +{ + int vector; + + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { + dev->msix_entry_used[vector] = 0; + msix_clr_pending(dev, vector); + } +} + +static void msix_clear_all_vectors(PCIDevice *dev) +{ + int vector; + + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { + msix_clr_pending(dev, vector); + } +} + +/* Clean up resources for the device. */ +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) +{ + if (!msix_present(dev)) { + return; + } + pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); + dev->msix_cap = 0; + msix_free_irq_entries(dev); + dev->msix_entries_nr = 0; + memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio); + g_free(dev->msix_pba); + dev->msix_pba = NULL; + memory_region_del_subregion(table_bar, &dev->msix_table_mmio); + g_free(dev->msix_table); + dev->msix_table = NULL; + g_free(dev->msix_entry_used); + dev->msix_entry_used = NULL; + dev->cap_present &= ~QEMU_PCI_CAP_MSIX; +} + +void msix_uninit_exclusive_bar(PCIDevice *dev) +{ + if (msix_present(dev)) { + msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar); + } +} + +void msix_save(PCIDevice *dev, QEMUFile *f) +{ + unsigned n = dev->msix_entries_nr; + + if (!msix_present(dev)) { + return; + } + + qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); + qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8); +} + +/* Should be called after restoring the config space. */ +void msix_load(PCIDevice *dev, QEMUFile *f) +{ + unsigned n = dev->msix_entries_nr; + unsigned int vector; + + if (!msix_present(dev)) { + return; + } + + msix_clear_all_vectors(dev); + qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); + qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8); + msix_update_function_masked(dev); + + for (vector = 0; vector < n; vector++) { + msix_handle_mask_update(dev, vector, true); + } +} + +/* Does device support MSI-X? */ +int msix_present(PCIDevice *dev) +{ + return dev->cap_present & QEMU_PCI_CAP_MSIX; +} + +/* Is MSI-X enabled? */ +int msix_enabled(PCIDevice *dev) +{ + return (dev->cap_present & QEMU_PCI_CAP_MSIX) && + (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + MSIX_ENABLE_MASK); +} + +/* Send an MSI-X message */ +void msix_notify(PCIDevice *dev, unsigned vector) +{ + MSIMessage msg; + + if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) + return; + if (msix_is_masked(dev, vector)) { + msix_set_pending(dev, vector); + return; + } + + msg = msix_get_message(dev, vector); + + msi_send_message(dev, msg); +} + +void msix_reset(PCIDevice *dev) +{ + if (!msix_present(dev)) { + return; + } + msix_clear_all_vectors(dev); + dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= + ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; + memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); + memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8); + msix_mask_all(dev, dev->msix_entries_nr); +} + +/* PCI spec suggests that devices make it possible for software to configure + * less vectors than supported by the device, but does not specify a standard + * mechanism for devices to do so. + * + * We support this by asking devices to declare vectors software is going to + * actually use, and checking this on the notification path. Devices that + * don't want to follow the spec suggestion can declare all vectors as used. */ + +/* Mark vector as used. */ +int msix_vector_use(PCIDevice *dev, unsigned vector) +{ + if (vector >= dev->msix_entries_nr) + return -EINVAL; + dev->msix_entry_used[vector]++; + return 0; +} + +/* Mark vector as unused. */ +void msix_vector_unuse(PCIDevice *dev, unsigned vector) +{ + if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { + return; + } + if (--dev->msix_entry_used[vector]) { + return; + } + msix_clr_pending(dev, vector); +} + +void msix_unuse_all_vectors(PCIDevice *dev) +{ + if (!msix_present(dev)) { + return; + } + msix_free_irq_entries(dev); +} + +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) +{ + return dev->msix_entries_nr; +} + +static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + MSIMessage msg; + + if (msix_is_masked(dev, vector)) { + return 0; + } + msg = msix_get_message(dev, vector); + return dev->msix_vector_use_notifier(dev, vector, msg); +} + +static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + if (msix_is_masked(dev, vector)) { + return; + } + dev->msix_vector_release_notifier(dev, vector); +} + +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier, + MSIVectorPollNotifier poll_notifier) +{ + int vector, ret; + + assert(use_notifier && release_notifier); + + dev->msix_vector_use_notifier = use_notifier; + dev->msix_vector_release_notifier = release_notifier; + dev->msix_vector_poll_notifier = poll_notifier; + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + ret = msix_set_notifier_for_vector(dev, vector); + if (ret < 0) { + goto undo; + } + } + } + if (dev->msix_vector_poll_notifier) { + dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr); + } + return 0; + +undo: + while (--vector >= 0) { + msix_unset_notifier_for_vector(dev, vector); + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; + return ret; +} + +void msix_unset_vector_notifiers(PCIDevice *dev) +{ + int vector; + + assert(dev->msix_vector_use_notifier && + dev->msix_vector_release_notifier); + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + msix_unset_notifier_for_vector(dev, vector); + } + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; + dev->msix_vector_poll_notifier = NULL; +} + +static void put_msix_state(QEMUFile *f, void *pv, size_t size) +{ + msix_save(pv, f); +} + +static int get_msix_state(QEMUFile *f, void *pv, size_t size) +{ + msix_load(pv, f); + return 0; +} + +static VMStateInfo vmstate_info_msix = { + .name = "msix state", + .get = get_msix_state, + .put = put_msix_state, +}; + +const VMStateDescription vmstate_msix = { + .name = "msix", + .fields = (VMStateField[]) { + { + .name = "msix", + .version_id = 0, + .field_exists = NULL, + .size = 0, /* ouch */ + .info = &vmstate_info_msix, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + } +}; diff --git a/qemu/hw/pci/pci-stub.c b/qemu/hw/pci/pci-stub.c new file mode 100644 index 000000000..063a7c242 --- /dev/null +++ b/qemu/hw/pci/pci-stub.c @@ -0,0 +1,36 @@ +/* + * PCI stubs for platforms that don't support pci bus. + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "sysemu/sysemu.h" +#include "monitor/monitor.h" +#include "qapi/qmp/qerror.h" +#include "hw/pci/pci.h" +#include "qmp-commands.h" + +PciInfoList *qmp_query_pci(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} + +void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) +{ + monitor_printf(mon, "PCI devices not supported\n"); +} diff --git a/qemu/hw/pci/pci.c b/qemu/hw/pci/pci.c new file mode 100644 index 000000000..a017614d4 --- /dev/null +++ b/qemu/hw/pci/pci.c @@ -0,0 +1,2472 @@ +/* + * QEMU PCI bus manager + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "monitor/monitor.h" +#include "net/net.h" +#include "sysemu/sysemu.h" +#include "hw/loader.h" +#include "qemu/error-report.h" +#include "qemu/range.h" +#include "qmp-commands.h" +#include "trace.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "exec/address-spaces.h" +#include "hw/hotplug.h" + +//#define DEBUG_PCI +#ifdef DEBUG_PCI +# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) +#else +# define PCI_DPRINTF(format, ...) do { } while (0) +#endif + +static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); +static char *pcibus_get_dev_path(DeviceState *dev); +static char *pcibus_get_fw_dev_path(DeviceState *dev); +static void pcibus_reset(BusState *qbus); + +static Property pci_props[] = { + DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), + DEFINE_PROP_STRING("romfile", PCIDevice, romfile), + DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), + DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, + QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), + DEFINE_PROP_BIT("command_serr_enable", PCIDevice, cap_present, + QEMU_PCI_CAP_SERR_BITNR, true), + DEFINE_PROP_END_OF_LIST() +}; + +static const VMStateDescription vmstate_pcibus = { + .name = "PCIBUS", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32_EQUAL(nirq, PCIBus), + VMSTATE_VARRAY_INT32(irq_count, PCIBus, + nirq, 0, vmstate_info_int32, + int32_t), + VMSTATE_END_OF_LIST() + } +}; + +static void pci_bus_realize(BusState *qbus, Error **errp) +{ + PCIBus *bus = PCI_BUS(qbus); + + vmstate_register(NULL, -1, &vmstate_pcibus, bus); +} + +static void pci_bus_unrealize(BusState *qbus, Error **errp) +{ + PCIBus *bus = PCI_BUS(qbus); + + vmstate_unregister(NULL, &vmstate_pcibus, bus); +} + +static bool pcibus_is_root(PCIBus *bus) +{ + return !bus->parent_dev; +} + +static int pcibus_num(PCIBus *bus) +{ + if (pcibus_is_root(bus)) { + return 0; /* pci host bridge */ + } + return bus->parent_dev->config[PCI_SECONDARY_BUS]; +} + +static uint16_t pcibus_numa_node(PCIBus *bus) +{ + return NUMA_NODE_UNASSIGNED; +} + +static void pci_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + PCIBusClass *pbc = PCI_BUS_CLASS(klass); + + k->print_dev = pcibus_dev_print; + k->get_dev_path = pcibus_get_dev_path; + k->get_fw_dev_path = pcibus_get_fw_dev_path; + k->realize = pci_bus_realize; + k->unrealize = pci_bus_unrealize; + k->reset = pcibus_reset; + + pbc->is_root = pcibus_is_root; + pbc->bus_num = pcibus_num; + pbc->numa_node = pcibus_numa_node; +} + +static const TypeInfo pci_bus_info = { + .name = TYPE_PCI_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(PCIBus), + .class_size = sizeof(PCIBusClass), + .class_init = pci_bus_class_init, +}; + +static const TypeInfo pcie_bus_info = { + .name = TYPE_PCIE_BUS, + .parent = TYPE_PCI_BUS, +}; + +static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num); +static void pci_update_mappings(PCIDevice *d); +static void pci_irq_handler(void *opaque, int irq_num, int level); +static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **); +static void pci_del_option_rom(PCIDevice *pdev); + +static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET; +static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU; + +static QLIST_HEAD(, PCIHostState) pci_host_bridges; + +int pci_bar(PCIDevice *d, int reg) +{ + uint8_t type; + + if (reg != PCI_ROM_SLOT) + return PCI_BASE_ADDRESS_0 + reg * 4; + + type = d->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + return type == PCI_HEADER_TYPE_BRIDGE ? PCI_ROM_ADDRESS1 : PCI_ROM_ADDRESS; +} + +static inline int pci_irq_state(PCIDevice *d, int irq_num) +{ + return (d->irq_state >> irq_num) & 0x1; +} + +static inline void pci_set_irq_state(PCIDevice *d, int irq_num, int level) +{ + d->irq_state &= ~(0x1 << irq_num); + d->irq_state |= level << irq_num; +} + +static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) +{ + PCIBus *bus; + for (;;) { + bus = pci_dev->bus; + irq_num = bus->map_irq(pci_dev, irq_num); + if (bus->set_irq) + break; + pci_dev = bus->parent_dev; + } + bus->irq_count[irq_num] += change; + bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); +} + +int pci_bus_get_irq_level(PCIBus *bus, int irq_num) +{ + assert(irq_num >= 0); + assert(irq_num < bus->nirq); + return !!bus->irq_count[irq_num]; +} + +/* Update interrupt status bit in config space on interrupt + * state change. */ +static void pci_update_irq_status(PCIDevice *dev) +{ + if (dev->irq_state) { + dev->config[PCI_STATUS] |= PCI_STATUS_INTERRUPT; + } else { + dev->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT; + } +} + +void pci_device_deassert_intx(PCIDevice *dev) +{ + int i; + for (i = 0; i < PCI_NUM_PINS; ++i) { + pci_irq_handler(dev, i, 0); + } +} + +static void pci_do_device_reset(PCIDevice *dev) +{ + int r; + + pci_device_deassert_intx(dev); + assert(dev->irq_state == 0); + + /* Clear all writable bits */ + pci_word_test_and_clear_mask(dev->config + PCI_COMMAND, + pci_get_word(dev->wmask + PCI_COMMAND) | + pci_get_word(dev->w1cmask + PCI_COMMAND)); + pci_word_test_and_clear_mask(dev->config + PCI_STATUS, + pci_get_word(dev->wmask + PCI_STATUS) | + pci_get_word(dev->w1cmask + PCI_STATUS)); + dev->config[PCI_CACHE_LINE_SIZE] = 0x0; + dev->config[PCI_INTERRUPT_LINE] = 0x0; + for (r = 0; r < PCI_NUM_REGIONS; ++r) { + PCIIORegion *region = &dev->io_regions[r]; + if (!region->size) { + continue; + } + + if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) && + region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(dev->config + pci_bar(dev, r), region->type); + } else { + pci_set_long(dev->config + pci_bar(dev, r), region->type); + } + } + pci_update_mappings(dev); + + msi_reset(dev); + msix_reset(dev); +} + +/* + * This function is called on #RST and FLR. + * FLR if PCI_EXP_DEVCTL_BCR_FLR is set + */ +void pci_device_reset(PCIDevice *dev) +{ + qdev_reset_all(&dev->qdev); + pci_do_device_reset(dev); +} + +/* + * Trigger pci bus reset under a given bus. + * Called via qbus_reset_all on RST# assert, after the devices + * have been reset qdev_reset_all-ed already. + */ +static void pcibus_reset(BusState *qbus) +{ + PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus); + int i; + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + if (bus->devices[i]) { + pci_do_device_reset(bus->devices[i]); + } + } + + for (i = 0; i < bus->nirq; i++) { + assert(bus->irq_count[i] == 0); + } +} + +static void pci_host_bus_register(PCIBus *bus, DeviceState *parent) +{ + PCIHostState *host_bridge = PCI_HOST_BRIDGE(parent); + + QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next); +} + +PCIBus *pci_find_primary_bus(void) +{ + PCIBus *primary_bus = NULL; + PCIHostState *host; + + QLIST_FOREACH(host, &pci_host_bridges, next) { + if (primary_bus) { + /* We have multiple root buses, refuse to select a primary */ + return NULL; + } + primary_bus = host->bus; + } + + return primary_bus; +} + +PCIBus *pci_device_root_bus(const PCIDevice *d) +{ + PCIBus *bus = d->bus; + + while (!pci_bus_is_root(bus)) { + d = bus->parent_dev; + assert(d != NULL); + + bus = d->bus; + } + + return bus; +} + +const char *pci_root_bus_path(PCIDevice *dev) +{ + PCIBus *rootbus = pci_device_root_bus(dev); + PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_GET_CLASS(host_bridge); + + assert(host_bridge->bus == rootbus); + + if (hc->root_bus_path) { + return (*hc->root_bus_path)(host_bridge, rootbus); + } + + return rootbus->qbus.name; +} + +static void pci_bus_init(PCIBus *bus, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min) +{ + assert(PCI_FUNC(devfn_min) == 0); + bus->devfn_min = devfn_min; + bus->address_space_mem = address_space_mem; + bus->address_space_io = address_space_io; + + /* host bridge */ + QLIST_INIT(&bus->child); + + pci_host_bus_register(bus, parent); +} + +bool pci_bus_is_express(PCIBus *bus) +{ + return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS); +} + +bool pci_bus_is_root(PCIBus *bus) +{ + return PCI_BUS_GET_CLASS(bus)->is_root(bus); +} + +void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename) +{ + qbus_create_inplace(bus, bus_size, typename, parent, name); + pci_bus_init(bus, parent, name, address_space_mem, + address_space_io, devfn_min); +} + +PCIBus *pci_bus_new(DeviceState *parent, const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename) +{ + PCIBus *bus; + + bus = PCI_BUS(qbus_create(typename, parent, name)); + pci_bus_init(bus, parent, name, address_space_mem, + address_space_io, devfn_min); + return bus; +} + +void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, + void *irq_opaque, int nirq) +{ + bus->set_irq = set_irq; + bus->map_irq = map_irq; + bus->irq_opaque = irq_opaque; + bus->nirq = nirq; + bus->irq_count = g_malloc0(nirq * sizeof(bus->irq_count[0])); +} + +PCIBus *pci_register_bus(DeviceState *parent, const char *name, + pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, + void *irq_opaque, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, int nirq, const char *typename) +{ + PCIBus *bus; + + bus = pci_bus_new(parent, name, address_space_mem, + address_space_io, devfn_min, typename); + pci_bus_irqs(bus, set_irq, map_irq, irq_opaque, nirq); + return bus; +} + +int pci_bus_num(PCIBus *s) +{ + return PCI_BUS_GET_CLASS(s)->bus_num(s); +} + +int pci_bus_numa_node(PCIBus *bus) +{ + return PCI_BUS_GET_CLASS(bus)->numa_node(bus); +} + +static int get_pci_config_device(QEMUFile *f, void *pv, size_t size) +{ + PCIDevice *s = container_of(pv, PCIDevice, config); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(s); + uint8_t *config; + int i; + + assert(size == pci_config_size(s)); + config = g_malloc(size); + + qemu_get_buffer(f, config, size); + for (i = 0; i < size; ++i) { + if ((config[i] ^ s->config[i]) & + s->cmask[i] & ~s->wmask[i] & ~s->w1cmask[i]) { + error_report("%s: Bad config data: i=0x%x read: %x device: %x " + "cmask: %x wmask: %x w1cmask:%x", __func__, + i, config[i], s->config[i], + s->cmask[i], s->wmask[i], s->w1cmask[i]); + g_free(config); + return -EINVAL; + } + } + memcpy(s->config, config, size); + + pci_update_mappings(s); + if (pc->is_bridge) { + PCIBridge *b = PCI_BRIDGE(s); + pci_bridge_update_mappings(b); + } + + memory_region_set_enabled(&s->bus_master_enable_region, + pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); + + g_free(config); + return 0; +} + +/* just put buffer */ +static void put_pci_config_device(QEMUFile *f, void *pv, size_t size) +{ + const uint8_t **v = pv; + assert(size == pci_config_size(container_of(pv, PCIDevice, config))); + qemu_put_buffer(f, *v, size); +} + +static VMStateInfo vmstate_info_pci_config = { + .name = "pci config", + .get = get_pci_config_device, + .put = put_pci_config_device, +}; + +static int get_pci_irq_state(QEMUFile *f, void *pv, size_t size) +{ + PCIDevice *s = container_of(pv, PCIDevice, irq_state); + uint32_t irq_state[PCI_NUM_PINS]; + int i; + for (i = 0; i < PCI_NUM_PINS; ++i) { + irq_state[i] = qemu_get_be32(f); + if (irq_state[i] != 0x1 && irq_state[i] != 0) { + fprintf(stderr, "irq state %d: must be 0 or 1.\n", + irq_state[i]); + return -EINVAL; + } + } + + for (i = 0; i < PCI_NUM_PINS; ++i) { + pci_set_irq_state(s, i, irq_state[i]); + } + + return 0; +} + +static void put_pci_irq_state(QEMUFile *f, void *pv, size_t size) +{ + int i; + PCIDevice *s = container_of(pv, PCIDevice, irq_state); + + for (i = 0; i < PCI_NUM_PINS; ++i) { + qemu_put_be32(f, pci_irq_state(s, i)); + } +} + +static VMStateInfo vmstate_info_pci_irq_state = { + .name = "pci irq state", + .get = get_pci_irq_state, + .put = put_pci_irq_state, +}; + +const VMStateDescription vmstate_pci_device = { + .name = "PCIDevice", + .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), + VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0, + vmstate_info_pci_config, + PCI_CONFIG_SPACE_SIZE), + VMSTATE_BUFFER_UNSAFE_INFO(irq_state, PCIDevice, 2, + vmstate_info_pci_irq_state, + PCI_NUM_PINS * sizeof(int32_t)), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_pcie_device = { + .name = "PCIEDevice", + .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), + VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0, + vmstate_info_pci_config, + PCIE_CONFIG_SPACE_SIZE), + VMSTATE_BUFFER_UNSAFE_INFO(irq_state, PCIDevice, 2, + vmstate_info_pci_irq_state, + PCI_NUM_PINS * sizeof(int32_t)), + VMSTATE_END_OF_LIST() + } +}; + +static inline const VMStateDescription *pci_get_vmstate(PCIDevice *s) +{ + return pci_is_express(s) ? &vmstate_pcie_device : &vmstate_pci_device; +} + +void pci_device_save(PCIDevice *s, QEMUFile *f) +{ + /* Clear interrupt status bit: it is implicit + * in irq_state which we are saving. + * This makes us compatible with old devices + * which never set or clear this bit. */ + s->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT; + vmstate_save_state(f, pci_get_vmstate(s), s, NULL); + /* Restore the interrupt status bit. */ + pci_update_irq_status(s); +} + +int pci_device_load(PCIDevice *s, QEMUFile *f) +{ + int ret; + ret = vmstate_load_state(f, pci_get_vmstate(s), s, s->version_id); + /* Restore the interrupt status bit. */ + pci_update_irq_status(s); + return ret; +} + +static void pci_set_default_subsystem_id(PCIDevice *pci_dev) +{ + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, + pci_default_sub_vendor_id); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, + pci_default_sub_device_id); +} + +/* + * Parse [[<domain>:]<bus>:]<slot>, return -1 on error if funcp == NULL + * [[<domain>:]<bus>:]<slot>.<func>, return -1 on error + */ +static int pci_parse_devaddr(const char *addr, int *domp, int *busp, + unsigned int *slotp, unsigned int *funcp) +{ + const char *p; + char *e; + unsigned long val; + unsigned long dom = 0, bus = 0; + unsigned int slot = 0; + unsigned int func = 0; + + p = addr; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + bus = val; + p = e + 1; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + dom = bus; + bus = val; + p = e + 1; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + } + } + + slot = val; + + if (funcp != NULL) { + if (*e != '.') + return -1; + + p = e + 1; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + + func = val; + } + + /* if funcp == NULL func is 0 */ + if (dom > 0xffff || bus > 0xff || slot > 0x1f || func > 7) + return -1; + + if (*e) + return -1; + + *domp = dom; + *busp = bus; + *slotp = slot; + if (funcp != NULL) + *funcp = func; + return 0; +} + +static PCIBus *pci_get_bus_devfn(int *devfnp, PCIBus *root, + const char *devaddr) +{ + int dom, bus; + unsigned slot; + + if (!root) { + fprintf(stderr, "No primary PCI bus\n"); + return NULL; + } + + assert(!root->parent_dev); + + if (!devaddr) { + *devfnp = -1; + return pci_find_bus_nr(root, 0); + } + + if (pci_parse_devaddr(devaddr, &dom, &bus, &slot, NULL) < 0) { + return NULL; + } + + if (dom != 0) { + fprintf(stderr, "No support for non-zero PCI domains\n"); + return NULL; + } + + *devfnp = PCI_DEVFN(slot, 0); + return pci_find_bus_nr(root, bus); +} + +static void pci_init_cmask(PCIDevice *dev) +{ + pci_set_word(dev->cmask + PCI_VENDOR_ID, 0xffff); + pci_set_word(dev->cmask + PCI_DEVICE_ID, 0xffff); + dev->cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST; + dev->cmask[PCI_REVISION_ID] = 0xff; + dev->cmask[PCI_CLASS_PROG] = 0xff; + pci_set_word(dev->cmask + PCI_CLASS_DEVICE, 0xffff); + dev->cmask[PCI_HEADER_TYPE] = 0xff; + dev->cmask[PCI_CAPABILITY_LIST] = 0xff; +} + +static void pci_init_wmask(PCIDevice *dev) +{ + int config_size = pci_config_size(dev); + + dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff; + dev->wmask[PCI_INTERRUPT_LINE] = 0xff; + pci_set_word(dev->wmask + PCI_COMMAND, + PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + if (dev->cap_present & QEMU_PCI_CAP_SERR) { + pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR); + } + + memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff, + config_size - PCI_CONFIG_HEADER_SIZE); +} + +static void pci_init_w1cmask(PCIDevice *dev) +{ + /* + * Note: It's okay to set w1cmask even for readonly bits as + * long as their value is hardwired to 0. + */ + pci_set_word(dev->w1cmask + PCI_STATUS, + PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY); +} + +static void pci_init_mask_bridge(PCIDevice *d) +{ + /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and + PCI_SEC_LETENCY_TIMER */ + memset(d->wmask + PCI_PRIMARY_BUS, 0xff, 4); + + /* base and limit */ + d->wmask[PCI_IO_BASE] = PCI_IO_RANGE_MASK & 0xff; + d->wmask[PCI_IO_LIMIT] = PCI_IO_RANGE_MASK & 0xff; + pci_set_word(d->wmask + PCI_MEMORY_BASE, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_set_word(d->wmask + PCI_MEMORY_LIMIT, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_set_word(d->wmask + PCI_PREF_MEMORY_BASE, + PCI_PREF_RANGE_MASK & 0xffff); + pci_set_word(d->wmask + PCI_PREF_MEMORY_LIMIT, + PCI_PREF_RANGE_MASK & 0xffff); + + /* PCI_PREF_BASE_UPPER32 and PCI_PREF_LIMIT_UPPER32 */ + memset(d->wmask + PCI_PREF_BASE_UPPER32, 0xff, 8); + + /* Supported memory and i/o types */ + d->config[PCI_IO_BASE] |= PCI_IO_RANGE_TYPE_16; + d->config[PCI_IO_LIMIT] |= PCI_IO_RANGE_TYPE_16; + pci_word_test_and_set_mask(d->config + PCI_PREF_MEMORY_BASE, + PCI_PREF_RANGE_TYPE_64); + pci_word_test_and_set_mask(d->config + PCI_PREF_MEMORY_LIMIT, + PCI_PREF_RANGE_TYPE_64); + + /* + * TODO: Bridges default to 10-bit VGA decoding but we currently only + * implement 16-bit decoding (no alias support). + */ + pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_PARITY | + PCI_BRIDGE_CTL_SERR | + PCI_BRIDGE_CTL_ISA | + PCI_BRIDGE_CTL_VGA | + PCI_BRIDGE_CTL_VGA_16BIT | + PCI_BRIDGE_CTL_MASTER_ABORT | + PCI_BRIDGE_CTL_BUS_RESET | + PCI_BRIDGE_CTL_FAST_BACK | + PCI_BRIDGE_CTL_DISCARD | + PCI_BRIDGE_CTL_SEC_DISCARD | + PCI_BRIDGE_CTL_DISCARD_SERR); + /* Below does not do anything as we never set this bit, put here for + * completeness. */ + pci_set_word(d->w1cmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_DISCARD_STATUS); + d->cmask[PCI_IO_BASE] |= PCI_IO_RANGE_TYPE_MASK; + d->cmask[PCI_IO_LIMIT] |= PCI_IO_RANGE_TYPE_MASK; + pci_word_test_and_set_mask(d->cmask + PCI_PREF_MEMORY_BASE, + PCI_PREF_RANGE_TYPE_MASK); + pci_word_test_and_set_mask(d->cmask + PCI_PREF_MEMORY_LIMIT, + PCI_PREF_RANGE_TYPE_MASK); +} + +static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) +{ + uint8_t slot = PCI_SLOT(dev->devfn); + uint8_t func; + + if (dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; + } + + /* + * multifunction bit is interpreted in two ways as follows. + * - all functions must set the bit to 1. + * Example: Intel X53 + * - function 0 must set the bit, but the rest function (> 0) + * is allowed to leave the bit to 0. + * Example: PIIX3(also in qemu), PIIX4(also in qemu), ICH10, + * + * So OS (at least Linux) checks the bit of only function 0, + * and doesn't see the bit of function > 0. + * + * The below check allows both interpretation. + */ + if (PCI_FUNC(dev->devfn)) { + PCIDevice *f0 = bus->devices[PCI_DEVFN(slot, 0)]; + if (f0 && !(f0->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)) { + /* function 0 should set multifunction bit */ + error_setg(errp, "PCI: single function device can't be populated " + "in function %x.%x", slot, PCI_FUNC(dev->devfn)); + return; + } + return; + } + + if (dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + return; + } + /* function 0 indicates single function, so function > 0 must be NULL */ + for (func = 1; func < PCI_FUNC_MAX; ++func) { + if (bus->devices[PCI_DEVFN(slot, func)]) { + error_setg(errp, "PCI: %x.0 indicates single function, " + "but %x.%x is already populated.", + slot, slot, func); + return; + } + } +} + +static void pci_config_alloc(PCIDevice *pci_dev) +{ + int config_size = pci_config_size(pci_dev); + + pci_dev->config = g_malloc0(config_size); + pci_dev->cmask = g_malloc0(config_size); + pci_dev->wmask = g_malloc0(config_size); + pci_dev->w1cmask = g_malloc0(config_size); + pci_dev->used = g_malloc0(config_size); +} + +static void pci_config_free(PCIDevice *pci_dev) +{ + g_free(pci_dev->config); + g_free(pci_dev->cmask); + g_free(pci_dev->wmask); + g_free(pci_dev->w1cmask); + g_free(pci_dev->used); +} + +static void do_pci_unregister_device(PCIDevice *pci_dev) +{ + pci_dev->bus->devices[pci_dev->devfn] = NULL; + pci_config_free(pci_dev); + + address_space_destroy(&pci_dev->bus_master_as); +} + +/* -1 for devfn means auto assign */ +static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, + const char *name, int devfn, + Error **errp) +{ + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev); + PCIConfigReadFunc *config_read = pc->config_read; + PCIConfigWriteFunc *config_write = pc->config_write; + Error *local_err = NULL; + AddressSpace *dma_as; + + if (devfn < 0) { + for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices); + devfn += PCI_FUNC_MAX) { + if (!bus->devices[devfn]) + goto found; + } + error_setg(errp, "PCI: no slot/function available for %s, all in use", + name); + return NULL; + found: ; + } else if (bus->devices[devfn]) { + error_setg(errp, "PCI: slot %d function %d not available for %s," + " in use by %s", + PCI_SLOT(devfn), PCI_FUNC(devfn), name, + bus->devices[devfn]->name); + return NULL; + } + + pci_dev->bus = bus; + pci_dev->devfn = devfn; + dma_as = pci_device_iommu_address_space(pci_dev); + + memory_region_init_alias(&pci_dev->bus_master_enable_region, + OBJECT(pci_dev), "bus master", + dma_as->root, 0, memory_region_size(dma_as->root)); + memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region, + name); + + pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); + pci_dev->irq_state = 0; + pci_config_alloc(pci_dev); + + pci_config_set_vendor_id(pci_dev->config, pc->vendor_id); + pci_config_set_device_id(pci_dev->config, pc->device_id); + pci_config_set_revision(pci_dev->config, pc->revision); + pci_config_set_class(pci_dev->config, pc->class_id); + + if (!pc->is_bridge) { + if (pc->subsystem_vendor_id || pc->subsystem_id) { + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, + pc->subsystem_vendor_id); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, + pc->subsystem_id); + } else { + pci_set_default_subsystem_id(pci_dev); + } + } else { + /* subsystem_vendor_id/subsystem_id are only for header type 0 */ + assert(!pc->subsystem_vendor_id); + assert(!pc->subsystem_id); + } + pci_init_cmask(pci_dev); + pci_init_wmask(pci_dev); + pci_init_w1cmask(pci_dev); + if (pc->is_bridge) { + pci_init_mask_bridge(pci_dev); + } + pci_init_multifunction(bus, pci_dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + do_pci_unregister_device(pci_dev); + return NULL; + } + + if (!config_read) + config_read = pci_default_read_config; + if (!config_write) + config_write = pci_default_write_config; + pci_dev->config_read = config_read; + pci_dev->config_write = config_write; + bus->devices[devfn] = pci_dev; + pci_dev->version_id = 2; /* Current pci device vmstate version */ + return pci_dev; +} + +static void pci_unregister_io_regions(PCIDevice *pci_dev) +{ + PCIIORegion *r; + int i; + + for(i = 0; i < PCI_NUM_REGIONS; i++) { + r = &pci_dev->io_regions[i]; + if (!r->size || r->addr == PCI_BAR_UNMAPPED) + continue; + memory_region_del_subregion(r->address_space, r->memory); + } + + pci_unregister_vga(pci_dev); +} + +static void pci_qdev_unrealize(DeviceState *dev, Error **errp) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev); + + pci_unregister_io_regions(pci_dev); + pci_del_option_rom(pci_dev); + + if (pc->exit) { + pc->exit(pci_dev); + } + + do_pci_unregister_device(pci_dev); +} + +void pci_register_bar(PCIDevice *pci_dev, int region_num, + uint8_t type, MemoryRegion *memory) +{ + PCIIORegion *r; + uint32_t addr; + uint64_t wmask; + pcibus_t size = memory_region_size(memory); + + assert(region_num >= 0); + assert(region_num < PCI_NUM_REGIONS); + if (size & (size-1)) { + fprintf(stderr, "ERROR: PCI region size must be pow2 " + "type=0x%x, size=0x%"FMT_PCIBUS"\n", type, size); + exit(1); + } + + r = &pci_dev->io_regions[region_num]; + r->addr = PCI_BAR_UNMAPPED; + r->size = size; + r->type = type; + r->memory = NULL; + + wmask = ~(size - 1); + addr = pci_bar(pci_dev, region_num); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + pci_set_long(pci_dev->config + addr, type); + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } + pci_dev->io_regions[region_num].memory = memory; + pci_dev->io_regions[region_num].address_space + = type & PCI_BASE_ADDRESS_SPACE_IO + ? pci_dev->bus->address_space_io + : pci_dev->bus->address_space_mem; +} + +static void pci_update_vga(PCIDevice *pci_dev) +{ + uint16_t cmd; + + if (!pci_dev->has_vga) { + return; + } + + cmd = pci_get_word(pci_dev->config + PCI_COMMAND); + + memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_MEM], + cmd & PCI_COMMAND_MEMORY); + memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO], + cmd & PCI_COMMAND_IO); + memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI], + cmd & PCI_COMMAND_IO); +} + +void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem, + MemoryRegion *io_lo, MemoryRegion *io_hi) +{ + assert(!pci_dev->has_vga); + + assert(memory_region_size(mem) == QEMU_PCI_VGA_MEM_SIZE); + pci_dev->vga_regions[QEMU_PCI_VGA_MEM] = mem; + memory_region_add_subregion_overlap(pci_dev->bus->address_space_mem, + QEMU_PCI_VGA_MEM_BASE, mem, 1); + + assert(memory_region_size(io_lo) == QEMU_PCI_VGA_IO_LO_SIZE); + pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO] = io_lo; + memory_region_add_subregion_overlap(pci_dev->bus->address_space_io, + QEMU_PCI_VGA_IO_LO_BASE, io_lo, 1); + + assert(memory_region_size(io_hi) == QEMU_PCI_VGA_IO_HI_SIZE); + pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI] = io_hi; + memory_region_add_subregion_overlap(pci_dev->bus->address_space_io, + QEMU_PCI_VGA_IO_HI_BASE, io_hi, 1); + pci_dev->has_vga = true; + + pci_update_vga(pci_dev); +} + +void pci_unregister_vga(PCIDevice *pci_dev) +{ + if (!pci_dev->has_vga) { + return; + } + + memory_region_del_subregion(pci_dev->bus->address_space_mem, + pci_dev->vga_regions[QEMU_PCI_VGA_MEM]); + memory_region_del_subregion(pci_dev->bus->address_space_io, + pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO]); + memory_region_del_subregion(pci_dev->bus->address_space_io, + pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI]); + pci_dev->has_vga = false; +} + +pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num) +{ + return pci_dev->io_regions[region_num].addr; +} + +static pcibus_t pci_bar_address(PCIDevice *d, + int reg, uint8_t type, pcibus_t size) +{ + pcibus_t new_addr, last_addr; + int bar = pci_bar(d, reg); + uint16_t cmd = pci_get_word(d->config + PCI_COMMAND); + + if (type & PCI_BASE_ADDRESS_SPACE_IO) { + if (!(cmd & PCI_COMMAND_IO)) { + return PCI_BAR_UNMAPPED; + } + new_addr = pci_get_long(d->config + bar) & ~(size - 1); + last_addr = new_addr + size - 1; + /* Check if 32 bit BAR wraps around explicitly. + * TODO: make priorities correct and remove this work around. + */ + if (last_addr <= new_addr || new_addr == 0 || last_addr >= UINT32_MAX) { + return PCI_BAR_UNMAPPED; + } + return new_addr; + } + + if (!(cmd & PCI_COMMAND_MEMORY)) { + return PCI_BAR_UNMAPPED; + } + if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + new_addr = pci_get_quad(d->config + bar); + } else { + new_addr = pci_get_long(d->config + bar); + } + /* the ROM slot has a specific enable bit */ + if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) { + return PCI_BAR_UNMAPPED; + } + new_addr &= ~(size - 1); + last_addr = new_addr + size - 1; + /* NOTE: we do not support wrapping */ + /* XXX: as we cannot support really dynamic + mappings, we handle specific values as invalid + mappings. */ + if (last_addr <= new_addr || new_addr == 0 || + last_addr == PCI_BAR_UNMAPPED) { + return PCI_BAR_UNMAPPED; + } + + /* Now pcibus_t is 64bit. + * Check if 32 bit BAR wraps around explicitly. + * Without this, PC ide doesn't work well. + * TODO: remove this work around. + */ + if (!(type & PCI_BASE_ADDRESS_MEM_TYPE_64) && last_addr >= UINT32_MAX) { + return PCI_BAR_UNMAPPED; + } + + /* + * OS is allowed to set BAR beyond its addressable + * bits. For example, 32 bit OS can set 64bit bar + * to >4G. Check it. TODO: we might need to support + * it in the future for e.g. PAE. + */ + if (last_addr >= HWADDR_MAX) { + return PCI_BAR_UNMAPPED; + } + + return new_addr; +} + +static void pci_update_mappings(PCIDevice *d) +{ + PCIIORegion *r; + int i; + pcibus_t new_addr; + + for(i = 0; i < PCI_NUM_REGIONS; i++) { + r = &d->io_regions[i]; + + /* this region isn't registered */ + if (!r->size) + continue; + + new_addr = pci_bar_address(d, i, r->type, r->size); + + /* This bar isn't changed */ + if (new_addr == r->addr) + continue; + + /* now do the real mapping */ + if (r->addr != PCI_BAR_UNMAPPED) { + trace_pci_update_mappings_del(d, pci_bus_num(d->bus), + PCI_FUNC(d->devfn), + PCI_SLOT(d->devfn), + i, r->addr, r->size); + memory_region_del_subregion(r->address_space, r->memory); + } + r->addr = new_addr; + if (r->addr != PCI_BAR_UNMAPPED) { + trace_pci_update_mappings_add(d, pci_bus_num(d->bus), + PCI_FUNC(d->devfn), + PCI_SLOT(d->devfn), + i, r->addr, r->size); + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } + } + + pci_update_vga(d); +} + +static inline int pci_irq_disabled(PCIDevice *d) +{ + return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; +} + +/* Called after interrupt disabled field update in config space, + * assert/deassert interrupts if necessary. + * Gets original interrupt disable bit value (before update). */ +static void pci_update_irq_disabled(PCIDevice *d, int was_irq_disabled) +{ + int i, disabled = pci_irq_disabled(d); + if (disabled == was_irq_disabled) + return; + for (i = 0; i < PCI_NUM_PINS; ++i) { + int state = pci_irq_state(d, i); + pci_change_irq_level(d, i, disabled ? -state : state); + } +} + +uint32_t pci_default_read_config(PCIDevice *d, + uint32_t address, int len) +{ + uint32_t val = 0; + + memcpy(&val, d->config + address, len); + return le32_to_cpu(val); +} + +void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l) +{ + int i, was_irq_disabled = pci_irq_disabled(d); + uint32_t val = val_in; + + for (i = 0; i < l; val >>= 8, ++i) { + uint8_t wmask = d->wmask[addr + i]; + uint8_t w1cmask = d->w1cmask[addr + i]; + assert(!(wmask & w1cmask)); + d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask); + d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ + } + if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) || + ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) || + ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) || + range_covers_byte(addr, l, PCI_COMMAND)) + pci_update_mappings(d); + + if (range_covers_byte(addr, l, PCI_COMMAND)) { + pci_update_irq_disabled(d, was_irq_disabled); + memory_region_set_enabled(&d->bus_master_enable_region, + pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); + } + + msi_write_config(d, addr, val_in, l); + msix_write_config(d, addr, val_in, l); +} + +/***********************************************************/ +/* generic PCI irq support */ + +/* 0 <= irq_num <= 3. level must be 0 or 1 */ +static void pci_irq_handler(void *opaque, int irq_num, int level) +{ + PCIDevice *pci_dev = opaque; + int change; + + change = level - pci_irq_state(pci_dev, irq_num); + if (!change) + return; + + pci_set_irq_state(pci_dev, irq_num, level); + pci_update_irq_status(pci_dev); + if (pci_irq_disabled(pci_dev)) + return; + pci_change_irq_level(pci_dev, irq_num, change); +} + +static inline int pci_intx(PCIDevice *pci_dev) +{ + return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1; +} + +qemu_irq pci_allocate_irq(PCIDevice *pci_dev) +{ + int intx = pci_intx(pci_dev); + + return qemu_allocate_irq(pci_irq_handler, pci_dev, intx); +} + +void pci_set_irq(PCIDevice *pci_dev, int level) +{ + int intx = pci_intx(pci_dev); + pci_irq_handler(pci_dev, intx, level); +} + +/* Special hooks used by device assignment */ +void pci_bus_set_route_irq_fn(PCIBus *bus, pci_route_irq_fn route_intx_to_irq) +{ + assert(pci_bus_is_root(bus)); + bus->route_intx_to_irq = route_intx_to_irq; +} + +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) +{ + PCIBus *bus; + + do { + bus = dev->bus; + pin = bus->map_irq(dev, pin); + dev = bus->parent_dev; + } while (dev); + + if (!bus->route_intx_to_irq) { + error_report("PCI: Bug - unimplemented PCI INTx routing (%s)", + object_get_typename(OBJECT(bus->qbus.parent))); + return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 }; + } + + return bus->route_intx_to_irq(bus->irq_opaque, pin); +} + +bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new) +{ + return old->mode != new->mode || old->irq != new->irq; +} + +void pci_bus_fire_intx_routing_notifier(PCIBus *bus) +{ + PCIDevice *dev; + PCIBus *sec; + int i; + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + dev = bus->devices[i]; + if (dev && dev->intx_routing_notifier) { + dev->intx_routing_notifier(dev); + } + } + + QLIST_FOREACH(sec, &bus->child, sibling) { + pci_bus_fire_intx_routing_notifier(sec); + } +} + +void pci_device_set_intx_routing_notifier(PCIDevice *dev, + PCIINTxRoutingNotifier notifier) +{ + dev->intx_routing_notifier = notifier; +} + +/* + * PCI-to-PCI bridge specification + * 9.1: Interrupt routing. Table 9-1 + * + * the PCI Express Base Specification, Revision 2.1 + * 2.2.8.1: INTx interrutp signaling - Rules + * the Implementation Note + * Table 2-20 + */ +/* + * 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD + * 0-origin unlike PCI interrupt pin register. + */ +int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin) +{ + return (pin + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS; +} + +/***********************************************************/ +/* monitor info on PCI */ + +typedef struct { + uint16_t class; + const char *desc; + const char *fw_name; + uint16_t fw_ign_bits; +} pci_class_desc; + +static const pci_class_desc pci_class_descriptions[] = +{ + { 0x0001, "VGA controller", "display"}, + { 0x0100, "SCSI controller", "scsi"}, + { 0x0101, "IDE controller", "ide"}, + { 0x0102, "Floppy controller", "fdc"}, + { 0x0103, "IPI controller", "ipi"}, + { 0x0104, "RAID controller", "raid"}, + { 0x0106, "SATA controller"}, + { 0x0107, "SAS controller"}, + { 0x0180, "Storage controller"}, + { 0x0200, "Ethernet controller", "ethernet"}, + { 0x0201, "Token Ring controller", "token-ring"}, + { 0x0202, "FDDI controller", "fddi"}, + { 0x0203, "ATM controller", "atm"}, + { 0x0280, "Network controller"}, + { 0x0300, "VGA controller", "display", 0x00ff}, + { 0x0301, "XGA controller"}, + { 0x0302, "3D controller"}, + { 0x0380, "Display controller"}, + { 0x0400, "Video controller", "video"}, + { 0x0401, "Audio controller", "sound"}, + { 0x0402, "Phone"}, + { 0x0403, "Audio controller", "sound"}, + { 0x0480, "Multimedia controller"}, + { 0x0500, "RAM controller", "memory"}, + { 0x0501, "Flash controller", "flash"}, + { 0x0580, "Memory controller"}, + { 0x0600, "Host bridge", "host"}, + { 0x0601, "ISA bridge", "isa"}, + { 0x0602, "EISA bridge", "eisa"}, + { 0x0603, "MC bridge", "mca"}, + { 0x0604, "PCI bridge", "pci-bridge"}, + { 0x0605, "PCMCIA bridge", "pcmcia"}, + { 0x0606, "NUBUS bridge", "nubus"}, + { 0x0607, "CARDBUS bridge", "cardbus"}, + { 0x0608, "RACEWAY bridge"}, + { 0x0680, "Bridge"}, + { 0x0700, "Serial port", "serial"}, + { 0x0701, "Parallel port", "parallel"}, + { 0x0800, "Interrupt controller", "interrupt-controller"}, + { 0x0801, "DMA controller", "dma-controller"}, + { 0x0802, "Timer", "timer"}, + { 0x0803, "RTC", "rtc"}, + { 0x0900, "Keyboard", "keyboard"}, + { 0x0901, "Pen", "pen"}, + { 0x0902, "Mouse", "mouse"}, + { 0x0A00, "Dock station", "dock", 0x00ff}, + { 0x0B00, "i386 cpu", "cpu", 0x00ff}, + { 0x0c00, "Fireware contorller", "fireware"}, + { 0x0c01, "Access bus controller", "access-bus"}, + { 0x0c02, "SSA controller", "ssa"}, + { 0x0c03, "USB controller", "usb"}, + { 0x0c04, "Fibre channel controller", "fibre-channel"}, + { 0x0c05, "SMBus"}, + { 0, NULL} +}; + +static void pci_for_each_device_under_bus(PCIBus *bus, + void (*fn)(PCIBus *b, PCIDevice *d, + void *opaque), + void *opaque) +{ + PCIDevice *d; + int devfn; + + for(devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + d = bus->devices[devfn]; + if (d) { + fn(bus, d, opaque); + } + } +} + +void pci_for_each_device(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), + void *opaque) +{ + bus = pci_find_bus_nr(bus, bus_num); + + if (bus) { + pci_for_each_device_under_bus(bus, fn, opaque); + } +} + +static const pci_class_desc *get_class_desc(int class) +{ + const pci_class_desc *desc; + + desc = pci_class_descriptions; + while (desc->desc && class != desc->class) { + desc++; + } + + return desc; +} + +static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num); + +static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev) +{ + PciMemoryRegionList *head = NULL, *cur_item = NULL; + int i; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + const PCIIORegion *r = &dev->io_regions[i]; + PciMemoryRegionList *region; + + if (!r->size) { + continue; + } + + region = g_malloc0(sizeof(*region)); + region->value = g_malloc0(sizeof(*region->value)); + + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + region->value->type = g_strdup("io"); + } else { + region->value->type = g_strdup("memory"); + region->value->has_prefetch = true; + region->value->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH); + region->value->has_mem_type_64 = true; + region->value->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64); + } + + region->value->bar = i; + region->value->address = r->addr; + region->value->size = r->size; + + /* XXX: waiting for the qapi to support GSList */ + if (!cur_item) { + head = cur_item = region; + } else { + cur_item->next = region; + cur_item = region; + } + } + + return head; +} + +static PciBridgeInfo *qmp_query_pci_bridge(PCIDevice *dev, PCIBus *bus, + int bus_num) +{ + PciBridgeInfo *info; + PciMemoryRange *range; + + info = g_new0(PciBridgeInfo, 1); + + info->bus = g_new0(PciBusInfo, 1); + info->bus->number = dev->config[PCI_PRIMARY_BUS]; + info->bus->secondary = dev->config[PCI_SECONDARY_BUS]; + info->bus->subordinate = dev->config[PCI_SUBORDINATE_BUS]; + + range = info->bus->io_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); + + range = info->bus->memory_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + + range = info->bus->prefetchable_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + + if (dev->config[PCI_SECONDARY_BUS] != 0) { + PCIBus *child_bus = pci_find_bus_nr(bus, dev->config[PCI_SECONDARY_BUS]); + if (child_bus) { + info->has_devices = true; + info->devices = qmp_query_pci_devices(child_bus, dev->config[PCI_SECONDARY_BUS]); + } + } + + return info; +} + +static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus, + int bus_num) +{ + const pci_class_desc *desc; + PciDeviceInfo *info; + uint8_t type; + int class; + + info = g_new0(PciDeviceInfo, 1); + info->bus = bus_num; + info->slot = PCI_SLOT(dev->devfn); + info->function = PCI_FUNC(dev->devfn); + + info->class_info = g_new0(PciDeviceClass, 1); + class = pci_get_word(dev->config + PCI_CLASS_DEVICE); + info->class_info->q_class = class; + desc = get_class_desc(class); + if (desc->desc) { + info->class_info->has_desc = true; + info->class_info->desc = g_strdup(desc->desc); + } + + info->id = g_new0(PciDeviceId, 1); + info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID); + info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID); + info->regions = qmp_query_pci_regions(dev); + info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : ""); + + if (dev->config[PCI_INTERRUPT_PIN] != 0) { + info->has_irq = true; + info->irq = dev->config[PCI_INTERRUPT_LINE]; + } + + type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + if (type == PCI_HEADER_TYPE_BRIDGE) { + info->has_pci_bridge = true; + info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num); + } + + return info; +} + +static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num) +{ + PciDeviceInfoList *info, *head = NULL, *cur_item = NULL; + PCIDevice *dev; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + dev = bus->devices[devfn]; + if (dev) { + info = g_malloc0(sizeof(*info)); + info->value = qmp_query_pci_device(dev, bus, bus_num); + + /* XXX: waiting for the qapi to support GSList */ + if (!cur_item) { + head = cur_item = info; + } else { + cur_item->next = info; + cur_item = info; + } + } + } + + return head; +} + +static PciInfo *qmp_query_pci_bus(PCIBus *bus, int bus_num) +{ + PciInfo *info = NULL; + + bus = pci_find_bus_nr(bus, bus_num); + if (bus) { + info = g_malloc0(sizeof(*info)); + info->bus = bus_num; + info->devices = qmp_query_pci_devices(bus, bus_num); + } + + return info; +} + +PciInfoList *qmp_query_pci(Error **errp) +{ + PciInfoList *info, *head = NULL, *cur_item = NULL; + PCIHostState *host_bridge; + + QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { + info = g_malloc0(sizeof(*info)); + info->value = qmp_query_pci_bus(host_bridge->bus, + pci_bus_num(host_bridge->bus)); + + /* XXX: waiting for the qapi to support GSList */ + if (!cur_item) { + head = cur_item = info; + } else { + cur_item->next = info; + cur_item = info; + } + } + + return head; +} + +static const char * const pci_nic_models[] = { + "ne2k_pci", + "i82551", + "i82557b", + "i82559er", + "rtl8139", + "e1000", + "pcnet", + "virtio", + NULL +}; + +static const char * const pci_nic_names[] = { + "ne2k_pci", + "i82551", + "i82557b", + "i82559er", + "rtl8139", + "e1000", + "pcnet", + "virtio-net-pci", + NULL +}; + +/* Initialize a PCI NIC. */ +PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, + const char *default_model, + const char *default_devaddr) +{ + const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr; + Error *err = NULL; + PCIBus *bus; + PCIDevice *pci_dev; + DeviceState *dev; + int devfn; + int i; + + if (qemu_show_nic_models(nd->model, pci_nic_models)) { + exit(0); + } + + i = qemu_find_nic_model(nd, pci_nic_models, default_model); + if (i < 0) { + exit(1); + } + + bus = pci_get_bus_devfn(&devfn, rootbus, devaddr); + if (!bus) { + error_report("Invalid PCI device address %s for device %s", + devaddr, pci_nic_names[i]); + exit(1); + } + + pci_dev = pci_create(bus, devfn, pci_nic_names[i]); + dev = &pci_dev->qdev; + qdev_set_nic_properties(dev, nd); + + object_property_set_bool(OBJECT(dev), true, "realized", &err); + if (err) { + error_report_err(err); + object_unparent(OBJECT(dev)); + exit(1); + } + + return pci_dev; +} + +PCIDevice *pci_vga_init(PCIBus *bus) +{ + switch (vga_interface_type) { + case VGA_CIRRUS: + return pci_create_simple(bus, -1, "cirrus-vga"); + case VGA_QXL: + return pci_create_simple(bus, -1, "qxl-vga"); + case VGA_STD: + return pci_create_simple(bus, -1, "VGA"); + case VGA_VMWARE: + return pci_create_simple(bus, -1, "vmware-svga"); + case VGA_VIRTIO: + return pci_create_simple(bus, -1, "virtio-vga"); + case VGA_NONE: + default: /* Other non-PCI types. Checking for unsupported types is already + done in vl.c. */ + return NULL; + } +} + +/* Whether a given bus number is in range of the secondary + * bus of the given bridge device. */ +static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num) +{ + return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) & + PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ && + dev->config[PCI_SECONDARY_BUS] <= bus_num && + bus_num <= dev->config[PCI_SUBORDINATE_BUS]; +} + +/* Whether a given bus number is in a range of a root bus */ +static bool pci_root_bus_in_range(PCIBus *bus, int bus_num) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + if (pci_secondary_bus_in_range(dev, bus_num)) { + return true; + } + } + } + + return false; +} + +static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) +{ + PCIBus *sec; + + if (!bus) { + return NULL; + } + + if (pci_bus_num(bus) == bus_num) { + return bus; + } + + /* Consider all bus numbers in range for the host pci bridge. */ + if (!pci_bus_is_root(bus) && + !pci_secondary_bus_in_range(bus->parent_dev, bus_num)) { + return NULL; + } + + /* try child bus */ + for (; bus; bus = sec) { + QLIST_FOREACH(sec, &bus->child, sibling) { + if (pci_bus_num(sec) == bus_num) { + return sec; + } + /* PXB buses assumed to be children of bus 0 */ + if (pci_bus_is_root(sec)) { + if (pci_root_bus_in_range(sec, bus_num)) { + break; + } + } else { + if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { + break; + } + } + } + } + + return NULL; +} + +void pci_for_each_bus_depth_first(PCIBus *bus, + void *(*begin)(PCIBus *bus, void *parent_state), + void (*end)(PCIBus *bus, void *state), + void *parent_state) +{ + PCIBus *sec; + void *state; + + if (!bus) { + return; + } + + if (begin) { + state = begin(bus, parent_state); + } else { + state = parent_state; + } + + QLIST_FOREACH(sec, &bus->child, sibling) { + pci_for_each_bus_depth_first(sec, begin, end, state); + } + + if (end) { + end(bus, state); + } +} + + +PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) +{ + bus = pci_find_bus_nr(bus, bus_num); + + if (!bus) + return NULL; + + return bus->devices[devfn]; +} + +static void pci_qdev_realize(DeviceState *qdev, Error **errp) +{ + PCIDevice *pci_dev = (PCIDevice *)qdev; + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev); + Error *local_err = NULL; + PCIBus *bus; + bool is_default_rom; + + /* initialize cap_present for pci_is_express() and pci_config_size() */ + if (pc->is_express) { + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + } + + bus = PCI_BUS(qdev_get_parent_bus(qdev)); + pci_dev = do_pci_register_device(pci_dev, bus, + object_get_typename(OBJECT(qdev)), + pci_dev->devfn, errp); + if (pci_dev == NULL) + return; + + if (pc->realize) { + pc->realize(pci_dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + do_pci_unregister_device(pci_dev); + return; + } + } + + /* rom loading */ + is_default_rom = false; + if (pci_dev->romfile == NULL && pc->romfile != NULL) { + pci_dev->romfile = g_strdup(pc->romfile); + is_default_rom = true; + } + + pci_add_option_rom(pci_dev, is_default_rom, &local_err); + if (local_err) { + error_propagate(errp, local_err); + pci_qdev_unrealize(DEVICE(pci_dev), NULL); + return; + } +} + +static void pci_default_realize(PCIDevice *dev, Error **errp) +{ + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + + if (pc->init) { + if (pc->init(dev) < 0) { + error_setg(errp, "Device initialization failed"); + return; + } + } +} + +PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction, + const char *name) +{ + DeviceState *dev; + + dev = qdev_create(&bus->qbus, name); + qdev_prop_set_int32(dev, "addr", devfn); + qdev_prop_set_bit(dev, "multifunction", multifunction); + return PCI_DEVICE(dev); +} + +PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, + bool multifunction, + const char *name) +{ + PCIDevice *dev = pci_create_multifunction(bus, devfn, multifunction, name); + qdev_init_nofail(&dev->qdev); + return dev; +} + +PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name) +{ + return pci_create_multifunction(bus, devfn, false, name); +} + +PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) +{ + return pci_create_simple_multifunction(bus, devfn, false, name); +} + +static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size) +{ + int offset = PCI_CONFIG_HEADER_SIZE; + int i; + for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) { + if (pdev->used[i]) + offset = i + 1; + else if (i - offset + 1 == size) + return offset; + } + return 0; +} + +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id, + uint8_t *prev_p) +{ + uint8_t next, prev; + + if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST)) + return 0; + + for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]); + prev = next + PCI_CAP_LIST_NEXT) + if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id) + break; + + if (prev_p) + *prev_p = prev; + return next; +} + +static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset) +{ + uint8_t next, prev, found = 0; + + if (!(pdev->used[offset])) { + return 0; + } + + assert(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST); + + for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]); + prev = next + PCI_CAP_LIST_NEXT) { + if (next <= offset && next > found) { + found = next; + } + } + return found; +} + +/* Patch the PCI vendor and device ids in a PCI rom image if necessary. + This is needed for an option rom which is used for more than one device. */ +static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) +{ + uint16_t vendor_id; + uint16_t device_id; + uint16_t rom_vendor_id; + uint16_t rom_device_id; + uint16_t rom_magic; + uint16_t pcir_offset; + uint8_t checksum; + + /* Words in rom data are little endian (like in PCI configuration), + so they can be read / written with pci_get_word / pci_set_word. */ + + /* Only a valid rom will be patched. */ + rom_magic = pci_get_word(ptr); + if (rom_magic != 0xaa55) { + PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + return; + } + pcir_offset = pci_get_word(ptr + 0x18); + if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { + PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + return; + } + + vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); + rom_device_id = pci_get_word(ptr + pcir_offset + 6); + + PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, + vendor_id, device_id, rom_vendor_id, rom_device_id); + + checksum = ptr[6]; + + if (vendor_id != rom_vendor_id) { + /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ + checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); + checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); + PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + ptr[6] = checksum; + pci_set_word(ptr + pcir_offset + 4, vendor_id); + } + + if (device_id != rom_device_id) { + /* Patch device id and checksum (at offset 6 for etherboot roms). */ + checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); + checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); + PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + ptr[6] = checksum; + pci_set_word(ptr + pcir_offset + 6, device_id); + } +} + +/* Add an option rom for the device */ +static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + Error **errp) +{ + int size; + char *path; + void *ptr; + char name[32]; + const VMStateDescription *vmsd; + + if (!pdev->romfile) + return; + if (strlen(pdev->romfile) == 0) + return; + + if (!pdev->rom_bar) { + /* + * Load rom via fw_cfg instead of creating a rom bar, + * for 0.11 compatibility. + */ + int class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); + + /* + * Hot-plugged devices can't use the option ROM + * if the rom bar is disabled. + */ + if (DEVICE(pdev)->hotplugged) { + error_setg(errp, "Hot-plugged device without ROM bar" + " can't have an option ROM"); + return; + } + + if (class == 0x0300) { + rom_add_vga(pdev->romfile); + } else { + rom_add_option(pdev->romfile, -1); + } + return; + } + + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); + if (path == NULL) { + path = g_strdup(pdev->romfile); + } + + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); + g_free(path); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); + g_free(path); + return; + } + if (size & (size - 1)) { + size = 1 << qemu_fls(size); + } + + vmsd = qdev_get_vmsd(DEVICE(pdev)); + + if (vmsd) { + snprintf(name, sizeof(name), "%s.rom", vmsd->name); + } else { + snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev))); + } + pdev->has_rom = true; + memory_region_init_ram(&pdev->rom, OBJECT(pdev), name, size, &error_abort); + vmstate_register_ram(&pdev->rom, &pdev->qdev); + ptr = memory_region_get_ram_ptr(&pdev->rom); + load_image(path, ptr); + g_free(path); + + if (is_default_rom) { + /* Only the default rom images will be patched (if needed). */ + pci_patch_ids(pdev, ptr, size); + } + + pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); +} + +static void pci_del_option_rom(PCIDevice *pdev) +{ + if (!pdev->has_rom) + return; + + vmstate_unregister_ram(&pdev->rom, &pdev->qdev); + pdev->has_rom = false; +} + +/* + * if offset = 0, + * Find and reserve space and add capability to the linked list + * in pci config space + */ +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, + uint8_t offset, uint8_t size) +{ + int ret; + Error *local_err = NULL; + + ret = pci_add_capability2(pdev, cap_id, offset, size, &local_err); + if (local_err) { + assert(ret < 0); + error_report_err(local_err); + } else { + /* success implies a positive offset in config space */ + assert(ret > 0); + } + return ret; +} + +int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id, + uint8_t offset, uint8_t size, + Error **errp) +{ + uint8_t *config; + int i, overlapping_cap; + + if (!offset) { + offset = pci_find_space(pdev, size); + if (!offset) { + error_setg(errp, "out of PCI config space"); + return -ENOSPC; + } + } else { + /* Verify that capabilities don't overlap. Note: device assignment + * depends on this check to verify that the device is not broken. + * Should never trigger for emulated devices, but it's helpful + * for debugging these. */ + for (i = offset; i < offset + size; i++) { + overlapping_cap = pci_find_capability_at_offset(pdev, i); + if (overlapping_cap) { + error_setg(errp, "%s:%02x:%02x.%x " + "Attempt to add PCI capability %x at offset " + "%x overlaps existing capability %x at offset %x", + pci_root_bus_path(pdev), pci_bus_num(pdev->bus), + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + cap_id, offset, overlapping_cap, i); + return -EINVAL; + } + } + } + + config = pdev->config + offset; + config[PCI_CAP_LIST_ID] = cap_id; + config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST]; + pdev->config[PCI_CAPABILITY_LIST] = offset; + pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST; + memset(pdev->used + offset, 0xFF, QEMU_ALIGN_UP(size, 4)); + /* Make capability read-only by default */ + memset(pdev->wmask + offset, 0, size); + /* Check capability by default */ + memset(pdev->cmask + offset, 0xFF, size); + return offset; +} + +/* Unlink capability from the pci config space. */ +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size) +{ + uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev); + if (!offset) + return; + pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT]; + /* Make capability writable again */ + memset(pdev->wmask + offset, 0xff, size); + memset(pdev->w1cmask + offset, 0, size); + /* Clear cmask as device-specific registers can't be checked */ + memset(pdev->cmask + offset, 0, size); + memset(pdev->used + offset, 0, QEMU_ALIGN_UP(size, 4)); + + if (!pdev->config[PCI_CAPABILITY_LIST]) + pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST; +} + +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id) +{ + return pci_find_capability_list(pdev, cap_id, NULL); +} + +static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent) +{ + PCIDevice *d = (PCIDevice *)dev; + const pci_class_desc *desc; + char ctxt[64]; + PCIIORegion *r; + int i, class; + + class = pci_get_word(d->config + PCI_CLASS_DEVICE); + desc = pci_class_descriptions; + while (desc->desc && class != desc->class) + desc++; + if (desc->desc) { + snprintf(ctxt, sizeof(ctxt), "%s", desc->desc); + } else { + snprintf(ctxt, sizeof(ctxt), "Class %04x", class); + } + + monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, " + "pci id %04x:%04x (sub %04x:%04x)\n", + indent, "", ctxt, pci_bus_num(d->bus), + PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), + pci_get_word(d->config + PCI_VENDOR_ID), + pci_get_word(d->config + PCI_DEVICE_ID), + pci_get_word(d->config + PCI_SUBSYSTEM_VENDOR_ID), + pci_get_word(d->config + PCI_SUBSYSTEM_ID)); + for (i = 0; i < PCI_NUM_REGIONS; i++) { + r = &d->io_regions[i]; + if (!r->size) + continue; + monitor_printf(mon, "%*sbar %d: %s at 0x%"FMT_PCIBUS + " [0x%"FMT_PCIBUS"]\n", + indent, "", + i, r->type & PCI_BASE_ADDRESS_SPACE_IO ? "i/o" : "mem", + r->addr, r->addr + r->size - 1); + } +} + +static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len) +{ + PCIDevice *d = (PCIDevice *)dev; + const char *name = NULL; + const pci_class_desc *desc = pci_class_descriptions; + int class = pci_get_word(d->config + PCI_CLASS_DEVICE); + + while (desc->desc && + (class & ~desc->fw_ign_bits) != + (desc->class & ~desc->fw_ign_bits)) { + desc++; + } + + if (desc->desc) { + name = desc->fw_name; + } + + if (name) { + pstrcpy(buf, len, name); + } else { + snprintf(buf, len, "pci%04x,%04x", + pci_get_word(d->config + PCI_VENDOR_ID), + pci_get_word(d->config + PCI_DEVICE_ID)); + } + + return buf; +} + +static char *pcibus_get_fw_dev_path(DeviceState *dev) +{ + PCIDevice *d = (PCIDevice *)dev; + char path[50], name[33]; + int off; + + off = snprintf(path, sizeof(path), "%s@%x", + pci_dev_fw_name(dev, name, sizeof name), + PCI_SLOT(d->devfn)); + if (PCI_FUNC(d->devfn)) + snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn)); + return g_strdup(path); +} + +static char *pcibus_get_dev_path(DeviceState *dev) +{ + PCIDevice *d = container_of(dev, PCIDevice, qdev); + PCIDevice *t; + int slot_depth; + /* Path format: Domain:00:Slot.Function:Slot.Function....:Slot.Function. + * 00 is added here to make this format compatible with + * domain:Bus:Slot.Func for systems without nested PCI bridges. + * Slot.Function list specifies the slot and function numbers for all + * devices on the path from root to the specific device. */ + const char *root_bus_path; + int root_bus_len; + char slot[] = ":SS.F"; + int slot_len = sizeof slot - 1 /* For '\0' */; + int path_len; + char *path, *p; + int s; + + root_bus_path = pci_root_bus_path(d); + root_bus_len = strlen(root_bus_path); + + /* Calculate # of slots on path between device and root. */; + slot_depth = 0; + for (t = d; t; t = t->bus->parent_dev) { + ++slot_depth; + } + + path_len = root_bus_len + slot_len * slot_depth; + + /* Allocate memory, fill in the terminating null byte. */ + path = g_malloc(path_len + 1 /* For '\0' */); + path[path_len] = '\0'; + + memcpy(path, root_bus_path, root_bus_len); + + /* Fill in slot numbers. We walk up from device to root, so need to print + * them in the reverse order, last to first. */ + p = path + path_len; + for (t = d; t; t = t->bus->parent_dev) { + p -= slot_len; + s = snprintf(slot, sizeof slot, ":%02x.%x", + PCI_SLOT(t->devfn), PCI_FUNC(t->devfn)); + assert(s == slot_len); + memcpy(p, slot, slot_len); + } + + return path; +} + +static int pci_qdev_find_recursive(PCIBus *bus, + const char *id, PCIDevice **pdev) +{ + DeviceState *qdev = qdev_find_recursive(&bus->qbus, id); + if (!qdev) { + return -ENODEV; + } + + /* roughly check if given qdev is pci device */ + if (object_dynamic_cast(OBJECT(qdev), TYPE_PCI_DEVICE)) { + *pdev = PCI_DEVICE(qdev); + return 0; + } + return -EINVAL; +} + +int pci_qdev_find_device(const char *id, PCIDevice **pdev) +{ + PCIHostState *host_bridge; + int rc = -ENODEV; + + QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { + int tmp = pci_qdev_find_recursive(host_bridge->bus, id, pdev); + if (!tmp) { + rc = 0; + break; + } + if (tmp != -ENODEV) { + rc = tmp; + } + } + + return rc; +} + +MemoryRegion *pci_address_space(PCIDevice *dev) +{ + return dev->bus->address_space_mem; +} + +MemoryRegion *pci_address_space_io(PCIDevice *dev) +{ + return dev->bus->address_space_io; +} + +static void pci_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); + + k->realize = pci_qdev_realize; + k->unrealize = pci_qdev_unrealize; + k->bus_type = TYPE_PCI_BUS; + k->props = pci_props; + pc->realize = pci_default_realize; +} + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +{ + PCIBus *bus = PCI_BUS(dev->bus); + + if (bus->iommu_fn) { + return bus->iommu_fn(bus, bus->iommu_opaque, dev->devfn); + } + + if (bus->parent_dev) { + /** We are ignoring the bus master DMA bit of the bridge + * as it would complicate things such as VFIO for no good reason */ + return pci_device_iommu_address_space(bus->parent_dev); + } + + return &address_space_memory; +} + +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) +{ + bus->iommu_fn = fn; + bus->iommu_opaque = opaque; +} + +static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) +{ + Range *range = opaque; + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND); + int i; + + if (!(cmd & PCI_COMMAND_MEMORY)) { + return; + } + + if (pc->is_bridge) { + pcibus_t base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + pcibus_t limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + + base = MAX(base, 0x1ULL << 32); + + if (limit >= base) { + Range pref_range; + pref_range.begin = base; + pref_range.end = limit + 1; + range_extend(range, &pref_range); + } + } + for (i = 0; i < PCI_NUM_REGIONS; ++i) { + PCIIORegion *r = &dev->io_regions[i]; + Range region_range; + + if (!r->size || + (r->type & PCI_BASE_ADDRESS_SPACE_IO) || + !(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64)) { + continue; + } + region_range.begin = pci_bar_address(dev, i, r->type, r->size); + region_range.end = region_range.begin + r->size; + + if (region_range.begin == PCI_BAR_UNMAPPED) { + continue; + } + + region_range.begin = MAX(region_range.begin, 0x1ULL << 32); + + if (region_range.end - 1 >= region_range.begin) { + range_extend(range, ®ion_range); + } + } +} + +void pci_bus_get_w64_range(PCIBus *bus, Range *range) +{ + range->begin = range->end = 0; + pci_for_each_device_under_bus(bus, pci_dev_get_w64, range); +} + +static const TypeInfo pci_device_type_info = { + .name = TYPE_PCI_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PCIDevice), + .abstract = true, + .class_size = sizeof(PCIDeviceClass), + .class_init = pci_device_class_init, +}; + +static void pci_register_types(void) +{ + type_register_static(&pci_bus_info); + type_register_static(&pcie_bus_info); + type_register_static(&pci_device_type_info); +} + +type_init(pci_register_types) diff --git a/qemu/hw/pci/pci_bridge.c b/qemu/hw/pci/pci_bridge.c new file mode 100644 index 000000000..40c97b155 --- /dev/null +++ b/qemu/hw/pci/pci_bridge.c @@ -0,0 +1,419 @@ +/* + * QEMU PCI bus manager + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to dea + + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM + + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * split out from pci.c + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + */ + +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "qemu/range.h" + +/* PCI bridge subsystem vendor ID helper functions */ +#define PCI_SSVID_SIZEOF 8 +#define PCI_SSVID_SVID 4 +#define PCI_SSVID_SSID 6 + +int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset, + uint16_t svid, uint16_t ssid) +{ + int pos; + pos = pci_add_capability(dev, PCI_CAP_ID_SSVID, offset, PCI_SSVID_SIZEOF); + if (pos < 0) { + return pos; + } + + pci_set_word(dev->config + pos + PCI_SSVID_SVID, svid); + pci_set_word(dev->config + pos + PCI_SSVID_SSID, ssid); + return pos; +} + +/* Accessor function to get parent bridge device from pci bus. */ +PCIDevice *pci_bridge_get_device(PCIBus *bus) +{ + return bus->parent_dev; +} + +/* Accessor function to get secondary bus from pci-to-pci bridge device */ +PCIBus *pci_bridge_get_sec_bus(PCIBridge *br) +{ + return &br->sec_bus; +} + +static uint32_t pci_config_get_io_base(const PCIDevice *d, + uint32_t base, uint32_t base_upper16) +{ + uint32_t val; + + val = ((uint32_t)d->config[base] & PCI_IO_RANGE_MASK) << 8; + if (d->config[base] & PCI_IO_RANGE_TYPE_32) { + val |= (uint32_t)pci_get_word(d->config + base_upper16) << 16; + } + return val; +} + +static pcibus_t pci_config_get_memory_base(const PCIDevice *d, uint32_t base) +{ + return ((pcibus_t)pci_get_word(d->config + base) & PCI_MEMORY_RANGE_MASK) + << 16; +} + +static pcibus_t pci_config_get_pref_base(const PCIDevice *d, + uint32_t base, uint32_t upper) +{ + pcibus_t tmp; + pcibus_t val; + + tmp = (pcibus_t)pci_get_word(d->config + base); + val = (tmp & PCI_PREF_RANGE_MASK) << 16; + if (tmp & PCI_PREF_RANGE_TYPE_64) { + val |= (pcibus_t)pci_get_long(d->config + upper) << 32; + } + return val; +} + +/* accessor function to get bridge filtering base address */ +pcibus_t pci_bridge_get_base(const PCIDevice *bridge, uint8_t type) +{ + pcibus_t base; + if (type & PCI_BASE_ADDRESS_SPACE_IO) { + base = pci_config_get_io_base(bridge, + PCI_IO_BASE, PCI_IO_BASE_UPPER16); + } else { + if (type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + base = pci_config_get_pref_base( + bridge, PCI_PREF_MEMORY_BASE, PCI_PREF_BASE_UPPER32); + } else { + base = pci_config_get_memory_base(bridge, PCI_MEMORY_BASE); + } + } + + return base; +} + +/* accessor funciton to get bridge filtering limit */ +pcibus_t pci_bridge_get_limit(const PCIDevice *bridge, uint8_t type) +{ + pcibus_t limit; + if (type & PCI_BASE_ADDRESS_SPACE_IO) { + limit = pci_config_get_io_base(bridge, + PCI_IO_LIMIT, PCI_IO_LIMIT_UPPER16); + limit |= 0xfff; /* PCI bridge spec 3.2.5.6. */ + } else { + if (type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + limit = pci_config_get_pref_base( + bridge, PCI_PREF_MEMORY_LIMIT, PCI_PREF_LIMIT_UPPER32); + } else { + limit = pci_config_get_memory_base(bridge, PCI_MEMORY_LIMIT); + } + limit |= 0xfffff; /* PCI bridge spec 3.2.5.{1, 8}. */ + } + return limit; +} + +static void pci_bridge_init_alias(PCIBridge *bridge, MemoryRegion *alias, + uint8_t type, const char *name, + MemoryRegion *space, + MemoryRegion *parent_space, + bool enabled) +{ + PCIDevice *bridge_dev = PCI_DEVICE(bridge); + pcibus_t base = pci_bridge_get_base(bridge_dev, type); + pcibus_t limit = pci_bridge_get_limit(bridge_dev, type); + /* TODO: this doesn't handle base = 0 limit = 2^64 - 1 correctly. + * Apparently no way to do this with existing memory APIs. */ + pcibus_t size = enabled && limit >= base ? limit + 1 - base : 0; + + memory_region_init_alias(alias, OBJECT(bridge), name, space, base, size); + memory_region_add_subregion_overlap(parent_space, base, alias, 1); +} + +static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent, + MemoryRegion *alias_vga) +{ + PCIDevice *pd = PCI_DEVICE(br); + uint16_t brctl = pci_get_word(pd->config + PCI_BRIDGE_CONTROL); + + memory_region_init_alias(&alias_vga[QEMU_PCI_VGA_IO_LO], OBJECT(br), + "pci_bridge_vga_io_lo", &br->address_space_io, + QEMU_PCI_VGA_IO_LO_BASE, QEMU_PCI_VGA_IO_LO_SIZE); + memory_region_init_alias(&alias_vga[QEMU_PCI_VGA_IO_HI], OBJECT(br), + "pci_bridge_vga_io_hi", &br->address_space_io, + QEMU_PCI_VGA_IO_HI_BASE, QEMU_PCI_VGA_IO_HI_SIZE); + memory_region_init_alias(&alias_vga[QEMU_PCI_VGA_MEM], OBJECT(br), + "pci_bridge_vga_mem", &br->address_space_mem, + QEMU_PCI_VGA_MEM_BASE, QEMU_PCI_VGA_MEM_SIZE); + + if (brctl & PCI_BRIDGE_CTL_VGA) { + pci_register_vga(pd, &alias_vga[QEMU_PCI_VGA_MEM], + &alias_vga[QEMU_PCI_VGA_IO_LO], + &alias_vga[QEMU_PCI_VGA_IO_HI]); + } +} + +static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br) +{ + PCIDevice *pd = PCI_DEVICE(br); + PCIBus *parent = pd->bus; + PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1); + uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND); + + pci_bridge_init_alias(br, &w->alias_pref_mem, + PCI_BASE_ADDRESS_MEM_PREFETCH, + "pci_bridge_pref_mem", + &br->address_space_mem, + parent->address_space_mem, + cmd & PCI_COMMAND_MEMORY); + pci_bridge_init_alias(br, &w->alias_mem, + PCI_BASE_ADDRESS_SPACE_MEMORY, + "pci_bridge_mem", + &br->address_space_mem, + parent->address_space_mem, + cmd & PCI_COMMAND_MEMORY); + pci_bridge_init_alias(br, &w->alias_io, + PCI_BASE_ADDRESS_SPACE_IO, + "pci_bridge_io", + &br->address_space_io, + parent->address_space_io, + cmd & PCI_COMMAND_IO); + + pci_bridge_init_vga_aliases(br, parent, w->alias_vga); + + return w; +} + +static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w) +{ + PCIDevice *pd = PCI_DEVICE(br); + PCIBus *parent = pd->bus; + + memory_region_del_subregion(parent->address_space_io, &w->alias_io); + memory_region_del_subregion(parent->address_space_mem, &w->alias_mem); + memory_region_del_subregion(parent->address_space_mem, &w->alias_pref_mem); + pci_unregister_vga(pd); +} + +static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w) +{ + object_unparent(OBJECT(&w->alias_io)); + object_unparent(OBJECT(&w->alias_mem)); + object_unparent(OBJECT(&w->alias_pref_mem)); + object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO])); + object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI])); + object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM])); + g_free(w); +} + +void pci_bridge_update_mappings(PCIBridge *br) +{ + PCIBridgeWindows *w = br->windows; + + /* Make updates atomic to: handle the case of one VCPU updating the bridge + * while another accesses an unaffected region. */ + memory_region_transaction_begin(); + pci_bridge_region_del(br, br->windows); + br->windows = pci_bridge_region_init(br); + memory_region_transaction_commit(); + pci_bridge_region_cleanup(br, w); +} + +/* default write_config function for PCI-to-PCI bridge */ +void pci_bridge_write_config(PCIDevice *d, + uint32_t address, uint32_t val, int len) +{ + PCIBridge *s = PCI_BRIDGE(d); + uint16_t oldctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); + uint16_t newctl; + + pci_default_write_config(d, address, val, len); + + if (ranges_overlap(address, len, PCI_COMMAND, 2) || + + /* io base/limit */ + ranges_overlap(address, len, PCI_IO_BASE, 2) || + + /* memory base/limit, prefetchable base/limit and + io base/limit upper 16 */ + ranges_overlap(address, len, PCI_MEMORY_BASE, 20) || + + /* vga enable */ + ranges_overlap(address, len, PCI_BRIDGE_CONTROL, 2)) { + pci_bridge_update_mappings(s); + } + + newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); + if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) { + /* Trigger hot reset on 0->1 transition. */ + qbus_reset_all(&s->sec_bus.qbus); + } +} + +void pci_bridge_disable_base_limit(PCIDevice *dev) +{ + uint8_t *conf = dev->config; + + pci_byte_test_and_set_mask(conf + PCI_IO_BASE, + PCI_IO_RANGE_MASK & 0xff); + pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT, + PCI_IO_RANGE_MASK & 0xff); + pci_word_test_and_set_mask(conf + PCI_MEMORY_BASE, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_word_test_and_clear_mask(conf + PCI_MEMORY_LIMIT, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_word_test_and_set_mask(conf + PCI_PREF_MEMORY_BASE, + PCI_PREF_RANGE_MASK & 0xffff); + pci_word_test_and_clear_mask(conf + PCI_PREF_MEMORY_LIMIT, + PCI_PREF_RANGE_MASK & 0xffff); + pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0); + pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0); +} + +/* reset bridge specific configuration registers */ +void pci_bridge_reset(DeviceState *qdev) +{ + PCIDevice *dev = PCI_DEVICE(qdev); + uint8_t *conf = dev->config; + + conf[PCI_PRIMARY_BUS] = 0; + conf[PCI_SECONDARY_BUS] = 0; + conf[PCI_SUBORDINATE_BUS] = 0; + conf[PCI_SEC_LATENCY_TIMER] = 0; + + /* + * the default values for base/limit registers aren't specified + * in the PCI-to-PCI-bridge spec. So we don't thouch them here. + * Each implementation can override it. + * typical implementation does + * zero base/limit registers or + * disable forwarding: pci_bridge_disable_base_limit() + * If disable forwarding is wanted, call pci_bridge_disable_base_limit() + * after this function. + */ + pci_byte_test_and_clear_mask(conf + PCI_IO_BASE, + PCI_IO_RANGE_MASK & 0xff); + pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT, + PCI_IO_RANGE_MASK & 0xff); + pci_word_test_and_clear_mask(conf + PCI_MEMORY_BASE, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_word_test_and_clear_mask(conf + PCI_MEMORY_LIMIT, + PCI_MEMORY_RANGE_MASK & 0xffff); + pci_word_test_and_clear_mask(conf + PCI_PREF_MEMORY_BASE, + PCI_PREF_RANGE_MASK & 0xffff); + pci_word_test_and_clear_mask(conf + PCI_PREF_MEMORY_LIMIT, + PCI_PREF_RANGE_MASK & 0xffff); + pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0); + pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0); + + pci_set_word(conf + PCI_BRIDGE_CONTROL, 0); +} + +/* default qdev initialization function for PCI-to-PCI bridge */ +int pci_bridge_initfn(PCIDevice *dev, const char *typename) +{ + PCIBus *parent = dev->bus; + PCIBridge *br = PCI_BRIDGE(dev); + PCIBus *sec_bus = &br->sec_bus; + + pci_word_test_and_set_mask(dev->config + PCI_STATUS, + PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK); + + /* + * TODO: We implement VGA Enable in the Bridge Control Register + * therefore per the PCI to PCI bridge spec we must also implement + * VGA Palette Snooping. When done, set this bit writable: + * + * pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, + * PCI_COMMAND_VGA_PALETTE); + */ + + pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_PCI); + dev->config[PCI_HEADER_TYPE] = + (dev->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) | + PCI_HEADER_TYPE_BRIDGE; + pci_set_word(dev->config + PCI_SEC_STATUS, + PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK); + + /* + * If we don't specify the name, the bus will be addressed as <id>.0, where + * id is the device id. + * Since PCI Bridge devices have a single bus each, we don't need the index: + * let users address the bus using the device name. + */ + if (!br->bus_name && dev->qdev.id && *dev->qdev.id) { + br->bus_name = dev->qdev.id; + } + + qbus_create_inplace(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), + br->bus_name); + sec_bus->parent_dev = dev; + sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; + sec_bus->address_space_mem = &br->address_space_mem; + memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); + sec_bus->address_space_io = &br->address_space_io; + memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536); + br->windows = pci_bridge_region_init(br); + QLIST_INIT(&sec_bus->child); + QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); + return 0; +} + +/* default qdev clean up function for PCI-to-PCI bridge */ +void pci_bridge_exitfn(PCIDevice *pci_dev) +{ + PCIBridge *s = PCI_BRIDGE(pci_dev); + assert(QLIST_EMPTY(&s->sec_bus.child)); + QLIST_REMOVE(&s->sec_bus, sibling); + pci_bridge_region_del(s, s->windows); + pci_bridge_region_cleanup(s, s->windows); + /* object_unparent() is called automatically during device deletion */ +} + +/* + * before qdev initialization(qdev_init()), this function sets bus_name and + * map_irq callback which are necessry for pci_bridge_initfn() to + * initialize bus. + */ +void pci_bridge_map_irq(PCIBridge *br, const char* bus_name, + pci_map_irq_fn map_irq) +{ + br->map_irq = map_irq; + br->bus_name = bus_name; +} + +static const TypeInfo pci_bridge_type_info = { + .name = TYPE_PCI_BRIDGE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIBridge), + .abstract = true, +}; + +static void pci_bridge_register_types(void) +{ + type_register_static(&pci_bridge_type_info); +} + +type_init(pci_bridge_register_types) diff --git a/qemu/hw/pci/pci_host.c b/qemu/hw/pci/pci_host.c new file mode 100644 index 000000000..3e26f9256 --- /dev/null +++ b/qemu/hw/pci/pci_host.c @@ -0,0 +1,191 @@ +/* + * pci_host.c + * + * Copyright (c) 2009 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "trace.h" + +/* debug PCI */ +//#define DEBUG_PCI + +#ifdef DEBUG_PCI +#define PCI_DPRINTF(fmt, ...) \ +do { printf("pci_host_data: " fmt , ## __VA_ARGS__); } while (0) +#else +#define PCI_DPRINTF(fmt, ...) +#endif + +/* + * PCI address + * bit 16 - 24: bus number + * bit 8 - 15: devfun number + * bit 0 - 7: offset in configuration space of a given pci device + */ + +/* the helper function to get a PCIDevice* for a given pci address */ +static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr) +{ + uint8_t bus_num = addr >> 16; + uint8_t devfn = addr >> 8; + + return pci_find_device(bus, bus_num, devfn); +} + +void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, + uint32_t limit, uint32_t val, uint32_t len) +{ + assert(len <= 4); + trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn), addr, val); + pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr)); +} + +uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, + uint32_t limit, uint32_t len) +{ + uint32_t ret; + + assert(len <= 4); + ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr)); + trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn), addr, ret); + + return ret; +} + +void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len) +{ + PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr); + uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); + + if (!pci_dev) { + return; + } + + PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n", + __func__, pci_dev->name, config_addr, val, len); + pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE, + val, len); +} + +uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len) +{ + PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr); + uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); + uint32_t val; + + if (!pci_dev) { + return ~0x0; + } + + val = pci_host_config_read_common(pci_dev, config_addr, + PCI_CONFIG_SPACE_SIZE, len); + PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n", + __func__, pci_dev->name, config_addr, val, len); + + return val; +} + +static void pci_host_config_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + + PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx64"\n", + __func__, addr, len, val); + if (addr != 0 || len != 4) { + return; + } + s->config_reg = val; +} + +static uint64_t pci_host_config_read(void *opaque, hwaddr addr, + unsigned len) +{ + PCIHostState *s = opaque; + uint32_t val = s->config_reg; + + PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx32"\n", + __func__, addr, len, val); + return val; +} + +static void pci_host_data_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + PCI_DPRINTF("write addr " TARGET_FMT_plx " len %d val %x\n", + addr, len, (unsigned)val); + if (s->config_reg & (1u << 31)) + pci_data_write(s->bus, s->config_reg | (addr & 3), val, len); +} + +static uint64_t pci_host_data_read(void *opaque, + hwaddr addr, unsigned len) +{ + PCIHostState *s = opaque; + uint32_t val; + if (!(s->config_reg & (1U << 31))) { + return 0xffffffff; + } + val = pci_data_read(s->bus, s->config_reg | (addr & 3), len); + PCI_DPRINTF("read addr " TARGET_FMT_plx " len %d val %x\n", + addr, len, val); + return val; +} + +const MemoryRegionOps pci_host_conf_le_ops = { + .read = pci_host_config_read, + .write = pci_host_config_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +const MemoryRegionOps pci_host_conf_be_ops = { + .read = pci_host_config_read, + .write = pci_host_config_write, + .endianness = DEVICE_BIG_ENDIAN, +}; + +const MemoryRegionOps pci_host_data_le_ops = { + .read = pci_host_data_read, + .write = pci_host_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +const MemoryRegionOps pci_host_data_be_ops = { + .read = pci_host_data_read, + .write = pci_host_data_write, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static const TypeInfo pci_host_type_info = { + .name = TYPE_PCI_HOST_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .abstract = true, + .class_size = sizeof(PCIHostBridgeClass), + .instance_size = sizeof(PCIHostState), +}; + +static void pci_host_register_types(void) +{ + type_register_static(&pci_host_type_info); +} + +type_init(pci_host_register_types) diff --git a/qemu/hw/pci/pcie.c b/qemu/hw/pci/pcie.c new file mode 100644 index 000000000..6e28985bd --- /dev/null +++ b/qemu/hw/pci/pcie.c @@ -0,0 +1,634 @@ +/* + * pcie.c + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu-common.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pcie.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pcie_regs.h" +#include "qemu/range.h" + +//#define DEBUG_PCIE +#ifdef DEBUG_PCIE +# define PCIE_DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) +#else +# define PCIE_DPRINTF(fmt, ...) do {} while (0) +#endif +#define PCIE_DEV_PRINTF(dev, fmt, ...) \ + PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) + + +/*************************************************************************** + * pci express capability helper functions + */ +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) +{ + int pos; + uint8_t *exp_cap; + + assert(pci_is_express(dev)); + + pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, + PCI_EXP_VER2_SIZEOF); + if (pos < 0) { + return pos; + } + dev->exp.exp_cap = pos; + exp_cap = dev->config + pos; + + /* capability register + interrupt message number defaults to 0 */ + pci_set_word(exp_cap + PCI_EXP_FLAGS, + ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) | + PCI_EXP_FLAGS_VER2); + + /* device capability register + * table 7-12: + * roll based error reporting bit must be set by all + * Functions conforming to the ECN, PCI Express Base + * Specification, Revision 1.1., or subsequent PCI Express Base + * Specification revisions. + */ + pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER); + + pci_set_long(exp_cap + PCI_EXP_LNKCAP, + (port << PCI_EXP_LNKCAP_PN_SHIFT) | + PCI_EXP_LNKCAP_ASPMS_0S | + PCI_EXP_LNK_MLW_1 | + PCI_EXP_LNK_LS_25); + + pci_set_word(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); + + pci_set_long(exp_cap + PCI_EXP_DEVCAP2, + PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); + + pci_set_word(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_EETLPPB); + return pos; +} + +int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) +{ + uint8_t type = PCI_EXP_TYPE_ENDPOINT; + + /* + * Windows guests will report Code 10, device cannot start, if + * a regular Endpoint type is exposed on a root complex. These + * should instead be Root Complex Integrated Endpoints. + */ + if (pci_bus_is_express(dev->bus) && pci_bus_is_root(dev->bus)) { + type = PCI_EXP_TYPE_RC_END; + } + + return pcie_cap_init(dev, offset, type, 0); +} + +void pcie_cap_exit(PCIDevice *dev) +{ + pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF); +} + +uint8_t pcie_cap_get_type(const PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + assert(pos > 0); + return (pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & + PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT; +} + +/* MSI/MSI-X */ +/* pci express interrupt message number */ +/* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */ +void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector) +{ + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + assert(vector < 32); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_FLAGS, PCI_EXP_FLAGS_IRQ); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_FLAGS, + vector << PCI_EXP_FLAGS_IRQ_SHIFT); +} + +uint8_t pcie_cap_flags_get_vector(PCIDevice *dev) +{ + return (pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS) & + PCI_EXP_FLAGS_IRQ) >> PCI_EXP_FLAGS_IRQ_SHIFT; +} + +void pcie_cap_deverr_init(PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_DEVCAP, + PCI_EXP_DEVCAP_RBER); + pci_long_test_and_set_mask(dev->wmask + pos + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE); + pci_long_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED | + PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD); +} + +void pcie_cap_deverr_reset(PCIDevice *dev) +{ + uint8_t *devctl = dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL; + pci_long_test_and_clear_mask(devctl, + PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE); +} + +static void hotplug_event_update_event_status(PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + uint8_t *exp_cap = dev->config + pos; + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); + + dev->exp.hpev_notified = (sltctl & PCI_EXP_SLTCTL_HPIE) && + (sltsta & sltctl & PCI_EXP_HP_EV_SUPPORTED); +} + +static void hotplug_event_notify(PCIDevice *dev) +{ + bool prev = dev->exp.hpev_notified; + + hotplug_event_update_event_status(dev); + + if (prev == dev->exp.hpev_notified) { + return; + } + + /* Note: the logic above does not take into account whether interrupts + * are masked. The result is that interrupt will be sent when it is + * subsequently unmasked. This appears to be legal: Section 6.7.3.4: + * The Port may optionally send an MSI when there are hot-plug events that + * occur while interrupt generation is disabled, and interrupt generation is + * subsequently enabled. */ + if (msix_enabled(dev)) { + msix_notify(dev, pcie_cap_flags_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_cap_flags_get_vector(dev)); + } else { + pci_set_irq(dev, dev->exp.hpev_notified); + } +} + +static void hotplug_event_clear(PCIDevice *dev) +{ + hotplug_event_update_event_status(dev); + if (!msix_enabled(dev) && !msi_enabled(dev) && !dev->exp.hpev_notified) { + pci_irq_deassert(dev); + } +} + +/* + * A PCI Express Hot-Plug Event has occurred, so update slot status register + * and notify OS of the event if necessary. + * + * 6.7.3 PCI Express Hot-Plug Events + * 6.7.3.4 Software Notification of Hot-Plug Events + */ +static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event) +{ + /* Minor optimization: if nothing changed - no event is needed. */ + if (pci_word_test_and_set_mask(dev->config + dev->exp.exp_cap + + PCI_EXP_SLTSTA, event)) { + return; + } + hotplug_event_notify(dev); +} + +static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, + DeviceState *dev, + uint8_t **exp_cap, Error **errp) +{ + *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA); + + PCIE_DEV_PRINTF(PCI_DEVICE(dev), "hotplug state: 0x%x\n", sltsta); + if (sltsta & PCI_EXP_SLTSTA_EIS) { + /* the slot is electromechanically locked. + * This error is propagated up to qdev and then to HMP/QMP. + */ + error_setg_errno(errp, EBUSY, "slot is electromechanically locked"); + } +} + +void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + uint8_t *exp_cap; + PCIDevice *pci_dev = PCI_DEVICE(dev); + + pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + + /* Don't send event when device is enabled during qemu machine creation: + * it is present on boot, no hotplug event is necessary. We do send an + * event when the device is disabled later. */ + if (!dev->hotplugged) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + return; + } + + /* TODO: multifunction hot-plug. + * Right now, only a device of function = 0 is allowed to be + * hot plugged/unplugged. + */ + assert(PCI_FUNC(pci_dev->devfn) == 0); + + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), + PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); +} + +void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + uint8_t *exp_cap; + + pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + + pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); +} + +/* pci express slot for pci express root/downstream port + PCI express capability slot registers */ +void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) +{ + uint32_t pos = dev->exp.exp_cap; + + pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_FLAGS, + PCI_EXP_FLAGS_SLOT); + + pci_long_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCAP, + ~PCI_EXP_SLTCAP_PSN); + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, + (slot << PCI_EXP_SLTCAP_PSN_SHIFT) | + PCI_EXP_SLTCAP_EIP | + PCI_EXP_SLTCAP_HPS | + PCI_EXP_SLTCAP_HPC | + PCI_EXP_SLTCAP_PIP | + PCI_EXP_SLTCAP_AIP | + PCI_EXP_SLTCAP_ABP); + + if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, + PCI_EXP_SLTCAP_PCP); + pci_word_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } + + pci_word_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PIC | + PCI_EXP_SLTCTL_AIC); + pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PIC_OFF | + PCI_EXP_SLTCTL_AIC_OFF); + pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PIC | + PCI_EXP_SLTCTL_AIC | + PCI_EXP_SLTCTL_HPIE | + PCI_EXP_SLTCTL_CCIE | + PCI_EXP_SLTCTL_PDCE | + PCI_EXP_SLTCTL_ABPE); + /* Although reading PCI_EXP_SLTCTL_EIC returns always 0, + * make the bit writable here in order to detect 1b is written. + * pcie_cap_slot_write_config() test-and-clear the bit, so + * this bit always returns 0 to the guest. + */ + pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_EIC); + + pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, + PCI_EXP_HP_EV_SUPPORTED); + + dev->exp.hpev_notified = false; + + qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), + DEVICE(dev), NULL); +} + +void pcie_cap_slot_reset(PCIDevice *dev) +{ + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint8_t port_type = pcie_cap_get_type(dev); + + assert(port_type == PCI_EXP_TYPE_DOWNSTREAM || + port_type == PCI_EXP_TYPE_ROOT_PORT); + + PCIE_DEV_PRINTF(dev, "reset\n"); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_EIC | + PCI_EXP_SLTCTL_PIC | + PCI_EXP_SLTCTL_AIC | + PCI_EXP_SLTCTL_HPIE | + PCI_EXP_SLTCTL_CCIE | + PCI_EXP_SLTCTL_PDCE | + PCI_EXP_SLTCTL_ABPE); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_AIC_OFF); + + if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { + /* Downstream ports enforce device number 0. */ + bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0]; + uint16_t pic; + + if (populated) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } else { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } + + pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); + } + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_EIS |/* on reset, + the lock is released */ + PCI_EXP_SLTSTA_CC | + PCI_EXP_SLTSTA_PDC | + PCI_EXP_SLTSTA_ABP); + + hotplug_event_update_event_status(dev); +} + +static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + object_unparent(OBJECT(dev)); +} + +void pcie_cap_slot_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len) +{ + uint32_t pos = dev->exp.exp_cap; + uint8_t *exp_cap = dev->config + pos; + uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); + + if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { + hotplug_event_clear(dev); + } + + if (!ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) { + return; + } + + if (pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_EIC)) { + sltsta ^= PCI_EXP_SLTSTA_EIS; /* toggle PCI_EXP_SLTSTA_EIS bit */ + pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta); + PCIE_DEV_PRINTF(dev, "PCI_EXP_SLTCTL_EIC: " + "sltsta -> 0x%02"PRIx16"\n", + sltsta); + } + + /* + * If the slot is polulated, power indicator is off and power + * controller is off, it is safe to detach the devices. + */ + if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && + ((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); + } + + hotplug_event_notify(dev); + + /* + * 6.7.3.2 Command Completed Events + * + * Software issues a command to a hot-plug capable Downstream Port by + * issuing a write transaction that targets any portion of the Port’s Slot + * Control register. A single write to the Slot Control register is + * considered to be a single command, even if the write affects more than + * one field in the Slot Control register. In response to this transaction, + * the Port must carry out the requested actions and then set the + * associated status field for the command completed event. */ + + /* Real hardware might take a while to complete requested command because + * physical movement would be involved like locking the electromechanical + * lock. However in our case, command is completed instantaneously above, + * so send a command completion event right now. + */ + pcie_cap_slot_event(dev, PCI_EXP_HP_EV_CCI); +} + +int pcie_cap_slot_post_load(void *opaque, int version_id) +{ + PCIDevice *dev = opaque; + hotplug_event_update_event_status(dev); + return 0; +} + +void pcie_cap_slot_push_attention_button(PCIDevice *dev) +{ + pcie_cap_slot_event(dev, PCI_EXP_HP_EV_ABP); +} + +/* root control/capabilities/status. PME isn't emulated for now */ +void pcie_cap_root_init(PCIDevice *dev) +{ + pci_set_word(dev->wmask + dev->exp.exp_cap + PCI_EXP_RTCTL, + PCI_EXP_RTCTL_SECEE | PCI_EXP_RTCTL_SENFEE | + PCI_EXP_RTCTL_SEFEE); +} + +void pcie_cap_root_reset(PCIDevice *dev) +{ + pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_RTCTL, 0); +} + +/* function level reset(FLR) */ +void pcie_cap_flr_init(PCIDevice *dev) +{ + pci_long_test_and_set_mask(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP, + PCI_EXP_DEVCAP_FLR); + + /* Although reading BCR_FLR returns always 0, + * the bit is made writable here in order to detect the 1b is written + * pcie_cap_flr_write_config() test-and-clear the bit, so + * this bit always returns 0 to the guest. + */ + pci_word_test_and_set_mask(dev->wmask + dev->exp.exp_cap + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR); +} + +void pcie_cap_flr_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len) +{ + uint8_t *devctl = dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL; + if (pci_get_word(devctl) & PCI_EXP_DEVCTL_BCR_FLR) { + /* Clear PCI_EXP_DEVCTL_BCR_FLR after invoking the reset handler + so the handler can detect FLR by looking at this bit. */ + pci_device_reset(dev); + pci_word_test_and_clear_mask(devctl, PCI_EXP_DEVCTL_BCR_FLR); + } +} + +/* Alternative Routing-ID Interpretation (ARI) + * forwarding support for root and downstream ports + */ +void pcie_cap_arifwd_init(PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_DEVCAP2, + PCI_EXP_DEVCAP2_ARI); + pci_long_test_and_set_mask(dev->wmask + pos + PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ARI); +} + +void pcie_cap_arifwd_reset(PCIDevice *dev) +{ + uint8_t *devctl2 = dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2; + pci_long_test_and_clear_mask(devctl2, PCI_EXP_DEVCTL2_ARI); +} + +bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev)) { + return false; + } + if (!dev->exp.exp_cap) { + return false; + } + + return pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) & + PCI_EXP_DEVCTL2_ARI; +} + +/************************************************************************** + * pci express extended capability list management functions + * uint16_t ext_cap_id (16 bit) + * uint8_t cap_ver (4 bit) + * uint16_t cap_offset (12 bit) + * uint16_t ext_cap_size + */ + +static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id, + uint16_t *prev_p) +{ + uint16_t prev = 0; + uint16_t next; + uint32_t header = pci_get_long(dev->config + PCI_CONFIG_SPACE_SIZE); + + if (!header) { + /* no extended capability */ + next = 0; + goto out; + } + for (next = PCI_CONFIG_SPACE_SIZE; next; + prev = next, next = PCI_EXT_CAP_NEXT(header)) { + + assert(next >= PCI_CONFIG_SPACE_SIZE); + assert(next <= PCIE_CONFIG_SPACE_SIZE - 8); + + header = pci_get_long(dev->config + next); + if (PCI_EXT_CAP_ID(header) == cap_id) { + break; + } + } + +out: + if (prev_p) { + *prev_p = prev; + } + return next; +} + +uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id) +{ + return pcie_find_capability_list(dev, cap_id, NULL); +} + +static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next) +{ + uint32_t header = pci_get_long(dev->config + pos); + assert(!(next & (PCI_EXT_CAP_ALIGN - 1))); + header = (header & ~PCI_EXT_CAP_NEXT_MASK) | + ((next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK); + pci_set_long(dev->config + pos, header); +} + +/* + * caller must supply valid (offset, size) * such that the range shouldn't + * overlap with other capability or other registers. + * This function doesn't check it. + */ +void pcie_add_capability(PCIDevice *dev, + uint16_t cap_id, uint8_t cap_ver, + uint16_t offset, uint16_t size) +{ + uint32_t header; + uint16_t next; + + assert(offset >= PCI_CONFIG_SPACE_SIZE); + assert(offset < offset + size); + assert(offset + size < PCIE_CONFIG_SPACE_SIZE); + assert(size >= 8); + assert(pci_is_express(dev)); + + if (offset == PCI_CONFIG_SPACE_SIZE) { + header = pci_get_long(dev->config + offset); + next = PCI_EXT_CAP_NEXT(header); + } else { + uint16_t prev; + + /* 0 is reserved cap id. use internally to find the last capability + in the linked list */ + next = pcie_find_capability_list(dev, 0, &prev); + + assert(prev >= PCI_CONFIG_SPACE_SIZE); + assert(next == 0); + pcie_ext_cap_set_next(dev, prev, offset); + } + pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next)); + + /* Make capability read-only by default */ + memset(dev->wmask + offset, 0, size); + memset(dev->w1cmask + offset, 0, size); + /* Check capability by default */ + memset(dev->cmask + offset, 0xFF, size); +} + +/************************************************************************** + * pci express extended capability helper functions + */ + +/* ARI */ +void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) +{ + pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER, + offset, PCI_ARI_SIZEOF); + pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8); +} diff --git a/qemu/hw/pci/pcie_aer.c b/qemu/hw/pci/pcie_aer.c new file mode 100644 index 000000000..f1847ac21 --- /dev/null +++ b/qemu/hw/pci/pcie_aer.c @@ -0,0 +1,1042 @@ +/* + * pcie_aer.c + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "sysemu/sysemu.h" +#include "qapi/qmp/types.h" +#include "monitor/monitor.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pcie.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pcie_regs.h" + +//#define DEBUG_PCIE +#ifdef DEBUG_PCIE +# define PCIE_DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) +#else +# define PCIE_DPRINTF(fmt, ...) do {} while (0) +#endif +#define PCIE_DEV_PRINTF(dev, fmt, ...) \ + PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) + +#define PCI_ERR_SRC_COR_OFFS 0 +#define PCI_ERR_SRC_UNCOR_OFFS 2 + +/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */ +static uint32_t pcie_aer_uncor_default_severity(uint32_t status) +{ + switch (status) { + case PCI_ERR_UNC_INTN: + case PCI_ERR_UNC_DLP: + case PCI_ERR_UNC_SDN: + case PCI_ERR_UNC_RX_OVER: + case PCI_ERR_UNC_FCP: + case PCI_ERR_UNC_MALF_TLP: + return PCI_ERR_ROOT_CMD_FATAL_EN; + case PCI_ERR_UNC_POISON_TLP: + case PCI_ERR_UNC_ECRC: + case PCI_ERR_UNC_UNSUP: + case PCI_ERR_UNC_COMP_TIME: + case PCI_ERR_UNC_COMP_ABORT: + case PCI_ERR_UNC_UNX_COMP: + case PCI_ERR_UNC_ACSV: + case PCI_ERR_UNC_MCBTLP: + case PCI_ERR_UNC_ATOP_EBLOCKED: + case PCI_ERR_UNC_TLP_PRF_BLOCKED: + return PCI_ERR_ROOT_CMD_NONFATAL_EN; + default: + abort(); + break; + } + return PCI_ERR_ROOT_CMD_FATAL_EN; +} + +static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err) +{ + if (aer_log->log_num == aer_log->log_max) { + return -1; + } + memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err); + aer_log->log_num++; + return 0; +} + +static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err) +{ + assert(aer_log->log_num); + *err = aer_log->log[0]; + aer_log->log_num--; + memmove(&aer_log->log[0], &aer_log->log[1], + aer_log->log_num * sizeof *err); +} + +static void aer_log_clear_all_err(PCIEAERLog *aer_log) +{ + aer_log->log_num = 0; +} + +int pcie_aer_init(PCIDevice *dev, uint16_t offset) +{ + PCIExpressDevice *exp; + + pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER, + offset, PCI_ERR_SIZEOF); + exp = &dev->exp; + exp->aer_cap = offset; + + /* log_max is property */ + if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) { + dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT; + } + /* clip down the value to avoid unreasobale memory usage */ + if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) { + return -EINVAL; + } + dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] * + dev->exp.aer_log.log_max); + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SEVERITY_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SUPPORTED); + + pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS, + PCI_ERR_COR_SUPPORTED); + + pci_set_long(dev->config + offset + PCI_ERR_COR_MASK, + PCI_ERR_COR_MASK_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK, + PCI_ERR_COR_SUPPORTED); + + /* capabilities and control. multiple header logging is supported */ + if (dev->exp.aer_log.log_max > 0) { + pci_set_long(dev->config + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC | + PCI_ERR_CAP_MHRC); + pci_set_long(dev->wmask + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE | + PCI_ERR_CAP_MHRE); + } else { + pci_set_long(dev->config + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC); + pci_set_long(dev->wmask + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + } + + switch (pcie_cap_get_type(dev)) { + case PCI_EXP_TYPE_ROOT_PORT: + /* this case will be set by pcie_aer_root_init() */ + /* fallthrough */ + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_UPSTREAM: + pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_SERR); + pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS, + PCI_SEC_STATUS_RCV_SYSTEM_ERROR); + break; + default: + /* nothing */ + break; + } + return 0; +} + +void pcie_aer_exit(PCIDevice *dev) +{ + g_free(dev->exp.aer_log.log); +} + +static void pcie_aer_update_uncor_status(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + PCIEAERLog *aer_log = &dev->exp.aer_log; + + uint16_t i; + for (i = 0; i < aer_log->log_num; i++) { + pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS, + dev->exp.aer_log.log[i].status); + } +} + +/* + * return value: + * true: error message needs to be sent up + * false: error message is masked + * + * 6.2.6 Error Message Control + * Figure 6-3 + * all pci express devices part + */ +static bool +pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg) +{ + if (!(pcie_aer_msg_is_uncor(msg) && + (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) { + return false; + } + + /* Signaled System Error + * + * 7.5.1.1 Command register + * Bit 8 SERR# Enable + * + * When Set, this bit enables reporting of Non-fatal and Fatal + * errors detected by the Function to the Root Complex. Note that + * errors are reported if enabled either through this bit or through + * the PCI Express specific bits in the Device Control register (see + * Section 7.8.4). + */ + pci_word_test_and_set_mask(dev->config + PCI_STATUS, + PCI_STATUS_SIG_SYSTEM_ERROR); + + if (!(msg->severity & + pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) { + return false; + } + + /* send up error message */ + return true; +} + +/* + * return value: + * true: error message is sent up + * false: error message is masked + * + * 6.2.6 Error Message Control + * Figure 6-3 + * virtual pci bridge part + */ +static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg) +{ + uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL); + + if (pcie_aer_msg_is_uncor(msg)) { + /* Received System Error */ + pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS, + PCI_SEC_STATUS_RCV_SYSTEM_ERROR); + } + + if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) { + return false; + } + return true; +} + +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + assert(vector < PCI_ERR_ROOT_IRQ_MAX); + pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS, + PCI_ERR_ROOT_IRQ); + pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS, + vector << PCI_ERR_ROOT_IRQ_SHIFT); +} + +static unsigned int pcie_aer_root_get_vector(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT; +} + +/* Given a status register, get corresponding bits in the command register */ +static uint32_t pcie_aer_status_to_cmd(uint32_t status) +{ + uint32_t cmd = 0; + if (status & PCI_ERR_ROOT_COR_RCV) { + cmd |= PCI_ERR_ROOT_CMD_COR_EN; + } + if (status & PCI_ERR_ROOT_NONFATAL_RCV) { + cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN; + } + if (status & PCI_ERR_ROOT_FATAL_RCV) { + cmd |= PCI_ERR_ROOT_CMD_FATAL_EN; + } + return cmd; +} + +static void pcie_aer_root_notify(PCIDevice *dev) +{ + if (msix_enabled(dev)) { + msix_notify(dev, pcie_aer_root_get_vector(dev)); + } else if (msi_enabled(dev)) { + msi_notify(dev, pcie_aer_root_get_vector(dev)); + } else { + pci_irq_assert(dev); + } +} + +/* + * 6.2.6 Error Message Control + * Figure 6-3 + * root port part + */ +static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) +{ + uint16_t cmd; + uint8_t *aer_cap; + uint32_t root_cmd; + uint32_t root_status, prev_status; + + cmd = pci_get_word(dev->config + PCI_COMMAND); + aer_cap = dev->config + dev->exp.aer_cap; + root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); + prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + + if (cmd & PCI_COMMAND_SERR) { + /* System Error. + * + * The way to report System Error is platform specific and + * it isn't implemented in qemu right now. + * So just discard the error for now. + * OS which cares of aer would receive errors via + * native aer mechanims, so this wouldn't matter. + */ + } + + /* Errro Message Received: Root Error Status register */ + switch (msg->severity) { + case PCI_ERR_ROOT_CMD_COR_EN: + if (root_status & PCI_ERR_ROOT_COR_RCV) { + root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; + } else { + pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS, + msg->source_id); + } + root_status |= PCI_ERR_ROOT_COR_RCV; + break; + case PCI_ERR_ROOT_CMD_NONFATAL_EN: + root_status |= PCI_ERR_ROOT_NONFATAL_RCV; + break; + case PCI_ERR_ROOT_CMD_FATAL_EN: + if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) { + root_status |= PCI_ERR_ROOT_FIRST_FATAL; + } + root_status |= PCI_ERR_ROOT_FATAL_RCV; + break; + default: + abort(); + break; + } + if (pcie_aer_msg_is_uncor(msg)) { + if (root_status & PCI_ERR_ROOT_UNCOR_RCV) { + root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; + } else { + pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + + PCI_ERR_SRC_UNCOR_OFFS, msg->source_id); + } + root_status |= PCI_ERR_ROOT_UNCOR_RCV; + } + pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status); + + /* 6.2.4.1.2 Interrupt Generation */ + /* All the above did was set some bits in the status register. + * Specifically these that match message severity. + * The below code relies on this fact. */ + if (!(root_cmd & msg->severity) || + (pcie_aer_status_to_cmd(prev_status) & root_cmd)) { + /* Condition is not being set or was already true so nothing to do. */ + return; + } + + pcie_aer_root_notify(dev); +} + +/* + * 6.2.6 Error Message Control Figure 6-3 + * + * Walk up the bus tree from the device, propagate the error message. + */ +static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg) +{ + uint8_t type; + + while (dev) { + if (!pci_is_express(dev)) { + /* just ignore it */ + /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR? + * Consider e.g. a PCI bridge above a PCI Express device. */ + return; + } + + type = pcie_cap_get_type(dev); + if ((type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_UPSTREAM || + type == PCI_EXP_TYPE_DOWNSTREAM) && + !pcie_aer_msg_vbridge(dev, msg)) { + return; + } + if (!pcie_aer_msg_alldev(dev, msg)) { + return; + } + if (type == PCI_EXP_TYPE_ROOT_PORT) { + pcie_aer_msg_root_port(dev, msg); + /* Root port can notify system itself, + or send the error message to root complex event collector. */ + /* + * if root port is associated with an event collector, + * return the root complex event collector here. + * For now root complex event collector isn't supported. + */ + return; + } + dev = pci_bridge_get_device(dev->bus); + } +} + +static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint8_t first_bit = ctz32(err->status); + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + int i; + + assert(err->status); + assert(!(err->status & (err->status - 1))); + + errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP); + errcap |= PCI_ERR_CAP_FEP(first_bit); + + if (err->flags & PCIE_AER_ERR_HEADER_VALID) { + for (i = 0; i < ARRAY_SIZE(err->header); ++i) { + /* 7.10.8 Header Log Register */ + uint8_t *header_log = + aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0]; + stl_be_p(header_log, err->header[i]); + } + } else { + assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT)); + memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE); + } + + if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) && + (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) & + PCI_EXP_DEVCAP2_EETLPP)) { + for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) { + /* 7.10.12 tlp prefix log register */ + uint8_t *prefix_log = + aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0]; + stl_be_p(prefix_log, err->prefix[i]); + } + errcap |= PCI_ERR_CAP_TLP; + } else { + memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, + PCI_ERR_TLP_PREFIX_LOG_SIZE); + } + pci_set_long(aer_cap + PCI_ERR_CAP, errcap); +} + +static void pcie_aer_clear_log(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + + pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP, + PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP); + + memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE); + memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE); +} + +static void pcie_aer_clear_error(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + PCIEAERLog *aer_log = &dev->exp.aer_log; + PCIEAERErr err; + + if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) { + pcie_aer_clear_log(dev); + return; + } + + /* + * If more errors are queued, set corresponding bits in uncorrectable + * error status. + * We emulate uncorrectable error status register as W1CS. + * So set bit in uncorrectable error status here again for multiple + * error recording support. + * + * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability) + */ + pcie_aer_update_uncor_status(dev); + + aer_log_del_err(aer_log, &err); + pcie_aer_update_log(dev, &err); +} + +static int pcie_aer_record_error(PCIDevice *dev, + const PCIEAERErr *err) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + int fep = PCI_ERR_CAP_FEP(errcap); + + assert(err->status); + assert(!(err->status & (err->status - 1))); + + if (errcap & PCI_ERR_CAP_MHRE && + (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) { + /* Not first error. queue error */ + if (aer_log_add_err(&dev->exp.aer_log, err) < 0) { + /* overflow */ + return -1; + } + return 0; + } + + pcie_aer_update_log(dev, err); + return 0; +} + +typedef struct PCIEAERInject { + PCIDevice *dev; + uint8_t *aer_cap; + const PCIEAERErr *err; + uint16_t devctl; + uint16_t devsta; + uint32_t error_status; + bool unsupported_request; + bool log_overflow; + PCIEAERMsg msg; +} PCIEAERInject; + +static bool pcie_aer_inject_cor_error(PCIEAERInject *inj, + uint32_t uncor_status, + bool is_advisory_nonfatal) +{ + PCIDevice *dev = inj->dev; + + inj->devsta |= PCI_EXP_DEVSTA_CED; + if (inj->unsupported_request) { + inj->devsta |= PCI_EXP_DEVSTA_URD; + } + pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta); + + if (inj->aer_cap) { + uint32_t mask; + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS, + inj->error_status); + mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK); + if (mask & inj->error_status) { + return false; + } + if (is_advisory_nonfatal) { + uint32_t uncor_mask = + pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK); + if (!(uncor_mask & uncor_status)) { + inj->log_overflow = !!pcie_aer_record_error(dev, inj->err); + } + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + uncor_status); + } + } + + if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) { + return false; + } + if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) { + return false; + } + + inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN; + return true; +} + +static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal) +{ + PCIDevice *dev = inj->dev; + uint16_t cmd; + + if (is_fatal) { + inj->devsta |= PCI_EXP_DEVSTA_FED; + } else { + inj->devsta |= PCI_EXP_DEVSTA_NFED; + } + if (inj->unsupported_request) { + inj->devsta |= PCI_EXP_DEVSTA_URD; + } + pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta); + + if (inj->aer_cap) { + uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK); + if (mask & inj->error_status) { + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + inj->error_status); + return false; + } + + inj->log_overflow = !!pcie_aer_record_error(dev, inj->err); + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + inj->error_status); + } + + cmd = pci_get_word(dev->config + PCI_COMMAND); + if (inj->unsupported_request && + !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) { + return false; + } + if (is_fatal) { + if (!((cmd & PCI_COMMAND_SERR) || + (inj->devctl & PCI_EXP_DEVCTL_FERE))) { + return false; + } + inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN; + } else { + if (!((cmd & PCI_COMMAND_SERR) || + (inj->devctl & PCI_EXP_DEVCTL_NFERE))) { + return false; + } + inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN; + } + return true; +} + +/* + * non-Function specific error must be recorded in all functions. + * It is the responsibility of the caller of this function. + * It is also caller's responsibility to determine which function should + * report the error. + * + * 6.2.4 Error Logging + * 6.2.5 Sequence of Device Error Signaling and Logging Operations + * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging + * Operations + */ +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err) +{ + uint8_t *aer_cap = NULL; + uint16_t devctl = 0; + uint16_t devsta = 0; + uint32_t error_status = err->status; + PCIEAERInject inj; + + if (!pci_is_express(dev)) { + return -ENOSYS; + } + + if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) { + error_status &= PCI_ERR_COR_SUPPORTED; + } else { + error_status &= PCI_ERR_UNC_SUPPORTED; + } + + /* invalid status bit. one and only one bit must be set */ + if (!error_status || (error_status & (error_status - 1))) { + return -EINVAL; + } + + if (dev->exp.aer_cap) { + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + aer_cap = dev->config + dev->exp.aer_cap; + devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL); + devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA); + } + + inj.dev = dev; + inj.aer_cap = aer_cap; + inj.err = err; + inj.devctl = devctl; + inj.devsta = devsta; + inj.error_status = error_status; + inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) && + err->status == PCI_ERR_UNC_UNSUP; + inj.log_overflow = false; + + if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) { + if (!pcie_aer_inject_cor_error(&inj, 0, false)) { + return 0; + } + } else { + bool is_fatal = + pcie_aer_uncor_default_severity(error_status) == + PCI_ERR_ROOT_CMD_FATAL_EN; + if (aer_cap) { + is_fatal = + error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER); + } + if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) { + inj.error_status = PCI_ERR_COR_ADV_NONFATAL; + if (!pcie_aer_inject_cor_error(&inj, error_status, true)) { + return 0; + } + } else { + if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) { + return 0; + } + } + } + + /* send up error message */ + inj.msg.source_id = err->source_id; + pcie_aer_msg(dev, &inj.msg); + + if (inj.log_overflow) { + PCIEAERErr header_log_overflow = { + .status = PCI_ERR_COR_HL_OVERFLOW, + .flags = PCIE_AER_ERR_IS_CORRECTABLE, + }; + int ret = pcie_aer_inject_error(dev, &header_log_overflow); + assert(!ret); + } + return 0; +} + +void pcie_aer_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap); + uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS); + + /* uncorrectable error */ + if (!(uncorsta & first_error)) { + /* the bit that corresponds to the first error is cleared */ + pcie_aer_clear_error(dev); + } else if (errcap & PCI_ERR_CAP_MHRE) { + /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared + * nothing should happen. So we have to revert the modification to + * the register. + */ + pcie_aer_update_uncor_status(dev); + } else { + /* capability & control + * PCI_ERR_CAP_MHRE might be cleared, so clear of header log. + */ + aer_log_clear_all_err(&dev->exp.aer_log); + } +} + +void pcie_aer_root_init(PCIDevice *dev) +{ + uint16_t pos = dev->exp.aer_cap; + + pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND, + PCI_ERR_ROOT_CMD_EN_MASK); + pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS, + PCI_ERR_ROOT_STATUS_REPORT_MASK); + /* PCI_ERR_ROOT_IRQ is RO but devices change it using a + * device-specific method. + */ + pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS, + ~PCI_ERR_ROOT_IRQ); +} + +void pcie_aer_root_reset(PCIDevice *dev) +{ + uint8_t* aer_cap = dev->config + dev->exp.aer_cap; + + pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0); + + /* + * Advanced Error Interrupt Message Number in Root Error Status Register + * must be updated by chip dependent code because it's chip dependent + * which number is used. + */ +} + +void pcie_aer_root_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len, + uint32_t root_cmd_prev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status); + uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); + /* 6.2.4.1.2 Interrupt Generation */ + if (!msix_enabled(dev) && !msi_enabled(dev)) { + pci_set_irq(dev, !!(root_cmd & enabled_cmd)); + return; + } + + if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) { + /* Send MSI on transition from false to true. */ + return; + } + + pcie_aer_root_notify(dev); +} + +static const VMStateDescription vmstate_pcie_aer_err = { + .name = "PCIE_AER_ERROR", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(status, PCIEAERErr), + VMSTATE_UINT16(source_id, PCIEAERErr), + VMSTATE_UINT16(flags, PCIEAERErr), + VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4), + VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4), + VMSTATE_END_OF_LIST() + } +}; + +static bool pcie_aer_state_log_num_valid(void *opaque, int version_id) +{ + PCIEAERLog *s = opaque; + + return s->log_num <= s->log_max; +} + +const VMStateDescription vmstate_pcie_aer_log = { + .name = "PCIE_AER_ERROR_LOG", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(log_num, PCIEAERLog), + VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog), + VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid), + VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num, + vmstate_pcie_aer_err, PCIEAERErr), + VMSTATE_END_OF_LIST() + } +}; + +typedef struct PCIEAERErrorName { + const char *name; + uint32_t val; + bool correctable; +} PCIEAERErrorName; + +/* + * AER error name -> value conversion table + * This naming scheme is same to linux aer-injection tool. + */ +static const struct PCIEAERErrorName pcie_aer_error_list[] = { + { + .name = "TRAIN", + .val = PCI_ERR_UNC_TRAIN, + .correctable = false, + }, { + .name = "DLP", + .val = PCI_ERR_UNC_DLP, + .correctable = false, + }, { + .name = "SDN", + .val = PCI_ERR_UNC_SDN, + .correctable = false, + }, { + .name = "POISON_TLP", + .val = PCI_ERR_UNC_POISON_TLP, + .correctable = false, + }, { + .name = "FCP", + .val = PCI_ERR_UNC_FCP, + .correctable = false, + }, { + .name = "COMP_TIME", + .val = PCI_ERR_UNC_COMP_TIME, + .correctable = false, + }, { + .name = "COMP_ABORT", + .val = PCI_ERR_UNC_COMP_ABORT, + .correctable = false, + }, { + .name = "UNX_COMP", + .val = PCI_ERR_UNC_UNX_COMP, + .correctable = false, + }, { + .name = "RX_OVER", + .val = PCI_ERR_UNC_RX_OVER, + .correctable = false, + }, { + .name = "MALF_TLP", + .val = PCI_ERR_UNC_MALF_TLP, + .correctable = false, + }, { + .name = "ECRC", + .val = PCI_ERR_UNC_ECRC, + .correctable = false, + }, { + .name = "UNSUP", + .val = PCI_ERR_UNC_UNSUP, + .correctable = false, + }, { + .name = "ACSV", + .val = PCI_ERR_UNC_ACSV, + .correctable = false, + }, { + .name = "INTN", + .val = PCI_ERR_UNC_INTN, + .correctable = false, + }, { + .name = "MCBTLP", + .val = PCI_ERR_UNC_MCBTLP, + .correctable = false, + }, { + .name = "ATOP_EBLOCKED", + .val = PCI_ERR_UNC_ATOP_EBLOCKED, + .correctable = false, + }, { + .name = "TLP_PRF_BLOCKED", + .val = PCI_ERR_UNC_TLP_PRF_BLOCKED, + .correctable = false, + }, { + .name = "RCVR", + .val = PCI_ERR_COR_RCVR, + .correctable = true, + }, { + .name = "BAD_TLP", + .val = PCI_ERR_COR_BAD_TLP, + .correctable = true, + }, { + .name = "BAD_DLLP", + .val = PCI_ERR_COR_BAD_DLLP, + .correctable = true, + }, { + .name = "REP_ROLL", + .val = PCI_ERR_COR_REP_ROLL, + .correctable = true, + }, { + .name = "REP_TIMER", + .val = PCI_ERR_COR_REP_TIMER, + .correctable = true, + }, { + .name = "ADV_NONFATAL", + .val = PCI_ERR_COR_ADV_NONFATAL, + .correctable = true, + }, { + .name = "INTERNAL", + .val = PCI_ERR_COR_INTERNAL, + .correctable = true, + }, { + .name = "HL_OVERFLOW", + .val = PCI_ERR_COR_HL_OVERFLOW, + .correctable = true, + }, +}; + +static int pcie_aer_parse_error_string(const char *error_name, + uint32_t *status, bool *correctable) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) { + const PCIEAERErrorName *e = &pcie_aer_error_list[i]; + if (strcmp(error_name, e->name)) { + continue; + } + + *status = e->val; + *correctable = e->correctable; + return 0; + } + return -EINVAL; +} + +static int do_pcie_aer_inject_error(Monitor *mon, + const QDict *qdict, QObject **ret_data) +{ + const char *id = qdict_get_str(qdict, "id"); + const char *error_name; + uint32_t error_status; + bool correctable; + PCIDevice *dev; + PCIEAERErr err; + int ret; + + ret = pci_qdev_find_device(id, &dev); + if (ret < 0) { + monitor_printf(mon, + "id or pci device path is invalid or device not " + "found. %s\n", id); + return ret; + } + if (!pci_is_express(dev)) { + monitor_printf(mon, "the device doesn't support pci express. %s\n", + id); + return -ENOSYS; + } + + error_name = qdict_get_str(qdict, "error_status"); + if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) { + char *e = NULL; + error_status = strtoul(error_name, &e, 0); + correctable = qdict_get_try_bool(qdict, "correctable", false); + if (!e || *e != '\0') { + monitor_printf(mon, "invalid error status value. \"%s\"", + error_name); + return -EINVAL; + } + } + err.status = error_status; + err.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn; + + err.flags = 0; + if (correctable) { + err.flags |= PCIE_AER_ERR_IS_CORRECTABLE; + } + if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) { + err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY; + } + if (qdict_haskey(qdict, "header0")) { + err.flags |= PCIE_AER_ERR_HEADER_VALID; + } + if (qdict_haskey(qdict, "prefix0")) { + err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT; + } + + err.header[0] = qdict_get_try_int(qdict, "header0", 0); + err.header[1] = qdict_get_try_int(qdict, "header1", 0); + err.header[2] = qdict_get_try_int(qdict, "header2", 0); + err.header[3] = qdict_get_try_int(qdict, "header3", 0); + + err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0); + err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0); + err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0); + err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0); + + ret = pcie_aer_inject_error(dev, &err); + *ret_data = qobject_from_jsonf("{'id': %s, " + "'root_bus': %s, 'bus': %d, 'devfn': %d, " + "'ret': %d}", + id, pci_root_bus_path(dev), + pci_bus_num(dev->bus), dev->devfn, + ret); + assert(*ret_data); + + return 0; +} + +void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) +{ + QObject *data; + int devfn; + + if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) { + return; + } + + assert(qobject_type(data) == QTYPE_QDICT); + qdict = qobject_to_qdict(data); + + devfn = (int)qdict_get_int(qdict, "devfn"); + monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n", + qdict_get_str(qdict, "id"), + qdict_get_str(qdict, "root_bus"), + (int) qdict_get_int(qdict, "bus"), + PCI_SLOT(devfn), PCI_FUNC(devfn)); +} diff --git a/qemu/hw/pci/pcie_host.c b/qemu/hw/pci/pcie_host.c new file mode 100644 index 000000000..d8afba863 --- /dev/null +++ b/qemu/hw/pci/pcie_host.c @@ -0,0 +1,146 @@ +/* + * pcie_host.c + * utility functions for pci express host bridge. + * + * Copyright (c) 2009 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie_host.h" +#include "exec/address-spaces.h" + +/* a helper function to get a PCIDevice for a given mmconfig address */ +static inline PCIDevice *pcie_dev_find_by_mmcfg_addr(PCIBus *s, + uint32_t mmcfg_addr) +{ + return pci_find_device(s, PCIE_MMCFG_BUS(mmcfg_addr), + PCIE_MMCFG_DEVFN(mmcfg_addr)); +} + +static void pcie_mmcfg_data_write(void *opaque, hwaddr mmcfg_addr, + uint64_t val, unsigned len) +{ + PCIExpressHost *e = opaque; + PCIBus *s = e->pci.bus; + PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr); + uint32_t addr; + uint32_t limit; + + if (!pci_dev) { + return; + } + addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr); + limit = pci_config_size(pci_dev); + if (limit <= addr) { + /* conventional pci device can be behind pcie-to-pci bridge. + 256 <= addr < 4K has no effects. */ + return; + } + pci_host_config_write_common(pci_dev, addr, limit, val, len); +} + +static uint64_t pcie_mmcfg_data_read(void *opaque, + hwaddr mmcfg_addr, + unsigned len) +{ + PCIExpressHost *e = opaque; + PCIBus *s = e->pci.bus; + PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr); + uint32_t addr; + uint32_t limit; + + if (!pci_dev) { + return ~0x0; + } + addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr); + limit = pci_config_size(pci_dev); + if (limit <= addr) { + /* conventional pci device can be behind pcie-to-pci bridge. + 256 <= addr < 4K has no effects. */ + return ~0x0; + } + return pci_host_config_read_common(pci_dev, addr, limit, len); +} + +static const MemoryRegionOps pcie_mmcfg_ops = { + .read = pcie_mmcfg_data_read, + .write = pcie_mmcfg_data_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void pcie_host_init(Object *obj) +{ + PCIExpressHost *e = PCIE_HOST_BRIDGE(obj); + + e->base_addr = PCIE_BASE_ADDR_UNMAPPED; + memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, "pcie-mmcfg-mmio", + PCIE_MMCFG_SIZE_MAX); +} + +void pcie_host_mmcfg_unmap(PCIExpressHost *e) +{ + if (e->base_addr != PCIE_BASE_ADDR_UNMAPPED) { + memory_region_del_subregion(get_system_memory(), &e->mmio); + e->base_addr = PCIE_BASE_ADDR_UNMAPPED; + } +} + +void pcie_host_mmcfg_init(PCIExpressHost *e, uint32_t size) +{ + assert(!(size & (size - 1))); /* power of 2 */ + assert(size >= PCIE_MMCFG_SIZE_MIN); + assert(size <= PCIE_MMCFG_SIZE_MAX); + e->size = size; + memory_region_set_size(&e->mmio, e->size); +} + +void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, + uint32_t size) +{ + pcie_host_mmcfg_init(e, size); + e->base_addr = addr; + memory_region_add_subregion(get_system_memory(), e->base_addr, &e->mmio); +} + +void pcie_host_mmcfg_update(PCIExpressHost *e, + int enable, + hwaddr addr, + uint32_t size) +{ + memory_region_transaction_begin(); + pcie_host_mmcfg_unmap(e); + if (enable) { + pcie_host_mmcfg_map(e, addr, size); + } + memory_region_transaction_commit(); +} + +static const TypeInfo pcie_host_type_info = { + .name = TYPE_PCIE_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .abstract = true, + .instance_size = sizeof(PCIExpressHost), + .instance_init = pcie_host_init, +}; + +static void pcie_host_register_types(void) +{ + type_register_static(&pcie_host_type_info); +} + +type_init(pcie_host_register_types) diff --git a/qemu/hw/pci/pcie_port.c b/qemu/hw/pci/pcie_port.c new file mode 100644 index 000000000..40ca8d5d1 --- /dev/null +++ b/qemu/hw/pci/pcie_port.c @@ -0,0 +1,178 @@ +/* + * pcie_port.c + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/pci/pcie_port.h" +#include "hw/hotplug.h" + +void pcie_port_init_reg(PCIDevice *d) +{ + /* Unlike pci bridge, + 66MHz and fast back to back don't apply to pci express port. */ + pci_set_word(d->config + PCI_STATUS, 0); + pci_set_word(d->config + PCI_SEC_STATUS, 0); + + /* + * Unlike conventional pci bridge, for some bits the spec states: + * Does not apply to PCI Express and must be hardwired to 0. + */ + pci_word_test_and_clear_mask(d->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_MASTER_ABORT | + PCI_BRIDGE_CTL_FAST_BACK | + PCI_BRIDGE_CTL_DISCARD | + PCI_BRIDGE_CTL_SEC_DISCARD | + PCI_BRIDGE_CTL_DISCARD_STATUS | + PCI_BRIDGE_CTL_DISCARD_SERR); +} + +/************************************************************************** + * (chassis number, pcie physical slot number) -> pcie slot conversion + */ +struct PCIEChassis { + uint8_t number; + + QLIST_HEAD(, PCIESlot) slots; + QLIST_ENTRY(PCIEChassis) next; +}; + +static QLIST_HEAD(, PCIEChassis) chassis = QLIST_HEAD_INITIALIZER(chassis); + +static struct PCIEChassis *pcie_chassis_find(uint8_t chassis_number) +{ + struct PCIEChassis *c; + QLIST_FOREACH(c, &chassis, next) { + if (c->number == chassis_number) { + break; + } + } + return c; +} + +void pcie_chassis_create(uint8_t chassis_number) +{ + struct PCIEChassis *c; + c = pcie_chassis_find(chassis_number); + if (c) { + return; + } + c = g_malloc0(sizeof(*c)); + c->number = chassis_number; + QLIST_INIT(&c->slots); + QLIST_INSERT_HEAD(&chassis, c, next); +} + +static PCIESlot *pcie_chassis_find_slot_with_chassis(struct PCIEChassis *c, + uint8_t slot) +{ + PCIESlot *s; + QLIST_FOREACH(s, &c->slots, next) { + if (s->slot == slot) { + break; + } + } + return s; +} + +PCIESlot *pcie_chassis_find_slot(uint8_t chassis_number, uint16_t slot) +{ + struct PCIEChassis *c; + c = pcie_chassis_find(chassis_number); + if (!c) { + return NULL; + } + return pcie_chassis_find_slot_with_chassis(c, slot); +} + +int pcie_chassis_add_slot(struct PCIESlot *slot) +{ + struct PCIEChassis *c; + c = pcie_chassis_find(slot->chassis); + if (!c) { + return -ENODEV; + } + if (pcie_chassis_find_slot_with_chassis(c, slot->slot)) { + return -EBUSY; + } + QLIST_INSERT_HEAD(&c->slots, slot, next); + return 0; +} + +void pcie_chassis_del_slot(PCIESlot *s) +{ + QLIST_REMOVE(s, next); +} + +static Property pcie_port_props[] = { + DEFINE_PROP_UINT8("port", PCIEPort, port, 0), + DEFINE_PROP_UINT16("aer_log_max", PCIEPort, + parent_obj.parent_obj.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), + DEFINE_PROP_END_OF_LIST() +}; + +static void pcie_port_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->props = pcie_port_props; +} + +static const TypeInfo pcie_port_type_info = { + .name = TYPE_PCIE_PORT, + .parent = TYPE_PCI_BRIDGE, + .instance_size = sizeof(PCIEPort), + .abstract = true, + .class_init = pcie_port_class_init, +}; + +static Property pcie_slot_props[] = { + DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), + DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void pcie_slot_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + dc->props = pcie_slot_props; + hc->plug = pcie_cap_slot_hotplug_cb; + hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb; +} + +static const TypeInfo pcie_slot_type_info = { + .name = TYPE_PCIE_SLOT, + .parent = TYPE_PCIE_PORT, + .instance_size = sizeof(PCIESlot), + .abstract = true, + .class_init = pcie_slot_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void pcie_port_register_types(void) +{ + type_register_static(&pcie_port_type_info); + type_register_static(&pcie_slot_type_info); +} + +type_init(pcie_port_register_types) diff --git a/qemu/hw/pci/shpc.c b/qemu/hw/pci/shpc.c new file mode 100644 index 000000000..bfb4d31b6 --- /dev/null +++ b/qemu/hw/pci/shpc.c @@ -0,0 +1,721 @@ +#include "qemu-common.h" +#include <strings.h> +#include <stdint.h> +#include "qemu/range.h" +#include "qemu/error-report.h" +#include "hw/pci/shpc.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/msi.h" + +/* TODO: model power only and disabled slot states. */ +/* TODO: handle SERR and wakeups */ +/* TODO: consider enabling 66MHz support */ + +/* TODO: remove fully only on state DISABLED and LED off. + * track state to properly record this. */ + +/* SHPC Working Register Set */ +#define SHPC_BASE_OFFSET 0x00 /* 4 bytes */ +#define SHPC_SLOTS_33 0x04 /* 4 bytes. Also encodes PCI-X slots. */ +#define SHPC_SLOTS_66 0x08 /* 4 bytes. */ +#define SHPC_NSLOTS 0x0C /* 1 byte */ +#define SHPC_FIRST_DEV 0x0D /* 1 byte */ +#define SHPC_PHYS_SLOT 0x0E /* 2 byte */ +#define SHPC_PHYS_NUM_MAX 0x7ff +#define SHPC_PHYS_NUM_UP 0x2000 +#define SHPC_PHYS_MRL 0x4000 +#define SHPC_PHYS_BUTTON 0x8000 +#define SHPC_SEC_BUS 0x10 /* 2 bytes */ +#define SHPC_SEC_BUS_33 0x0 +#define SHPC_SEC_BUS_66 0x1 /* Unused */ +#define SHPC_SEC_BUS_MASK 0x7 +#define SHPC_MSI_CTL 0x12 /* 1 byte */ +#define SHPC_PROG_IFC 0x13 /* 1 byte */ +#define SHPC_PROG_IFC_1_0 0x1 +#define SHPC_CMD_CODE 0x14 /* 1 byte */ +#define SHPC_CMD_TRGT 0x15 /* 1 byte */ +#define SHPC_CMD_TRGT_MIN 0x1 +#define SHPC_CMD_TRGT_MAX 0x1f +#define SHPC_CMD_STATUS 0x16 /* 2 bytes */ +#define SHPC_CMD_STATUS_BUSY 0x1 +#define SHPC_CMD_STATUS_MRL_OPEN 0x2 +#define SHPC_CMD_STATUS_INVALID_CMD 0x4 +#define SHPC_CMD_STATUS_INVALID_MODE 0x8 +#define SHPC_INT_LOCATOR 0x18 /* 4 bytes */ +#define SHPC_INT_COMMAND 0x1 +#define SHPC_SERR_LOCATOR 0x1C /* 4 bytes */ +#define SHPC_SERR_INT 0x20 /* 4 bytes */ +#define SHPC_INT_DIS 0x1 +#define SHPC_SERR_DIS 0x2 +#define SHPC_CMD_INT_DIS 0x4 +#define SHPC_ARB_SERR_DIS 0x8 +#define SHPC_CMD_DETECTED 0x10000 +#define SHPC_ARB_DETECTED 0x20000 + /* 4 bytes * slot # (start from 0) */ +#define SHPC_SLOT_REG(s) (0x24 + (s) * 4) + /* 2 bytes */ +#define SHPC_SLOT_STATUS(s) (0x0 + SHPC_SLOT_REG(s)) + +/* Same slot state masks are used for command and status registers */ +#define SHPC_SLOT_STATE_MASK 0x03 +#define SHPC_SLOT_STATE_SHIFT \ + ctz32(SHPC_SLOT_STATE_MASK) + +#define SHPC_STATE_NO 0x0 +#define SHPC_STATE_PWRONLY 0x1 +#define SHPC_STATE_ENABLED 0x2 +#define SHPC_STATE_DISABLED 0x3 + +#define SHPC_SLOT_PWR_LED_MASK 0xC +#define SHPC_SLOT_PWR_LED_SHIFT \ + ctz32(SHPC_SLOT_PWR_LED_MASK) +#define SHPC_SLOT_ATTN_LED_MASK 0x30 +#define SHPC_SLOT_ATTN_LED_SHIFT \ + ctz32(SHPC_SLOT_ATTN_LED_MASK) + +#define SHPC_LED_NO 0x0 +#define SHPC_LED_ON 0x1 +#define SHPC_LED_BLINK 0x2 +#define SHPC_LED_OFF 0x3 + +#define SHPC_SLOT_STATUS_PWR_FAULT 0x40 +#define SHPC_SLOT_STATUS_BUTTON 0x80 +#define SHPC_SLOT_STATUS_MRL_OPEN 0x100 +#define SHPC_SLOT_STATUS_66 0x200 +#define SHPC_SLOT_STATUS_PRSNT_MASK 0xC00 +#define SHPC_SLOT_STATUS_PRSNT_EMPTY 0x3 +#define SHPC_SLOT_STATUS_PRSNT_25W 0x1 +#define SHPC_SLOT_STATUS_PRSNT_15W 0x2 +#define SHPC_SLOT_STATUS_PRSNT_7_5W 0x0 + +#define SHPC_SLOT_STATUS_PRSNT_PCIX 0x3000 + + + /* 1 byte */ +#define SHPC_SLOT_EVENT_LATCH(s) (0x2 + SHPC_SLOT_REG(s)) + /* 1 byte */ +#define SHPC_SLOT_EVENT_SERR_INT_DIS(d, s) (0x3 + SHPC_SLOT_REG(s)) +#define SHPC_SLOT_EVENT_PRESENCE 0x01 +#define SHPC_SLOT_EVENT_ISOLATED_FAULT 0x02 +#define SHPC_SLOT_EVENT_BUTTON 0x04 +#define SHPC_SLOT_EVENT_MRL 0x08 +#define SHPC_SLOT_EVENT_CONNECTED_FAULT 0x10 +/* Bits below are used for Serr/Int disable only */ +#define SHPC_SLOT_EVENT_MRL_SERR_DIS 0x20 +#define SHPC_SLOT_EVENT_CONNECTED_FAULT_SERR_DIS 0x40 + +#define SHPC_MIN_SLOTS 1 +#define SHPC_MAX_SLOTS 31 +#define SHPC_SIZEOF(d) SHPC_SLOT_REG((d)->shpc->nslots) + +/* SHPC Slot identifiers */ + +/* Hotplug supported at 31 slots out of the total 32. We reserve slot 0, + and give the rest of them physical *and* pci numbers starting from 1, so + they match logical numbers. Note: this means that multiple slots must have + different chassis number values, to make chassis+physical slot unique. + TODO: make this configurable? */ +#define SHPC_IDX_TO_LOGICAL(slot) ((slot) + 1) +#define SHPC_LOGICAL_TO_IDX(target) ((target) - 1) +#define SHPC_IDX_TO_PCI(slot) ((slot) + 1) +#define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) +#define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) + +static int roundup_pow_of_two(int x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + +static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) +{ + uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); + return (pci_get_word(status) & msk) >> ctz32(msk); +} + +static void shpc_set_status(SHPCDevice *shpc, + int slot, uint8_t value, uint16_t msk) +{ + uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); + pci_word_test_and_clear_mask(status, msk); + pci_word_test_and_set_mask(status, value << ctz32(msk)); +} + +static void shpc_interrupt_update(PCIDevice *d) +{ + SHPCDevice *shpc = d->shpc; + int slot; + int level = 0; + uint32_t serr_int; + uint32_t int_locator = 0; + + /* Update interrupt locator register */ + for (slot = 0; slot < shpc->nslots; ++slot) { + uint8_t event = shpc->config[SHPC_SLOT_EVENT_LATCH(slot)]; + uint8_t disable = shpc->config[SHPC_SLOT_EVENT_SERR_INT_DIS(d, slot)]; + uint32_t mask = 1U << SHPC_IDX_TO_LOGICAL(slot); + if (event & ~disable) { + int_locator |= mask; + } + } + serr_int = pci_get_long(shpc->config + SHPC_SERR_INT); + if ((serr_int & SHPC_CMD_DETECTED) && !(serr_int & SHPC_CMD_INT_DIS)) { + int_locator |= SHPC_INT_COMMAND; + } + pci_set_long(shpc->config + SHPC_INT_LOCATOR, int_locator); + level = (!(serr_int & SHPC_INT_DIS) && int_locator) ? 1 : 0; + if (msi_enabled(d) && shpc->msi_requested != level) + msi_notify(d, 0); + else + pci_set_irq(d, level); + shpc->msi_requested = level; +} + +static void shpc_set_sec_bus_speed(SHPCDevice *shpc, uint8_t speed) +{ + switch (speed) { + case SHPC_SEC_BUS_33: + shpc->config[SHPC_SEC_BUS] &= ~SHPC_SEC_BUS_MASK; + shpc->config[SHPC_SEC_BUS] |= speed; + break; + default: + pci_word_test_and_set_mask(shpc->config + SHPC_CMD_STATUS, + SHPC_CMD_STATUS_INVALID_MODE); + } +} + +void shpc_reset(PCIDevice *d) +{ + SHPCDevice *shpc = d->shpc; + int nslots = shpc->nslots; + int i; + memset(shpc->config, 0, SHPC_SIZEOF(d)); + pci_set_byte(shpc->config + SHPC_NSLOTS, nslots); + pci_set_long(shpc->config + SHPC_SLOTS_33, nslots); + pci_set_long(shpc->config + SHPC_SLOTS_66, 0); + pci_set_byte(shpc->config + SHPC_FIRST_DEV, SHPC_IDX_TO_PCI(0)); + pci_set_word(shpc->config + SHPC_PHYS_SLOT, + SHPC_IDX_TO_PHYSICAL(0) | + SHPC_PHYS_NUM_UP | + SHPC_PHYS_MRL | + SHPC_PHYS_BUTTON); + pci_set_long(shpc->config + SHPC_SERR_INT, SHPC_INT_DIS | + SHPC_SERR_DIS | + SHPC_CMD_INT_DIS | + SHPC_ARB_SERR_DIS); + pci_set_byte(shpc->config + SHPC_PROG_IFC, SHPC_PROG_IFC_1_0); + pci_set_word(shpc->config + SHPC_SEC_BUS, SHPC_SEC_BUS_33); + for (i = 0; i < shpc->nslots; ++i) { + pci_set_byte(shpc->config + SHPC_SLOT_EVENT_SERR_INT_DIS(d, i), + SHPC_SLOT_EVENT_PRESENCE | + SHPC_SLOT_EVENT_ISOLATED_FAULT | + SHPC_SLOT_EVENT_BUTTON | + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_CONNECTED_FAULT | + SHPC_SLOT_EVENT_MRL_SERR_DIS | + SHPC_SLOT_EVENT_CONNECTED_FAULT_SERR_DIS); + if (shpc->sec_bus->devices[PCI_DEVFN(SHPC_IDX_TO_PCI(i), 0)]) { + shpc_set_status(shpc, i, SHPC_STATE_ENABLED, SHPC_SLOT_STATE_MASK); + shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, i, SHPC_SLOT_STATUS_PRSNT_7_5W, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc_set_status(shpc, i, SHPC_LED_ON, SHPC_SLOT_PWR_LED_MASK); + } else { + shpc_set_status(shpc, i, SHPC_STATE_DISABLED, SHPC_SLOT_STATE_MASK); + shpc_set_status(shpc, i, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, i, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_PWR_LED_MASK); + } + shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_66); + } + shpc_set_sec_bus_speed(shpc, SHPC_SEC_BUS_33); + shpc->msi_requested = 0; + shpc_interrupt_update(d); +} + +static void shpc_invalid_command(SHPCDevice *shpc) +{ + pci_word_test_and_set_mask(shpc->config + SHPC_CMD_STATUS, + SHPC_CMD_STATUS_INVALID_CMD); +} + +static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) +{ + int devfn; + int pci_slot = SHPC_IDX_TO_PCI(slot); + for (devfn = PCI_DEVFN(pci_slot, 0); + devfn <= PCI_DEVFN(pci_slot, PCI_FUNC_MAX - 1); + ++devfn) { + PCIDevice *affected_dev = shpc->sec_bus->devices[devfn]; + if (affected_dev) { + object_unparent(OBJECT(affected_dev)); + } + } +} + +static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, + uint8_t state, uint8_t power, uint8_t attn) +{ + uint8_t current_state; + int slot = SHPC_LOGICAL_TO_IDX(target); + if (target < SHPC_CMD_TRGT_MIN || slot >= shpc->nslots) { + shpc_invalid_command(shpc); + return; + } + current_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); + if (current_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { + shpc_invalid_command(shpc); + return; + } + + switch (power) { + case SHPC_LED_NO: + break; + default: + /* TODO: send event to monitor */ + shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK); + } + switch (attn) { + case SHPC_LED_NO: + break; + default: + /* TODO: send event to monitor */ + shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK); + } + + if ((current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_PWRONLY) || + (current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_ENABLED)) { + shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); + } else if ((current_state == SHPC_STATE_ENABLED || + current_state == SHPC_STATE_PWRONLY) && + state == SHPC_STATE_DISABLED) { + shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); + power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + /* TODO: track what monitor requested. */ + /* Look at LED to figure out whether it's ok to remove the device. */ + if (power == SHPC_LED_OFF) { + shpc_free_devices_in_slot(shpc, slot); + shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_BUTTON | + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; + } + } +} + +static void shpc_command(SHPCDevice *shpc) +{ + uint8_t code = pci_get_byte(shpc->config + SHPC_CMD_CODE); + uint8_t speed; + uint8_t target; + uint8_t attn; + uint8_t power; + uint8_t state; + int i; + + /* Clear status from the previous command. */ + pci_word_test_and_clear_mask(shpc->config + SHPC_CMD_STATUS, + SHPC_CMD_STATUS_BUSY | + SHPC_CMD_STATUS_MRL_OPEN | + SHPC_CMD_STATUS_INVALID_CMD | + SHPC_CMD_STATUS_INVALID_MODE); + switch (code) { + case 0x00 ... 0x3f: + target = shpc->config[SHPC_CMD_TRGT] & SHPC_CMD_TRGT_MAX; + state = (code & SHPC_SLOT_STATE_MASK) >> SHPC_SLOT_STATE_SHIFT; + power = (code & SHPC_SLOT_PWR_LED_MASK) >> SHPC_SLOT_PWR_LED_SHIFT; + attn = (code & SHPC_SLOT_ATTN_LED_MASK) >> SHPC_SLOT_ATTN_LED_SHIFT; + shpc_slot_command(shpc, target, state, power, attn); + break; + case 0x40 ... 0x47: + speed = code & SHPC_SEC_BUS_MASK; + shpc_set_sec_bus_speed(shpc, speed); + break; + case 0x48: + /* Power only all slots */ + /* first verify no slots are enabled */ + for (i = 0; i < shpc->nslots; ++i) { + state = shpc_get_status(shpc, i, SHPC_SLOT_STATE_MASK); + if (state == SHPC_STATE_ENABLED) { + shpc_invalid_command(shpc); + goto done; + } + } + for (i = 0; i < shpc->nslots; ++i) { + if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { + shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + SHPC_STATE_PWRONLY, SHPC_LED_ON, SHPC_LED_NO); + } else { + shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); + } + } + break; + case 0x49: + /* Enable all slots */ + /* TODO: Spec says this shall fail if some are already enabled. + * This doesn't make sense - why not? a spec bug? */ + for (i = 0; i < shpc->nslots; ++i) { + state = shpc_get_status(shpc, i, SHPC_SLOT_STATE_MASK); + if (state == SHPC_STATE_ENABLED) { + shpc_invalid_command(shpc); + goto done; + } + } + for (i = 0; i < shpc->nslots; ++i) { + if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { + shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + SHPC_STATE_ENABLED, SHPC_LED_ON, SHPC_LED_NO); + } else { + shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); + } + } + break; + default: + shpc_invalid_command(shpc); + break; + } +done: + pci_long_test_and_set_mask(shpc->config + SHPC_SERR_INT, SHPC_CMD_DETECTED); +} + +static void shpc_write(PCIDevice *d, unsigned addr, uint64_t val, int l) +{ + SHPCDevice *shpc = d->shpc; + int i; + if (addr >= SHPC_SIZEOF(d)) { + return; + } + l = MIN(l, SHPC_SIZEOF(d) - addr); + + /* TODO: code duplicated from pci.c */ + for (i = 0; i < l; val >>= 8, ++i) { + unsigned a = addr + i; + uint8_t wmask = shpc->wmask[a]; + uint8_t w1cmask = shpc->w1cmask[a]; + assert(!(wmask & w1cmask)); + shpc->config[a] = (shpc->config[a] & ~wmask) | (val & wmask); + shpc->config[a] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ + } + if (ranges_overlap(addr, l, SHPC_CMD_CODE, 2)) { + shpc_command(shpc); + } + shpc_interrupt_update(d); +} + +static uint64_t shpc_read(PCIDevice *d, unsigned addr, int l) +{ + uint64_t val = 0x0; + if (addr >= SHPC_SIZEOF(d)) { + return val; + } + l = MIN(l, SHPC_SIZEOF(d) - addr); + memcpy(&val, d->shpc->config + addr, l); + return val; +} + +/* SHPC Bridge Capability */ +#define SHPC_CAP_LENGTH 0x08 +#define SHPC_CAP_DWORD_SELECT 0x2 /* 1 byte */ +#define SHPC_CAP_CxP 0x3 /* 1 byte: CSP, CIP */ +#define SHPC_CAP_DWORD_DATA 0x4 /* 4 bytes */ +#define SHPC_CAP_CSP_MASK 0x4 +#define SHPC_CAP_CIP_MASK 0x8 + +static uint8_t shpc_cap_dword(PCIDevice *d) +{ + return pci_get_byte(d->config + d->shpc->cap + SHPC_CAP_DWORD_SELECT); +} + +/* Update dword data capability register */ +static void shpc_cap_update_dword(PCIDevice *d) +{ + unsigned data; + data = shpc_read(d, shpc_cap_dword(d) * 4, 4); + pci_set_long(d->config + d->shpc->cap + SHPC_CAP_DWORD_DATA, data); +} + +/* Add SHPC capability to the config space for the device. */ +static int shpc_cap_add_config(PCIDevice *d) +{ + uint8_t *config; + int config_offset; + config_offset = pci_add_capability(d, PCI_CAP_ID_SHPC, + 0, SHPC_CAP_LENGTH); + if (config_offset < 0) { + return config_offset; + } + config = d->config + config_offset; + + pci_set_byte(config + SHPC_CAP_DWORD_SELECT, 0); + pci_set_byte(config + SHPC_CAP_CxP, 0); + pci_set_long(config + SHPC_CAP_DWORD_DATA, 0); + d->shpc->cap = config_offset; + /* Make dword select and data writeable. */ + pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff); + pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff); + return 0; +} + +static uint64_t shpc_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + return shpc_read(opaque, addr, size); +} + +static void shpc_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + shpc_write(opaque, addr, val, size); +} + +static const MemoryRegionOps shpc_mmio_ops = { + .read = shpc_mmio_read, + .write = shpc_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + /* SHPC ECN requires dword accesses, but the original 1.0 spec doesn't. + * It's easier to suppport all sizes than worry about it. */ + .min_access_size = 1, + .max_access_size = 4, + }, +}; +static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) +{ + int pci_slot = PCI_SLOT(affected_dev->devfn); + *slot = SHPC_PCI_TO_IDX(pci_slot); + + if (pci_slot < SHPC_IDX_TO_PCI(0) || *slot >= shpc->nslots) { + error_setg(errp, "Unsupported PCI slot %d for standard hotplug " + "controller. Valid slots are between %d and %d.", + pci_slot, SHPC_IDX_TO_PCI(0), + SHPC_IDX_TO_PCI(shpc->nslots) - 1); + return; + } +} + +void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + Error *local_err = NULL; + PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); + SHPCDevice *shpc = pci_hotplug_dev->shpc; + int slot; + + shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Don't send event when device is enabled during qemu machine creation: + * it is present on boot, no hotplug event is necessary. We do send an + * event when the device is disabled later. */ + if (!dev->hotplugged) { + shpc_set_status(shpc, slot, 0, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_7_5W, + SHPC_SLOT_STATUS_PRSNT_MASK); + return; + } + + /* This could be a cancellation of the previous removal. + * We check MRL state to figure out. */ + if (shpc_get_status(shpc, slot, SHPC_SLOT_STATUS_MRL_OPEN)) { + shpc_set_status(shpc, slot, 0, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_7_5W, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_BUTTON | + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; + } else { + /* Press attention button to cancel removal */ + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_BUTTON; + } + shpc_set_status(shpc, slot, 0, SHPC_SLOT_STATUS_66); + shpc_interrupt_update(pci_hotplug_dev); +} + +void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + Error *local_err = NULL; + PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); + SHPCDevice *shpc = pci_hotplug_dev->shpc; + uint8_t state; + uint8_t led; + int slot; + + shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= SHPC_SLOT_EVENT_BUTTON; + state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); + led = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + if (state == SHPC_STATE_DISABLED && led == SHPC_LED_OFF) { + shpc_free_devices_in_slot(shpc, slot); + shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; + } + shpc_set_status(shpc, slot, 0, SHPC_SLOT_STATUS_66); + shpc_interrupt_update(pci_hotplug_dev); +} + +/* Initialize the SHPC structure in bridge's BAR. */ +int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, unsigned offset) +{ + int i, ret; + int nslots = SHPC_MAX_SLOTS; /* TODO: qdev property? */ + SHPCDevice *shpc = d->shpc = g_malloc0(sizeof(*d->shpc)); + shpc->sec_bus = sec_bus; + ret = shpc_cap_add_config(d); + if (ret) { + g_free(d->shpc); + return ret; + } + if (nslots < SHPC_MIN_SLOTS) { + return 0; + } + if (nslots > SHPC_MAX_SLOTS || + SHPC_IDX_TO_PCI(nslots) > PCI_SLOT_MAX) { + /* TODO: report an error mesage that makes sense. */ + return -EINVAL; + } + shpc->nslots = nslots; + shpc->config = g_malloc0(SHPC_SIZEOF(d)); + shpc->cmask = g_malloc0(SHPC_SIZEOF(d)); + shpc->wmask = g_malloc0(SHPC_SIZEOF(d)); + shpc->w1cmask = g_malloc0(SHPC_SIZEOF(d)); + + shpc_reset(d); + + pci_set_long(shpc->config + SHPC_BASE_OFFSET, offset); + + pci_set_byte(shpc->wmask + SHPC_CMD_CODE, 0xff); + pci_set_byte(shpc->wmask + SHPC_CMD_TRGT, SHPC_CMD_TRGT_MAX); + pci_set_byte(shpc->wmask + SHPC_CMD_TRGT, SHPC_CMD_TRGT_MAX); + pci_set_long(shpc->wmask + SHPC_SERR_INT, + SHPC_INT_DIS | + SHPC_SERR_DIS | + SHPC_CMD_INT_DIS | + SHPC_ARB_SERR_DIS); + pci_set_long(shpc->w1cmask + SHPC_SERR_INT, + SHPC_CMD_DETECTED | + SHPC_ARB_DETECTED); + for (i = 0; i < nslots; ++i) { + pci_set_byte(shpc->wmask + + SHPC_SLOT_EVENT_SERR_INT_DIS(d, i), + SHPC_SLOT_EVENT_PRESENCE | + SHPC_SLOT_EVENT_ISOLATED_FAULT | + SHPC_SLOT_EVENT_BUTTON | + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_CONNECTED_FAULT | + SHPC_SLOT_EVENT_MRL_SERR_DIS | + SHPC_SLOT_EVENT_CONNECTED_FAULT_SERR_DIS); + pci_set_byte(shpc->w1cmask + + SHPC_SLOT_EVENT_LATCH(i), + SHPC_SLOT_EVENT_PRESENCE | + SHPC_SLOT_EVENT_ISOLATED_FAULT | + SHPC_SLOT_EVENT_BUTTON | + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_CONNECTED_FAULT); + } + + /* TODO: init cmask */ + memory_region_init_io(&shpc->mmio, OBJECT(d), &shpc_mmio_ops, + d, "shpc-mmio", SHPC_SIZEOF(d)); + shpc_cap_update_dword(d); + memory_region_add_subregion(bar, offset, &shpc->mmio); + + qbus_set_hotplug_handler(BUS(sec_bus), DEVICE(d), NULL); + + d->cap_present |= QEMU_PCI_CAP_SHPC; + return 0; +} + +int shpc_bar_size(PCIDevice *d) +{ + return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); +} + +void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) +{ + SHPCDevice *shpc = d->shpc; + d->cap_present &= ~QEMU_PCI_CAP_SHPC; + memory_region_del_subregion(bar, &shpc->mmio); + /* TODO: cleanup config space changes? */ +} + +void shpc_free(PCIDevice *d) +{ + SHPCDevice *shpc = d->shpc; + if (!shpc) { + return; + } + object_unparent(OBJECT(&shpc->mmio)); + g_free(shpc->config); + g_free(shpc->cmask); + g_free(shpc->wmask); + g_free(shpc->w1cmask); + g_free(shpc); + d->shpc = NULL; +} + +void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) +{ + if (!ranges_overlap(addr, l, d->shpc->cap, SHPC_CAP_LENGTH)) { + return; + } + if (ranges_overlap(addr, l, d->shpc->cap + SHPC_CAP_DWORD_DATA, 4)) { + unsigned dword_data; + dword_data = pci_get_long(d->shpc->config + d->shpc->cap + + SHPC_CAP_DWORD_DATA); + shpc_write(d, shpc_cap_dword(d) * 4, dword_data, 4); + } + /* Update cap dword data in case guest is going to read it. */ + shpc_cap_update_dword(d); +} + +static void shpc_save(QEMUFile *f, void *pv, size_t size) +{ + PCIDevice *d = container_of(pv, PCIDevice, shpc); + qemu_put_buffer(f, d->shpc->config, SHPC_SIZEOF(d)); +} + +static int shpc_load(QEMUFile *f, void *pv, size_t size) +{ + PCIDevice *d = container_of(pv, PCIDevice, shpc); + int ret = qemu_get_buffer(f, d->shpc->config, SHPC_SIZEOF(d)); + if (ret != SHPC_SIZEOF(d)) { + return -EINVAL; + } + /* Make sure we don't lose notifications. An extra interrupt is harmless. */ + d->shpc->msi_requested = 0; + shpc_interrupt_update(d); + return 0; +} + +VMStateInfo shpc_vmstate_info = { + .name = "shpc", + .get = shpc_load, + .put = shpc_save, +}; diff --git a/qemu/hw/pci/slotid_cap.c b/qemu/hw/pci/slotid_cap.c new file mode 100644 index 000000000..1c01d346c --- /dev/null +++ b/qemu/hw/pci/slotid_cap.c @@ -0,0 +1,45 @@ +#include "hw/pci/slotid_cap.h" +#include "hw/pci/pci.h" +#include "qemu/error-report.h" + +#define SLOTID_CAP_LENGTH 4 +#define SLOTID_NSLOTS_SHIFT ctz32(PCI_SID_ESR_NSLOTS) + +int slotid_cap_init(PCIDevice *d, int nslots, + uint8_t chassis, + unsigned offset) +{ + int cap; + if (!chassis) { + error_report("Bridge chassis not specified. Each bridge is required " + "to be assigned a unique chassis id > 0."); + return -EINVAL; + } + if (nslots < 0 || nslots > (PCI_SID_ESR_NSLOTS >> SLOTID_NSLOTS_SHIFT)) { + /* TODO: error report? */ + return -EINVAL; + } + + cap = pci_add_capability(d, PCI_CAP_ID_SLOTID, offset, SLOTID_CAP_LENGTH); + if (cap < 0) { + return cap; + } + /* We make each chassis unique, this way each bridge is First in Chassis */ + d->config[cap + PCI_SID_ESR] = PCI_SID_ESR_FIC | + (nslots << SLOTID_NSLOTS_SHIFT); + d->cmask[cap + PCI_SID_ESR] = 0xff; + d->config[cap + PCI_SID_CHASSIS_NR] = chassis; + /* Note: Chassis number register is non-volatile, + so we don't reset it. */ + /* TODO: store in eeprom? */ + d->wmask[cap + PCI_SID_CHASSIS_NR] = 0xff; + + d->cap_present |= QEMU_PCI_CAP_SLOTID; + return 0; +} + +void slotid_cap_cleanup(PCIDevice *d) +{ + /* TODO: cleanup config space? */ + d->cap_present &= ~QEMU_PCI_CAP_SLOTID; +} |