diff options
author | Yang Zhang <yang.z.zhang@intel.com> | 2015-08-28 09:58:54 +0800 |
---|---|---|
committer | Yang Zhang <yang.z.zhang@intel.com> | 2015-09-01 12:44:00 +0800 |
commit | e44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch) | |
tree | 66b09f592c55df2878107a468a91d21506104d3f /qemu/hw/scsi | |
parent | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff) |
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/hw/scsi')
-rw-r--r-- | qemu/hw/scsi/Makefile.objs | 13 | ||||
-rw-r--r-- | qemu/hw/scsi/esp-pci.c | 529 | ||||
-rw-r--r-- | qemu/hw/scsi/esp.c | 743 | ||||
-rw-r--r-- | qemu/hw/scsi/lsi53c895a.c | 2163 | ||||
-rw-r--r-- | qemu/hw/scsi/megasas.c | 2547 | ||||
-rw-r--r-- | qemu/hw/scsi/mfi.h | 1272 | ||||
-rw-r--r-- | qemu/hw/scsi/scsi-bus.c | 2049 | ||||
-rw-r--r-- | qemu/hw/scsi/scsi-disk.c | 2776 | ||||
-rw-r--r-- | qemu/hw/scsi/scsi-generic.c | 496 | ||||
-rw-r--r-- | qemu/hw/scsi/spapr_vscsi.c | 1302 | ||||
-rw-r--r-- | qemu/hw/scsi/srp.h | 247 | ||||
-rw-r--r-- | qemu/hw/scsi/vhost-scsi.c | 351 | ||||
-rw-r--r-- | qemu/hw/scsi/viosrp.h | 216 | ||||
-rw-r--r-- | qemu/hw/scsi/virtio-scsi-dataplane.c | 316 | ||||
-rw-r--r-- | qemu/hw/scsi/virtio-scsi.c | 1016 | ||||
-rw-r--r-- | qemu/hw/scsi/vmw_pvscsi.c | 1219 | ||||
-rw-r--r-- | qemu/hw/scsi/vmw_pvscsi.h | 434 |
17 files changed, 17689 insertions, 0 deletions
diff --git a/qemu/hw/scsi/Makefile.objs b/qemu/hw/scsi/Makefile.objs new file mode 100644 index 000000000..40c79d34c --- /dev/null +++ b/qemu/hw/scsi/Makefile.objs @@ -0,0 +1,13 @@ +common-obj-y += scsi-disk.o +common-obj-y += scsi-generic.o scsi-bus.o +common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o +common-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o +common-obj-$(CONFIG_VMW_PVSCSI_SCSI_PCI) += vmw_pvscsi.o +common-obj-$(CONFIG_ESP) += esp.o +common-obj-$(CONFIG_ESP_PCI) += esp-pci.o +obj-$(CONFIG_PSERIES) += spapr_vscsi.o + +ifeq ($(CONFIG_VIRTIO),y) +obj-y += virtio-scsi.o virtio-scsi-dataplane.o +obj-$(CONFIG_VHOST_SCSI) += vhost-scsi.o +endif diff --git a/qemu/hw/scsi/esp-pci.c b/qemu/hw/scsi/esp-pci.c new file mode 100644 index 000000000..8d2242d0a --- /dev/null +++ b/qemu/hw/scsi/esp-pci.c @@ -0,0 +1,529 @@ +/* + * QEMU ESP/NCR53C9x emulation + * + * Copyright (c) 2005-2006 Fabrice Bellard + * Copyright (c) 2012 Herve Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/pci/pci.h" +#include "hw/nvram/eeprom93xx.h" +#include "hw/scsi/esp.h" +#include "trace.h" +#include "qemu/log.h" + +#define TYPE_AM53C974_DEVICE "am53c974" + +#define PCI_ESP(obj) \ + OBJECT_CHECK(PCIESPState, (obj), TYPE_AM53C974_DEVICE) + +#define DMA_CMD 0x0 +#define DMA_STC 0x1 +#define DMA_SPA 0x2 +#define DMA_WBC 0x3 +#define DMA_WAC 0x4 +#define DMA_STAT 0x5 +#define DMA_SMDLA 0x6 +#define DMA_WMAC 0x7 + +#define DMA_CMD_MASK 0x03 +#define DMA_CMD_DIAG 0x04 +#define DMA_CMD_MDL 0x10 +#define DMA_CMD_INTE_P 0x20 +#define DMA_CMD_INTE_D 0x40 +#define DMA_CMD_DIR 0x80 + +#define DMA_STAT_PWDN 0x01 +#define DMA_STAT_ERROR 0x02 +#define DMA_STAT_ABORT 0x04 +#define DMA_STAT_DONE 0x08 +#define DMA_STAT_SCSIINT 0x10 +#define DMA_STAT_BCMBLT 0x20 + +#define SBAC_STATUS 0x1000 + +typedef struct PCIESPState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion io; + uint32_t dma_regs[8]; + uint32_t sbac; + ESPState esp; +} PCIESPState; + +static void esp_pci_handle_idle(PCIESPState *pci, uint32_t val) +{ + trace_esp_pci_dma_idle(val); + esp_dma_enable(&pci->esp, 0, 0); +} + +static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val) +{ + trace_esp_pci_dma_blast(val); + qemu_log_mask(LOG_UNIMP, "am53c974: cmd BLAST not implemented\n"); +} + +static void esp_pci_handle_abort(PCIESPState *pci, uint32_t val) +{ + trace_esp_pci_dma_abort(val); + if (pci->esp.current_req) { + scsi_req_cancel(pci->esp.current_req); + } +} + +static void esp_pci_handle_start(PCIESPState *pci, uint32_t val) +{ + trace_esp_pci_dma_start(val); + + pci->dma_regs[DMA_WBC] = pci->dma_regs[DMA_STC]; + pci->dma_regs[DMA_WAC] = pci->dma_regs[DMA_SPA]; + pci->dma_regs[DMA_WMAC] = pci->dma_regs[DMA_SMDLA]; + + pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT + | DMA_STAT_DONE | DMA_STAT_ABORT + | DMA_STAT_ERROR | DMA_STAT_PWDN); + + esp_dma_enable(&pci->esp, 0, 1); +} + +static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val) +{ + trace_esp_pci_dma_write(saddr, pci->dma_regs[saddr], val); + switch (saddr) { + case DMA_CMD: + pci->dma_regs[saddr] = val; + switch (val & DMA_CMD_MASK) { + case 0x0: /* IDLE */ + esp_pci_handle_idle(pci, val); + break; + case 0x1: /* BLAST */ + esp_pci_handle_blast(pci, val); + break; + case 0x2: /* ABORT */ + esp_pci_handle_abort(pci, val); + break; + case 0x3: /* START */ + esp_pci_handle_start(pci, val); + break; + default: /* can't happen */ + abort(); + } + break; + case DMA_STC: + case DMA_SPA: + case DMA_SMDLA: + pci->dma_regs[saddr] = val; + break; + case DMA_STAT: + if (!(pci->sbac & SBAC_STATUS)) { + /* clear some bits on write */ + uint32_t mask = DMA_STAT_ERROR | DMA_STAT_ABORT | DMA_STAT_DONE; + pci->dma_regs[DMA_STAT] &= ~(val & mask); + } + break; + default: + trace_esp_pci_error_invalid_write_dma(val, saddr); + return; + } +} + +static uint32_t esp_pci_dma_read(PCIESPState *pci, uint32_t saddr) +{ + uint32_t val; + + val = pci->dma_regs[saddr]; + if (saddr == DMA_STAT) { + if (pci->esp.rregs[ESP_RSTAT] & STAT_INT) { + val |= DMA_STAT_SCSIINT; + } + if (pci->sbac & SBAC_STATUS) { + pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_ERROR | DMA_STAT_ABORT | + DMA_STAT_DONE); + } + } + + trace_esp_pci_dma_read(saddr, val); + return val; +} + +static void esp_pci_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PCIESPState *pci = opaque; + + if (size < 4 || addr & 3) { + /* need to upgrade request: we only support 4-bytes accesses */ + uint32_t current = 0, mask; + int shift; + + if (addr < 0x40) { + current = pci->esp.wregs[addr >> 2]; + } else if (addr < 0x60) { + current = pci->dma_regs[(addr - 0x40) >> 2]; + } else if (addr < 0x74) { + current = pci->sbac; + } + + shift = (4 - size) * 8; + mask = (~(uint32_t)0 << shift) >> shift; + + shift = ((4 - (addr & 3)) & 3) * 8; + val <<= shift; + val |= current & ~(mask << shift); + addr &= ~3; + size = 4; + } + + if (addr < 0x40) { + /* SCSI core reg */ + esp_reg_write(&pci->esp, addr >> 2, val); + } else if (addr < 0x60) { + /* PCI DMA CCB */ + esp_pci_dma_write(pci, (addr - 0x40) >> 2, val); + } else if (addr == 0x70) { + /* DMA SCSI Bus and control */ + trace_esp_pci_sbac_write(pci->sbac, val); + pci->sbac = val; + } else { + trace_esp_pci_error_invalid_write((int)addr); + } +} + +static uint64_t esp_pci_io_read(void *opaque, hwaddr addr, + unsigned int size) +{ + PCIESPState *pci = opaque; + uint32_t ret; + + if (addr < 0x40) { + /* SCSI core reg */ + ret = esp_reg_read(&pci->esp, addr >> 2); + } else if (addr < 0x60) { + /* PCI DMA CCB */ + ret = esp_pci_dma_read(pci, (addr - 0x40) >> 2); + } else if (addr == 0x70) { + /* DMA SCSI Bus and control */ + trace_esp_pci_sbac_read(pci->sbac); + ret = pci->sbac; + } else { + /* Invalid region */ + trace_esp_pci_error_invalid_read((int)addr); + ret = 0; + } + + /* give only requested data */ + ret >>= (addr & 3) * 8; + ret &= ~(~(uint64_t)0 << (8 * size)); + + return ret; +} + +static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len, + DMADirection dir) +{ + dma_addr_t addr; + DMADirection expected_dir; + + if (pci->dma_regs[DMA_CMD] & DMA_CMD_DIR) { + expected_dir = DMA_DIRECTION_FROM_DEVICE; + } else { + expected_dir = DMA_DIRECTION_TO_DEVICE; + } + + if (dir != expected_dir) { + trace_esp_pci_error_invalid_dma_direction(); + return; + } + + if (pci->dma_regs[DMA_STAT] & DMA_CMD_MDL) { + qemu_log_mask(LOG_UNIMP, "am53c974: MDL transfer not implemented\n"); + } + + addr = pci->dma_regs[DMA_SPA]; + if (pci->dma_regs[DMA_WBC] < len) { + len = pci->dma_regs[DMA_WBC]; + } + + pci_dma_rw(PCI_DEVICE(pci), addr, buf, len, dir); + + /* update status registers */ + pci->dma_regs[DMA_WBC] -= len; + pci->dma_regs[DMA_WAC] += len; + if (pci->dma_regs[DMA_WBC] == 0) { + pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; + } +} + +static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len) +{ + PCIESPState *pci = opaque; + esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_TO_DEVICE); +} + +static void esp_pci_dma_memory_write(void *opaque, uint8_t *buf, int len) +{ + PCIESPState *pci = opaque; + esp_pci_dma_memory_rw(pci, buf, len, DMA_DIRECTION_FROM_DEVICE); +} + +static const MemoryRegionOps esp_pci_io_ops = { + .read = esp_pci_io_read, + .write = esp_pci_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void esp_pci_hard_reset(DeviceState *dev) +{ + PCIESPState *pci = PCI_ESP(dev); + esp_hard_reset(&pci->esp); + pci->dma_regs[DMA_CMD] &= ~(DMA_CMD_DIR | DMA_CMD_INTE_D | DMA_CMD_INTE_P + | DMA_CMD_MDL | DMA_CMD_DIAG | DMA_CMD_MASK); + pci->dma_regs[DMA_WBC] &= ~0xffff; + pci->dma_regs[DMA_WAC] = 0xffffffff; + pci->dma_regs[DMA_STAT] &= ~(DMA_STAT_BCMBLT | DMA_STAT_SCSIINT + | DMA_STAT_DONE | DMA_STAT_ABORT + | DMA_STAT_ERROR); + pci->dma_regs[DMA_WMAC] = 0xfffffffd; +} + +static const VMStateDescription vmstate_esp_pci_scsi = { + .name = "pciespscsi", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PCIESPState), + VMSTATE_BUFFER_UNSAFE(dma_regs, PCIESPState, 0, 8 * sizeof(uint32_t)), + VMSTATE_STRUCT(esp, PCIESPState, 0, vmstate_esp, ESPState), + VMSTATE_END_OF_LIST() + } +}; + +static void esp_pci_command_complete(SCSIRequest *req, uint32_t status, + size_t resid) +{ + ESPState *s = req->hba_private; + PCIESPState *pci = container_of(s, PCIESPState, esp); + + esp_command_complete(req, status, resid); + pci->dma_regs[DMA_WBC] = 0; + pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; +} + +static const struct SCSIBusInfo esp_pci_scsi_info = { + .tcq = false, + .max_target = ESP_MAX_DEVS, + .max_lun = 7, + + .transfer_data = esp_transfer_data, + .complete = esp_pci_command_complete, + .cancel = esp_request_cancelled, +}; + +static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp) +{ + PCIESPState *pci = PCI_ESP(dev); + DeviceState *d = DEVICE(dev); + ESPState *s = &pci->esp; + uint8_t *pci_conf; + + pci_conf = dev->config; + + /* Interrupt pin A */ + pci_conf[PCI_INTERRUPT_PIN] = 0x01; + + s->dma_memory_read = esp_pci_dma_memory_read; + s->dma_memory_write = esp_pci_dma_memory_write; + s->dma_opaque = pci; + s->chip_id = TCHI_AM53C974; + memory_region_init_io(&pci->io, OBJECT(pci), &esp_pci_io_ops, pci, + "esp-io", 0x80); + + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->io); + s->irq = pci_allocate_irq(dev); + + scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL); + if (!d->hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus, errp); + } +} + +static void esp_pci_scsi_uninit(PCIDevice *d) +{ + PCIESPState *pci = PCI_ESP(d); + + qemu_free_irq(pci->esp.irq); +} + +static void esp_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = esp_pci_scsi_realize; + k->exit = esp_pci_scsi_uninit; + k->vendor_id = PCI_VENDOR_ID_AMD; + k->device_id = PCI_DEVICE_ID_AMD_SCSI; + k->revision = 0x10; + k->class_id = PCI_CLASS_STORAGE_SCSI; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "AMD Am53c974 PCscsi-PCI SCSI adapter"; + dc->reset = esp_pci_hard_reset; + dc->vmsd = &vmstate_esp_pci_scsi; +} + +static const TypeInfo esp_pci_info = { + .name = TYPE_AM53C974_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIESPState), + .class_init = esp_pci_class_init, +}; + +typedef struct { + PCIESPState pci; + eeprom_t *eeprom; +} DC390State; + +#define TYPE_DC390_DEVICE "dc390" +#define DC390(obj) \ + OBJECT_CHECK(DC390State, obj, TYPE_DC390_DEVICE) + +#define EE_ADAPT_SCSI_ID 64 +#define EE_MODE2 65 +#define EE_DELAY 66 +#define EE_TAG_CMD_NUM 67 +#define EE_ADAPT_OPTIONS 68 +#define EE_BOOT_SCSI_ID 69 +#define EE_BOOT_SCSI_LUN 70 +#define EE_CHKSUM1 126 +#define EE_CHKSUM2 127 + +#define EE_ADAPT_OPTION_F6_F8_AT_BOOT 0x01 +#define EE_ADAPT_OPTION_BOOT_FROM_CDROM 0x02 +#define EE_ADAPT_OPTION_INT13 0x04 +#define EE_ADAPT_OPTION_SCAM_SUPPORT 0x08 + + +static uint32_t dc390_read_config(PCIDevice *dev, uint32_t addr, int l) +{ + DC390State *pci = DC390(dev); + uint32_t val; + + val = pci_default_read_config(dev, addr, l); + + if (addr == 0x00 && l == 1) { + /* First byte of address space is AND-ed with EEPROM DO line */ + if (!eeprom93xx_read(pci->eeprom)) { + val &= ~0xff; + } + } + + return val; +} + +static void dc390_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int l) +{ + DC390State *pci = DC390(dev); + if (addr == 0x80) { + /* EEPROM write */ + int eesk = val & 0x80 ? 1 : 0; + int eedi = val & 0x40 ? 1 : 0; + eeprom93xx_write(pci->eeprom, 1, eesk, eedi); + } else if (addr == 0xc0) { + /* EEPROM CS low */ + eeprom93xx_write(pci->eeprom, 0, 0, 0); + } else { + pci_default_write_config(dev, addr, val, l); + } +} + +static void dc390_scsi_realize(PCIDevice *dev, Error **errp) +{ + DC390State *pci = DC390(dev); + Error *err = NULL; + uint8_t *contents; + uint16_t chksum = 0; + int i; + + /* init base class */ + esp_pci_scsi_realize(dev, &err); + if (err) { + error_propagate(errp, err); + return; + } + + /* EEPROM */ + pci->eeprom = eeprom93xx_new(DEVICE(dev), 64); + + /* set default eeprom values */ + contents = (uint8_t *)eeprom93xx_data(pci->eeprom); + + for (i = 0; i < 16; i++) { + contents[i * 2] = 0x57; + contents[i * 2 + 1] = 0x00; + } + contents[EE_ADAPT_SCSI_ID] = 7; + contents[EE_MODE2] = 0x0f; + contents[EE_TAG_CMD_NUM] = 0x04; + contents[EE_ADAPT_OPTIONS] = EE_ADAPT_OPTION_F6_F8_AT_BOOT + | EE_ADAPT_OPTION_BOOT_FROM_CDROM + | EE_ADAPT_OPTION_INT13; + + /* update eeprom checksum */ + for (i = 0; i < EE_CHKSUM1; i += 2) { + chksum += contents[i] + (((uint16_t)contents[i + 1]) << 8); + } + chksum = 0x1234 - chksum; + contents[EE_CHKSUM1] = chksum & 0xff; + contents[EE_CHKSUM2] = chksum >> 8; +} + +static void dc390_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = dc390_scsi_realize; + k->config_read = dc390_read_config; + k->config_write = dc390_write_config; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "Tekram DC-390 SCSI adapter"; +} + +static const TypeInfo dc390_info = { + .name = "dc390", + .parent = TYPE_AM53C974_DEVICE, + .instance_size = sizeof(DC390State), + .class_init = dc390_class_init, +}; + +static void esp_pci_register_types(void) +{ + type_register_static(&esp_pci_info); + type_register_static(&dc390_info); +} + +type_init(esp_pci_register_types) diff --git a/qemu/hw/scsi/esp.c b/qemu/hw/scsi/esp.c new file mode 100644 index 000000000..272d13d63 --- /dev/null +++ b/qemu/hw/scsi/esp.c @@ -0,0 +1,743 @@ +/* + * QEMU ESP/NCR53C9x emulation + * + * Copyright (c) 2005-2006 Fabrice Bellard + * Copyright (c) 2012 Herve Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/sysbus.h" +#include "hw/scsi/esp.h" +#include "trace.h" +#include "qemu/log.h" + +/* + * On Sparc32, this is the ESP (NCR53C90) part of chip STP2000 (Master I/O), + * also produced as NCR89C100. See + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR89C100.txt + * and + * http://www.ibiblio.org/pub/historic-linux/early-ports/Sparc/NCR/NCR53C9X.txt + */ + +static void esp_raise_irq(ESPState *s) +{ + if (!(s->rregs[ESP_RSTAT] & STAT_INT)) { + s->rregs[ESP_RSTAT] |= STAT_INT; + qemu_irq_raise(s->irq); + trace_esp_raise_irq(); + } +} + +static void esp_lower_irq(ESPState *s) +{ + if (s->rregs[ESP_RSTAT] & STAT_INT) { + s->rregs[ESP_RSTAT] &= ~STAT_INT; + qemu_irq_lower(s->irq); + trace_esp_lower_irq(); + } +} + +void esp_dma_enable(ESPState *s, int irq, int level) +{ + if (level) { + s->dma_enabled = 1; + trace_esp_dma_enable(); + if (s->dma_cb) { + s->dma_cb(s); + s->dma_cb = NULL; + } + } else { + trace_esp_dma_disable(); + s->dma_enabled = 0; + } +} + +void esp_request_cancelled(SCSIRequest *req) +{ + ESPState *s = req->hba_private; + + if (req == s->current_req) { + scsi_req_unref(s->current_req); + s->current_req = NULL; + s->current_dev = NULL; + } +} + +static uint32_t get_cmd(ESPState *s, uint8_t *buf) +{ + uint32_t dmalen; + int target; + + target = s->wregs[ESP_WBUSID] & BUSID_DID; + if (s->dma) { + dmalen = s->rregs[ESP_TCLO]; + dmalen |= s->rregs[ESP_TCMID] << 8; + dmalen |= s->rregs[ESP_TCHI] << 16; + s->dma_memory_read(s->dma_opaque, buf, dmalen); + } else { + dmalen = s->ti_size; + memcpy(buf, s->ti_buf, dmalen); + buf[0] = buf[2] >> 5; + } + trace_esp_get_cmd(dmalen, target); + + s->ti_size = 0; + s->ti_rptr = 0; + s->ti_wptr = 0; + + if (s->current_req) { + /* Started a new command before the old one finished. Cancel it. */ + scsi_req_cancel(s->current_req); + s->async_len = 0; + } + + s->current_dev = scsi_device_find(&s->bus, 0, target, 0); + if (!s->current_dev) { + // No such drive + s->rregs[ESP_RSTAT] = 0; + s->rregs[ESP_RINTR] = INTR_DC; + s->rregs[ESP_RSEQ] = SEQ_0; + esp_raise_irq(s); + return 0; + } + return dmalen; +} + +static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid) +{ + int32_t datalen; + int lun; + SCSIDevice *current_lun; + + trace_esp_do_busid_cmd(busid); + lun = busid & 7; + current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun); + s->current_req = scsi_req_new(current_lun, 0, lun, buf, s); + datalen = scsi_req_enqueue(s->current_req); + s->ti_size = datalen; + if (datalen != 0) { + s->rregs[ESP_RSTAT] = STAT_TC; + s->dma_left = 0; + s->dma_counter = 0; + if (datalen > 0) { + s->rregs[ESP_RSTAT] |= STAT_DI; + } else { + s->rregs[ESP_RSTAT] |= STAT_DO; + } + scsi_req_continue(s->current_req); + } + s->rregs[ESP_RINTR] = INTR_BS | INTR_FC; + s->rregs[ESP_RSEQ] = SEQ_CD; + esp_raise_irq(s); +} + +static void do_cmd(ESPState *s, uint8_t *buf) +{ + uint8_t busid = buf[0]; + + do_busid_cmd(s, &buf[1], busid); +} + +static void handle_satn(ESPState *s) +{ + uint8_t buf[32]; + int len; + + if (s->dma && !s->dma_enabled) { + s->dma_cb = handle_satn; + return; + } + len = get_cmd(s, buf); + if (len) + do_cmd(s, buf); +} + +static void handle_s_without_atn(ESPState *s) +{ + uint8_t buf[32]; + int len; + + if (s->dma && !s->dma_enabled) { + s->dma_cb = handle_s_without_atn; + return; + } + len = get_cmd(s, buf); + if (len) { + do_busid_cmd(s, buf, 0); + } +} + +static void handle_satn_stop(ESPState *s) +{ + if (s->dma && !s->dma_enabled) { + s->dma_cb = handle_satn_stop; + return; + } + s->cmdlen = get_cmd(s, s->cmdbuf); + if (s->cmdlen) { + trace_esp_handle_satn_stop(s->cmdlen); + s->do_cmd = 1; + s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD; + s->rregs[ESP_RINTR] = INTR_BS | INTR_FC; + s->rregs[ESP_RSEQ] = SEQ_CD; + esp_raise_irq(s); + } +} + +static void write_response(ESPState *s) +{ + trace_esp_write_response(s->status); + s->ti_buf[0] = s->status; + s->ti_buf[1] = 0; + if (s->dma) { + s->dma_memory_write(s->dma_opaque, s->ti_buf, 2); + s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; + s->rregs[ESP_RINTR] = INTR_BS | INTR_FC; + s->rregs[ESP_RSEQ] = SEQ_CD; + } else { + s->ti_size = 2; + s->ti_rptr = 0; + s->ti_wptr = 0; + s->rregs[ESP_RFLAGS] = 2; + } + esp_raise_irq(s); +} + +static void esp_dma_done(ESPState *s) +{ + s->rregs[ESP_RSTAT] |= STAT_TC; + s->rregs[ESP_RINTR] = INTR_BS; + s->rregs[ESP_RSEQ] = 0; + s->rregs[ESP_RFLAGS] = 0; + s->rregs[ESP_TCLO] = 0; + s->rregs[ESP_TCMID] = 0; + s->rregs[ESP_TCHI] = 0; + esp_raise_irq(s); +} + +static void esp_do_dma(ESPState *s) +{ + uint32_t len; + int to_device; + + to_device = (s->ti_size < 0); + len = s->dma_left; + if (s->do_cmd) { + trace_esp_do_dma(s->cmdlen, len); + s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len); + s->ti_size = 0; + s->cmdlen = 0; + s->do_cmd = 0; + do_cmd(s, s->cmdbuf); + return; + } + if (s->async_len == 0) { + /* Defer until data is available. */ + return; + } + if (len > s->async_len) { + len = s->async_len; + } + if (to_device) { + s->dma_memory_read(s->dma_opaque, s->async_buf, len); + } else { + s->dma_memory_write(s->dma_opaque, s->async_buf, len); + } + s->dma_left -= len; + s->async_buf += len; + s->async_len -= len; + if (to_device) + s->ti_size += len; + else + s->ti_size -= len; + if (s->async_len == 0) { + scsi_req_continue(s->current_req); + /* If there is still data to be read from the device then + complete the DMA operation immediately. Otherwise defer + until the scsi layer has completed. */ + if (to_device || s->dma_left != 0 || s->ti_size == 0) { + return; + } + } + + /* Partially filled a scsi buffer. Complete immediately. */ + esp_dma_done(s); +} + +void esp_command_complete(SCSIRequest *req, uint32_t status, + size_t resid) +{ + ESPState *s = req->hba_private; + + trace_esp_command_complete(); + if (s->ti_size != 0) { + trace_esp_command_complete_unexpected(); + } + s->ti_size = 0; + s->dma_left = 0; + s->async_len = 0; + if (status) { + trace_esp_command_complete_fail(); + } + s->status = status; + s->rregs[ESP_RSTAT] = STAT_ST; + esp_dma_done(s); + if (s->current_req) { + scsi_req_unref(s->current_req); + s->current_req = NULL; + s->current_dev = NULL; + } +} + +void esp_transfer_data(SCSIRequest *req, uint32_t len) +{ + ESPState *s = req->hba_private; + + trace_esp_transfer_data(s->dma_left, s->ti_size); + s->async_len = len; + s->async_buf = scsi_req_get_buf(req); + if (s->dma_left) { + esp_do_dma(s); + } else if (s->dma_counter != 0 && s->ti_size <= 0) { + /* If this was the last part of a DMA transfer then the + completion interrupt is deferred to here. */ + esp_dma_done(s); + } +} + +static void handle_ti(ESPState *s) +{ + uint32_t dmalen, minlen; + + if (s->dma && !s->dma_enabled) { + s->dma_cb = handle_ti; + return; + } + + dmalen = s->rregs[ESP_TCLO]; + dmalen |= s->rregs[ESP_TCMID] << 8; + dmalen |= s->rregs[ESP_TCHI] << 16; + if (dmalen==0) { + dmalen=0x10000; + } + s->dma_counter = dmalen; + + if (s->do_cmd) + minlen = (dmalen < 32) ? dmalen : 32; + else if (s->ti_size < 0) + minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size; + else + minlen = (dmalen < s->ti_size) ? dmalen : s->ti_size; + trace_esp_handle_ti(minlen); + if (s->dma) { + s->dma_left = minlen; + s->rregs[ESP_RSTAT] &= ~STAT_TC; + esp_do_dma(s); + } else if (s->do_cmd) { + trace_esp_handle_ti_cmd(s->cmdlen); + s->ti_size = 0; + s->cmdlen = 0; + s->do_cmd = 0; + do_cmd(s, s->cmdbuf); + return; + } +} + +void esp_hard_reset(ESPState *s) +{ + memset(s->rregs, 0, ESP_REGS); + memset(s->wregs, 0, ESP_REGS); + s->tchi_written = 0; + s->ti_size = 0; + s->ti_rptr = 0; + s->ti_wptr = 0; + s->dma = 0; + s->do_cmd = 0; + s->dma_cb = NULL; + + s->rregs[ESP_CFG1] = 7; +} + +static void esp_soft_reset(ESPState *s) +{ + qemu_irq_lower(s->irq); + esp_hard_reset(s); +} + +static void parent_esp_reset(ESPState *s, int irq, int level) +{ + if (level) { + esp_soft_reset(s); + } +} + +uint64_t esp_reg_read(ESPState *s, uint32_t saddr) +{ + uint32_t old_val; + + trace_esp_mem_readb(saddr, s->rregs[saddr]); + switch (saddr) { + case ESP_FIFO: + if (s->ti_size > 0) { + s->ti_size--; + if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) { + /* Data out. */ + qemu_log_mask(LOG_UNIMP, + "esp: PIO data read not implemented\n"); + s->rregs[ESP_FIFO] = 0; + } else { + s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++]; + } + esp_raise_irq(s); + } + if (s->ti_size == 0) { + s->ti_rptr = 0; + s->ti_wptr = 0; + } + break; + case ESP_RINTR: + /* Clear sequence step, interrupt register and all status bits + except TC */ + old_val = s->rregs[ESP_RINTR]; + s->rregs[ESP_RINTR] = 0; + s->rregs[ESP_RSTAT] &= ~STAT_TC; + s->rregs[ESP_RSEQ] = SEQ_CD; + esp_lower_irq(s); + + return old_val; + case ESP_TCHI: + /* Return the unique id if the value has never been written */ + if (!s->tchi_written) { + return s->chip_id; + } + default: + break; + } + return s->rregs[saddr]; +} + +void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) +{ + trace_esp_mem_writeb(saddr, s->wregs[saddr], val); + switch (saddr) { + case ESP_TCHI: + s->tchi_written = true; + /* fall through */ + case ESP_TCLO: + case ESP_TCMID: + s->rregs[ESP_RSTAT] &= ~STAT_TC; + break; + case ESP_FIFO: + if (s->do_cmd) { + s->cmdbuf[s->cmdlen++] = val & 0xff; + } else if (s->ti_size == TI_BUFSZ - 1) { + trace_esp_error_fifo_overrun(); + } else { + s->ti_size++; + s->ti_buf[s->ti_wptr++] = val & 0xff; + } + break; + case ESP_CMD: + s->rregs[saddr] = val; + if (val & CMD_DMA) { + s->dma = 1; + /* Reload DMA counter. */ + s->rregs[ESP_TCLO] = s->wregs[ESP_TCLO]; + s->rregs[ESP_TCMID] = s->wregs[ESP_TCMID]; + s->rregs[ESP_TCHI] = s->wregs[ESP_TCHI]; + } else { + s->dma = 0; + } + switch(val & CMD_CMD) { + case CMD_NOP: + trace_esp_mem_writeb_cmd_nop(val); + break; + case CMD_FLUSH: + trace_esp_mem_writeb_cmd_flush(val); + //s->ti_size = 0; + s->rregs[ESP_RINTR] = INTR_FC; + s->rregs[ESP_RSEQ] = 0; + s->rregs[ESP_RFLAGS] = 0; + break; + case CMD_RESET: + trace_esp_mem_writeb_cmd_reset(val); + esp_soft_reset(s); + break; + case CMD_BUSRESET: + trace_esp_mem_writeb_cmd_bus_reset(val); + s->rregs[ESP_RINTR] = INTR_RST; + if (!(s->wregs[ESP_CFG1] & CFG1_RESREPT)) { + esp_raise_irq(s); + } + break; + case CMD_TI: + handle_ti(s); + break; + case CMD_ICCS: + trace_esp_mem_writeb_cmd_iccs(val); + write_response(s); + s->rregs[ESP_RINTR] = INTR_FC; + s->rregs[ESP_RSTAT] |= STAT_MI; + break; + case CMD_MSGACC: + trace_esp_mem_writeb_cmd_msgacc(val); + s->rregs[ESP_RINTR] = INTR_DC; + s->rregs[ESP_RSEQ] = 0; + s->rregs[ESP_RFLAGS] = 0; + esp_raise_irq(s); + break; + case CMD_PAD: + trace_esp_mem_writeb_cmd_pad(val); + s->rregs[ESP_RSTAT] = STAT_TC; + s->rregs[ESP_RINTR] = INTR_FC; + s->rregs[ESP_RSEQ] = 0; + break; + case CMD_SATN: + trace_esp_mem_writeb_cmd_satn(val); + break; + case CMD_RSTATN: + trace_esp_mem_writeb_cmd_rstatn(val); + break; + case CMD_SEL: + trace_esp_mem_writeb_cmd_sel(val); + handle_s_without_atn(s); + break; + case CMD_SELATN: + trace_esp_mem_writeb_cmd_selatn(val); + handle_satn(s); + break; + case CMD_SELATNS: + trace_esp_mem_writeb_cmd_selatns(val); + handle_satn_stop(s); + break; + case CMD_ENSEL: + trace_esp_mem_writeb_cmd_ensel(val); + s->rregs[ESP_RINTR] = 0; + break; + case CMD_DISSEL: + trace_esp_mem_writeb_cmd_dissel(val); + s->rregs[ESP_RINTR] = 0; + esp_raise_irq(s); + break; + default: + trace_esp_error_unhandled_command(val); + break; + } + break; + case ESP_WBUSID ... ESP_WSYNO: + break; + case ESP_CFG1: + case ESP_CFG2: case ESP_CFG3: + case ESP_RES3: case ESP_RES4: + s->rregs[saddr] = val; + break; + case ESP_WCCF ... ESP_WTEST: + break; + default: + trace_esp_error_invalid_write(val, saddr); + return; + } + s->wregs[saddr] = val; +} + +static bool esp_mem_accepts(void *opaque, hwaddr addr, + unsigned size, bool is_write) +{ + return (size == 1) || (is_write && size == 4); +} + +const VMStateDescription vmstate_esp = { + .name ="esp", + .version_id = 3, + .minimum_version_id = 3, + .fields = (VMStateField[]) { + VMSTATE_BUFFER(rregs, ESPState), + VMSTATE_BUFFER(wregs, ESPState), + VMSTATE_INT32(ti_size, ESPState), + VMSTATE_UINT32(ti_rptr, ESPState), + VMSTATE_UINT32(ti_wptr, ESPState), + VMSTATE_BUFFER(ti_buf, ESPState), + VMSTATE_UINT32(status, ESPState), + VMSTATE_UINT32(dma, ESPState), + VMSTATE_BUFFER(cmdbuf, ESPState), + VMSTATE_UINT32(cmdlen, ESPState), + VMSTATE_UINT32(do_cmd, ESPState), + VMSTATE_UINT32(dma_left, ESPState), + VMSTATE_END_OF_LIST() + } +}; + +#define TYPE_ESP "esp" +#define ESP(obj) OBJECT_CHECK(SysBusESPState, (obj), TYPE_ESP) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + uint32_t it_shift; + ESPState esp; +} SysBusESPState; + +static void sysbus_esp_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + SysBusESPState *sysbus = opaque; + uint32_t saddr; + + saddr = addr >> sysbus->it_shift; + esp_reg_write(&sysbus->esp, saddr, val); +} + +static uint64_t sysbus_esp_mem_read(void *opaque, hwaddr addr, + unsigned int size) +{ + SysBusESPState *sysbus = opaque; + uint32_t saddr; + + saddr = addr >> sysbus->it_shift; + return esp_reg_read(&sysbus->esp, saddr); +} + +static const MemoryRegionOps sysbus_esp_mem_ops = { + .read = sysbus_esp_mem_read, + .write = sysbus_esp_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.accepts = esp_mem_accepts, +}; + +void esp_init(hwaddr espaddr, int it_shift, + ESPDMAMemoryReadWriteFunc dma_memory_read, + ESPDMAMemoryReadWriteFunc dma_memory_write, + void *dma_opaque, qemu_irq irq, qemu_irq *reset, + qemu_irq *dma_enable) +{ + DeviceState *dev; + SysBusDevice *s; + SysBusESPState *sysbus; + ESPState *esp; + + dev = qdev_create(NULL, TYPE_ESP); + sysbus = ESP(dev); + esp = &sysbus->esp; + esp->dma_memory_read = dma_memory_read; + esp->dma_memory_write = dma_memory_write; + esp->dma_opaque = dma_opaque; + sysbus->it_shift = it_shift; + /* XXX for now until rc4030 has been changed to use DMA enable signal */ + esp->dma_enabled = 1; + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + sysbus_connect_irq(s, 0, irq); + sysbus_mmio_map(s, 0, espaddr); + *reset = qdev_get_gpio_in(dev, 0); + *dma_enable = qdev_get_gpio_in(dev, 1); +} + +static const struct SCSIBusInfo esp_scsi_info = { + .tcq = false, + .max_target = ESP_MAX_DEVS, + .max_lun = 7, + + .transfer_data = esp_transfer_data, + .complete = esp_command_complete, + .cancel = esp_request_cancelled +}; + +static void sysbus_esp_gpio_demux(void *opaque, int irq, int level) +{ + SysBusESPState *sysbus = ESP(opaque); + ESPState *s = &sysbus->esp; + + switch (irq) { + case 0: + parent_esp_reset(s, irq, level); + break; + case 1: + esp_dma_enable(opaque, irq, level); + break; + } +} + +static void sysbus_esp_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + SysBusESPState *sysbus = ESP(dev); + ESPState *s = &sysbus->esp; + Error *err = NULL; + + sysbus_init_irq(sbd, &s->irq); + assert(sysbus->it_shift != -1); + + s->chip_id = TCHI_FAS100A; + memory_region_init_io(&sysbus->iomem, OBJECT(sysbus), &sysbus_esp_mem_ops, + sysbus, "esp", ESP_REGS << sysbus->it_shift); + sysbus_init_mmio(sbd, &sysbus->iomem); + + qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2); + + scsi_bus_new(&s->bus, sizeof(s->bus), dev, &esp_scsi_info, NULL); + scsi_bus_legacy_handle_cmdline(&s->bus, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } +} + +static void sysbus_esp_hard_reset(DeviceState *dev) +{ + SysBusESPState *sysbus = ESP(dev); + esp_hard_reset(&sysbus->esp); +} + +static const VMStateDescription vmstate_sysbus_esp_scsi = { + .name = "sysbusespscsi", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(esp, SysBusESPState, 0, vmstate_esp, ESPState), + VMSTATE_END_OF_LIST() + } +}; + +static void sysbus_esp_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = sysbus_esp_realize; + dc->reset = sysbus_esp_hard_reset; + dc->vmsd = &vmstate_sysbus_esp_scsi; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sysbus_esp_info = { + .name = TYPE_ESP, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SysBusESPState), + .class_init = sysbus_esp_class_init, +}; + +static void esp_register_types(void) +{ + type_register_static(&sysbus_esp_info); +} + +type_init(esp_register_types) diff --git a/qemu/hw/scsi/lsi53c895a.c b/qemu/hw/scsi/lsi53c895a.c new file mode 100644 index 000000000..c5b0cc5ca --- /dev/null +++ b/qemu/hw/scsi/lsi53c895a.c @@ -0,0 +1,2163 @@ +/* + * QEMU LSI53C895A SCSI Host Bus Adapter emulation + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the LGPL. + */ + +/* Note: + * LSI53C810 emulation is incorrect, in the sense that it supports + * features added in later evolutions. This should not be a problem, + * as well-behaved operating systems will not try to use them. + */ + +#include <assert.h> + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/scsi/scsi.h" +#include "sysemu/dma.h" + +//#define DEBUG_LSI +//#define DEBUG_LSI_REG + +#ifdef DEBUG_LSI +#define DPRINTF(fmt, ...) \ +do { printf("lsi_scsi: " fmt , ## __VA_ARGS__); } while (0) +#define BADF(fmt, ...) \ +do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#define BADF(fmt, ...) \ +do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__);} while (0) +#endif + +#define LSI_MAX_DEVS 7 + +#define LSI_SCNTL0_TRG 0x01 +#define LSI_SCNTL0_AAP 0x02 +#define LSI_SCNTL0_EPC 0x08 +#define LSI_SCNTL0_WATN 0x10 +#define LSI_SCNTL0_START 0x20 + +#define LSI_SCNTL1_SST 0x01 +#define LSI_SCNTL1_IARB 0x02 +#define LSI_SCNTL1_AESP 0x04 +#define LSI_SCNTL1_RST 0x08 +#define LSI_SCNTL1_CON 0x10 +#define LSI_SCNTL1_DHP 0x20 +#define LSI_SCNTL1_ADB 0x40 +#define LSI_SCNTL1_EXC 0x80 + +#define LSI_SCNTL2_WSR 0x01 +#define LSI_SCNTL2_VUE0 0x02 +#define LSI_SCNTL2_VUE1 0x04 +#define LSI_SCNTL2_WSS 0x08 +#define LSI_SCNTL2_SLPHBEN 0x10 +#define LSI_SCNTL2_SLPMD 0x20 +#define LSI_SCNTL2_CHM 0x40 +#define LSI_SCNTL2_SDU 0x80 + +#define LSI_ISTAT0_DIP 0x01 +#define LSI_ISTAT0_SIP 0x02 +#define LSI_ISTAT0_INTF 0x04 +#define LSI_ISTAT0_CON 0x08 +#define LSI_ISTAT0_SEM 0x10 +#define LSI_ISTAT0_SIGP 0x20 +#define LSI_ISTAT0_SRST 0x40 +#define LSI_ISTAT0_ABRT 0x80 + +#define LSI_ISTAT1_SI 0x01 +#define LSI_ISTAT1_SRUN 0x02 +#define LSI_ISTAT1_FLSH 0x04 + +#define LSI_SSTAT0_SDP0 0x01 +#define LSI_SSTAT0_RST 0x02 +#define LSI_SSTAT0_WOA 0x04 +#define LSI_SSTAT0_LOA 0x08 +#define LSI_SSTAT0_AIP 0x10 +#define LSI_SSTAT0_OLF 0x20 +#define LSI_SSTAT0_ORF 0x40 +#define LSI_SSTAT0_ILF 0x80 + +#define LSI_SIST0_PAR 0x01 +#define LSI_SIST0_RST 0x02 +#define LSI_SIST0_UDC 0x04 +#define LSI_SIST0_SGE 0x08 +#define LSI_SIST0_RSL 0x10 +#define LSI_SIST0_SEL 0x20 +#define LSI_SIST0_CMP 0x40 +#define LSI_SIST0_MA 0x80 + +#define LSI_SIST1_HTH 0x01 +#define LSI_SIST1_GEN 0x02 +#define LSI_SIST1_STO 0x04 +#define LSI_SIST1_SBMC 0x10 + +#define LSI_SOCL_IO 0x01 +#define LSI_SOCL_CD 0x02 +#define LSI_SOCL_MSG 0x04 +#define LSI_SOCL_ATN 0x08 +#define LSI_SOCL_SEL 0x10 +#define LSI_SOCL_BSY 0x20 +#define LSI_SOCL_ACK 0x40 +#define LSI_SOCL_REQ 0x80 + +#define LSI_DSTAT_IID 0x01 +#define LSI_DSTAT_SIR 0x04 +#define LSI_DSTAT_SSI 0x08 +#define LSI_DSTAT_ABRT 0x10 +#define LSI_DSTAT_BF 0x20 +#define LSI_DSTAT_MDPE 0x40 +#define LSI_DSTAT_DFE 0x80 + +#define LSI_DCNTL_COM 0x01 +#define LSI_DCNTL_IRQD 0x02 +#define LSI_DCNTL_STD 0x04 +#define LSI_DCNTL_IRQM 0x08 +#define LSI_DCNTL_SSM 0x10 +#define LSI_DCNTL_PFEN 0x20 +#define LSI_DCNTL_PFF 0x40 +#define LSI_DCNTL_CLSE 0x80 + +#define LSI_DMODE_MAN 0x01 +#define LSI_DMODE_BOF 0x02 +#define LSI_DMODE_ERMP 0x04 +#define LSI_DMODE_ERL 0x08 +#define LSI_DMODE_DIOM 0x10 +#define LSI_DMODE_SIOM 0x20 + +#define LSI_CTEST2_DACK 0x01 +#define LSI_CTEST2_DREQ 0x02 +#define LSI_CTEST2_TEOP 0x04 +#define LSI_CTEST2_PCICIE 0x08 +#define LSI_CTEST2_CM 0x10 +#define LSI_CTEST2_CIO 0x20 +#define LSI_CTEST2_SIGP 0x40 +#define LSI_CTEST2_DDIR 0x80 + +#define LSI_CTEST5_BL2 0x04 +#define LSI_CTEST5_DDIR 0x08 +#define LSI_CTEST5_MASR 0x10 +#define LSI_CTEST5_DFSN 0x20 +#define LSI_CTEST5_BBCK 0x40 +#define LSI_CTEST5_ADCK 0x80 + +#define LSI_CCNTL0_DILS 0x01 +#define LSI_CCNTL0_DISFC 0x10 +#define LSI_CCNTL0_ENNDJ 0x20 +#define LSI_CCNTL0_PMJCTL 0x40 +#define LSI_CCNTL0_ENPMJ 0x80 + +#define LSI_CCNTL1_EN64DBMV 0x01 +#define LSI_CCNTL1_EN64TIBMV 0x02 +#define LSI_CCNTL1_64TIMOD 0x04 +#define LSI_CCNTL1_DDAC 0x08 +#define LSI_CCNTL1_ZMOD 0x80 + +/* Enable Response to Reselection */ +#define LSI_SCID_RRE 0x60 + +#define LSI_CCNTL1_40BIT (LSI_CCNTL1_EN64TIBMV|LSI_CCNTL1_64TIMOD) + +#define PHASE_DO 0 +#define PHASE_DI 1 +#define PHASE_CMD 2 +#define PHASE_ST 3 +#define PHASE_MO 6 +#define PHASE_MI 7 +#define PHASE_MASK 7 + +/* Maximum length of MSG IN data. */ +#define LSI_MAX_MSGIN_LEN 8 + +/* Flag set if this is a tagged command. */ +#define LSI_TAG_VALID (1 << 16) + +typedef struct lsi_request { + SCSIRequest *req; + uint32_t tag; + uint32_t dma_len; + uint8_t *dma_buf; + uint32_t pending; + int out; + QTAILQ_ENTRY(lsi_request) next; +} lsi_request; + +typedef struct { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion mmio_io; + MemoryRegion ram_io; + MemoryRegion io_io; + + int carry; /* ??? Should this be an a visible register somewhere? */ + int status; + /* Action to take at the end of a MSG IN phase. + 0 = COMMAND, 1 = disconnect, 2 = DATA OUT, 3 = DATA IN. */ + int msg_action; + int msg_len; + uint8_t msg[LSI_MAX_MSGIN_LEN]; + /* 0 if SCRIPTS are running or stopped. + * 1 if a Wait Reselect instruction has been issued. + * 2 if processing DMA from lsi_execute_script. + * 3 if a DMA operation is in progress. */ + int waiting; + SCSIBus bus; + int current_lun; + /* The tag is a combination of the device ID and the SCSI tag. */ + uint32_t select_tag; + int command_complete; + QTAILQ_HEAD(, lsi_request) queue; + lsi_request *current; + + uint32_t dsa; + uint32_t temp; + uint32_t dnad; + uint32_t dbc; + uint8_t istat0; + uint8_t istat1; + uint8_t dcmd; + uint8_t dstat; + uint8_t dien; + uint8_t sist0; + uint8_t sist1; + uint8_t sien0; + uint8_t sien1; + uint8_t mbox0; + uint8_t mbox1; + uint8_t dfifo; + uint8_t ctest2; + uint8_t ctest3; + uint8_t ctest4; + uint8_t ctest5; + uint8_t ccntl0; + uint8_t ccntl1; + uint32_t dsp; + uint32_t dsps; + uint8_t dmode; + uint8_t dcntl; + uint8_t scntl0; + uint8_t scntl1; + uint8_t scntl2; + uint8_t scntl3; + uint8_t sstat0; + uint8_t sstat1; + uint8_t scid; + uint8_t sxfer; + uint8_t socl; + uint8_t sdid; + uint8_t ssid; + uint8_t sfbr; + uint8_t stest1; + uint8_t stest2; + uint8_t stest3; + uint8_t sidl; + uint8_t stime0; + uint8_t respid0; + uint8_t respid1; + uint32_t mmrs; + uint32_t mmws; + uint32_t sfs; + uint32_t drs; + uint32_t sbms; + uint32_t dbms; + uint32_t dnad64; + uint32_t pmjad1; + uint32_t pmjad2; + uint32_t rbc; + uint32_t ua; + uint32_t ia; + uint32_t sbc; + uint32_t csbc; + uint32_t scratch[18]; /* SCRATCHA-SCRATCHR */ + uint8_t sbr; + uint32_t adder; + + /* Script ram is stored as 32-bit words in host byteorder. */ + uint32_t script_ram[2048]; +} LSIState; + +#define TYPE_LSI53C810 "lsi53c810" +#define TYPE_LSI53C895A "lsi53c895a" + +#define LSI53C895A(obj) \ + OBJECT_CHECK(LSIState, (obj), TYPE_LSI53C895A) + +static inline int lsi_irq_on_rsl(LSIState *s) +{ + return (s->sien0 & LSI_SIST0_RSL) && (s->scid & LSI_SCID_RRE); +} + +static void lsi_soft_reset(LSIState *s) +{ + DPRINTF("Reset\n"); + s->carry = 0; + + s->msg_action = 0; + s->msg_len = 0; + s->waiting = 0; + s->dsa = 0; + s->dnad = 0; + s->dbc = 0; + s->temp = 0; + memset(s->scratch, 0, sizeof(s->scratch)); + s->istat0 = 0; + s->istat1 = 0; + s->dcmd = 0x40; + s->dstat = LSI_DSTAT_DFE; + s->dien = 0; + s->sist0 = 0; + s->sist1 = 0; + s->sien0 = 0; + s->sien1 = 0; + s->mbox0 = 0; + s->mbox1 = 0; + s->dfifo = 0; + s->ctest2 = LSI_CTEST2_DACK; + s->ctest3 = 0; + s->ctest4 = 0; + s->ctest5 = 0; + s->ccntl0 = 0; + s->ccntl1 = 0; + s->dsp = 0; + s->dsps = 0; + s->dmode = 0; + s->dcntl = 0; + s->scntl0 = 0xc0; + s->scntl1 = 0; + s->scntl2 = 0; + s->scntl3 = 0; + s->sstat0 = 0; + s->sstat1 = 0; + s->scid = 7; + s->sxfer = 0; + s->socl = 0; + s->sdid = 0; + s->ssid = 0; + s->stest1 = 0; + s->stest2 = 0; + s->stest3 = 0; + s->sidl = 0; + s->stime0 = 0; + s->respid0 = 0x80; + s->respid1 = 0; + s->mmrs = 0; + s->mmws = 0; + s->sfs = 0; + s->drs = 0; + s->sbms = 0; + s->dbms = 0; + s->dnad64 = 0; + s->pmjad1 = 0; + s->pmjad2 = 0; + s->rbc = 0; + s->ua = 0; + s->ia = 0; + s->sbc = 0; + s->csbc = 0; + s->sbr = 0; + assert(QTAILQ_EMPTY(&s->queue)); + assert(!s->current); +} + +static int lsi_dma_40bit(LSIState *s) +{ + if ((s->ccntl1 & LSI_CCNTL1_40BIT) == LSI_CCNTL1_40BIT) + return 1; + return 0; +} + +static int lsi_dma_ti64bit(LSIState *s) +{ + if ((s->ccntl1 & LSI_CCNTL1_EN64TIBMV) == LSI_CCNTL1_EN64TIBMV) + return 1; + return 0; +} + +static int lsi_dma_64bit(LSIState *s) +{ + if ((s->ccntl1 & LSI_CCNTL1_EN64DBMV) == LSI_CCNTL1_EN64DBMV) + return 1; + return 0; +} + +static uint8_t lsi_reg_readb(LSIState *s, int offset); +static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val); +static void lsi_execute_script(LSIState *s); +static void lsi_reselect(LSIState *s, lsi_request *p); + +static inline uint32_t read_dword(LSIState *s, uint32_t addr) +{ + uint32_t buf; + + pci_dma_read(PCI_DEVICE(s), addr, &buf, 4); + return cpu_to_le32(buf); +} + +static void lsi_stop_script(LSIState *s) +{ + s->istat1 &= ~LSI_ISTAT1_SRUN; +} + +static void lsi_update_irq(LSIState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int level; + static int last_level; + lsi_request *p; + + /* It's unclear whether the DIP/SIP bits should be cleared when the + Interrupt Status Registers are cleared or when istat0 is read. + We currently do the formwer, which seems to work. */ + level = 0; + if (s->dstat) { + if (s->dstat & s->dien) + level = 1; + s->istat0 |= LSI_ISTAT0_DIP; + } else { + s->istat0 &= ~LSI_ISTAT0_DIP; + } + + if (s->sist0 || s->sist1) { + if ((s->sist0 & s->sien0) || (s->sist1 & s->sien1)) + level = 1; + s->istat0 |= LSI_ISTAT0_SIP; + } else { + s->istat0 &= ~LSI_ISTAT0_SIP; + } + if (s->istat0 & LSI_ISTAT0_INTF) + level = 1; + + if (level != last_level) { + DPRINTF("Update IRQ level %d dstat %02x sist %02x%02x\n", + level, s->dstat, s->sist1, s->sist0); + last_level = level; + } + pci_set_irq(d, level); + + if (!level && lsi_irq_on_rsl(s) && !(s->scntl1 & LSI_SCNTL1_CON)) { + DPRINTF("Handled IRQs & disconnected, looking for pending " + "processes\n"); + QTAILQ_FOREACH(p, &s->queue, next) { + if (p->pending) { + lsi_reselect(s, p); + break; + } + } + } +} + +/* Stop SCRIPTS execution and raise a SCSI interrupt. */ +static void lsi_script_scsi_interrupt(LSIState *s, int stat0, int stat1) +{ + uint32_t mask0; + uint32_t mask1; + + DPRINTF("SCSI Interrupt 0x%02x%02x prev 0x%02x%02x\n", + stat1, stat0, s->sist1, s->sist0); + s->sist0 |= stat0; + s->sist1 |= stat1; + /* Stop processor on fatal or unmasked interrupt. As a special hack + we don't stop processing when raising STO. Instead continue + execution and stop at the next insn that accesses the SCSI bus. */ + mask0 = s->sien0 | ~(LSI_SIST0_CMP | LSI_SIST0_SEL | LSI_SIST0_RSL); + mask1 = s->sien1 | ~(LSI_SIST1_GEN | LSI_SIST1_HTH); + mask1 &= ~LSI_SIST1_STO; + if (s->sist0 & mask0 || s->sist1 & mask1) { + lsi_stop_script(s); + } + lsi_update_irq(s); +} + +/* Stop SCRIPTS execution and raise a DMA interrupt. */ +static void lsi_script_dma_interrupt(LSIState *s, int stat) +{ + DPRINTF("DMA Interrupt 0x%x prev 0x%x\n", stat, s->dstat); + s->dstat |= stat; + lsi_update_irq(s); + lsi_stop_script(s); +} + +static inline void lsi_set_phase(LSIState *s, int phase) +{ + s->sstat1 = (s->sstat1 & ~PHASE_MASK) | phase; +} + +static void lsi_bad_phase(LSIState *s, int out, int new_phase) +{ + /* Trigger a phase mismatch. */ + if (s->ccntl0 & LSI_CCNTL0_ENPMJ) { + if ((s->ccntl0 & LSI_CCNTL0_PMJCTL)) { + s->dsp = out ? s->pmjad1 : s->pmjad2; + } else { + s->dsp = (s->scntl2 & LSI_SCNTL2_WSR ? s->pmjad2 : s->pmjad1); + } + DPRINTF("Data phase mismatch jump to %08x\n", s->dsp); + } else { + DPRINTF("Phase mismatch interrupt\n"); + lsi_script_scsi_interrupt(s, LSI_SIST0_MA, 0); + lsi_stop_script(s); + } + lsi_set_phase(s, new_phase); +} + + +/* Resume SCRIPTS execution after a DMA operation. */ +static void lsi_resume_script(LSIState *s) +{ + if (s->waiting != 2) { + s->waiting = 0; + lsi_execute_script(s); + } else { + s->waiting = 0; + } +} + +static void lsi_disconnect(LSIState *s) +{ + s->scntl1 &= ~LSI_SCNTL1_CON; + s->sstat1 &= ~PHASE_MASK; +} + +static void lsi_bad_selection(LSIState *s, uint32_t id) +{ + DPRINTF("Selected absent target %d\n", id); + lsi_script_scsi_interrupt(s, 0, LSI_SIST1_STO); + lsi_disconnect(s); +} + +/* Initiate a SCSI layer data transfer. */ +static void lsi_do_dma(LSIState *s, int out) +{ + PCIDevice *pci_dev; + uint32_t count; + dma_addr_t addr; + SCSIDevice *dev; + + assert(s->current); + if (!s->current->dma_len) { + /* Wait until data is available. */ + DPRINTF("DMA no data available\n"); + return; + } + + pci_dev = PCI_DEVICE(s); + dev = s->current->req->dev; + assert(dev); + + count = s->dbc; + if (count > s->current->dma_len) + count = s->current->dma_len; + + addr = s->dnad; + /* both 40 and Table Indirect 64-bit DMAs store upper bits in dnad64 */ + if (lsi_dma_40bit(s) || lsi_dma_ti64bit(s)) + addr |= ((uint64_t)s->dnad64 << 32); + else if (s->dbms) + addr |= ((uint64_t)s->dbms << 32); + else if (s->sbms) + addr |= ((uint64_t)s->sbms << 32); + + DPRINTF("DMA addr=0x" DMA_ADDR_FMT " len=%d\n", addr, count); + s->csbc += count; + s->dnad += count; + s->dbc -= count; + if (s->current->dma_buf == NULL) { + s->current->dma_buf = scsi_req_get_buf(s->current->req); + } + /* ??? Set SFBR to first data byte. */ + if (out) { + pci_dma_read(pci_dev, addr, s->current->dma_buf, count); + } else { + pci_dma_write(pci_dev, addr, s->current->dma_buf, count); + } + s->current->dma_len -= count; + if (s->current->dma_len == 0) { + s->current->dma_buf = NULL; + scsi_req_continue(s->current->req); + } else { + s->current->dma_buf += count; + lsi_resume_script(s); + } +} + + +/* Add a command to the queue. */ +static void lsi_queue_command(LSIState *s) +{ + lsi_request *p = s->current; + + DPRINTF("Queueing tag=0x%x\n", p->tag); + assert(s->current != NULL); + assert(s->current->dma_len == 0); + QTAILQ_INSERT_TAIL(&s->queue, s->current, next); + s->current = NULL; + + p->pending = 0; + p->out = (s->sstat1 & PHASE_MASK) == PHASE_DO; +} + +/* Queue a byte for a MSG IN phase. */ +static void lsi_add_msg_byte(LSIState *s, uint8_t data) +{ + if (s->msg_len >= LSI_MAX_MSGIN_LEN) { + BADF("MSG IN data too long\n"); + } else { + DPRINTF("MSG IN 0x%02x\n", data); + s->msg[s->msg_len++] = data; + } +} + +/* Perform reselection to continue a command. */ +static void lsi_reselect(LSIState *s, lsi_request *p) +{ + int id; + + assert(s->current == NULL); + QTAILQ_REMOVE(&s->queue, p, next); + s->current = p; + + id = (p->tag >> 8) & 0xf; + s->ssid = id | 0x80; + /* LSI53C700 Family Compatibility, see LSI53C895A 4-73 */ + if (!(s->dcntl & LSI_DCNTL_COM)) { + s->sfbr = 1 << (id & 0x7); + } + DPRINTF("Reselected target %d\n", id); + s->scntl1 |= LSI_SCNTL1_CON; + lsi_set_phase(s, PHASE_MI); + s->msg_action = p->out ? 2 : 3; + s->current->dma_len = p->pending; + lsi_add_msg_byte(s, 0x80); + if (s->current->tag & LSI_TAG_VALID) { + lsi_add_msg_byte(s, 0x20); + lsi_add_msg_byte(s, p->tag & 0xff); + } + + if (lsi_irq_on_rsl(s)) { + lsi_script_scsi_interrupt(s, LSI_SIST0_RSL, 0); + } +} + +static lsi_request *lsi_find_by_tag(LSIState *s, uint32_t tag) +{ + lsi_request *p; + + QTAILQ_FOREACH(p, &s->queue, next) { + if (p->tag == tag) { + return p; + } + } + + return NULL; +} + +static void lsi_request_free(LSIState *s, lsi_request *p) +{ + if (p == s->current) { + s->current = NULL; + } else { + QTAILQ_REMOVE(&s->queue, p, next); + } + g_free(p); +} + +static void lsi_request_cancelled(SCSIRequest *req) +{ + LSIState *s = LSI53C895A(req->bus->qbus.parent); + lsi_request *p = req->hba_private; + + req->hba_private = NULL; + lsi_request_free(s, p); + scsi_req_unref(req); +} + +/* Record that data is available for a queued command. Returns zero if + the device was reselected, nonzero if the IO is deferred. */ +static int lsi_queue_req(LSIState *s, SCSIRequest *req, uint32_t len) +{ + lsi_request *p = req->hba_private; + + if (p->pending) { + BADF("Multiple IO pending for request %p\n", p); + } + p->pending = len; + /* Reselect if waiting for it, or if reselection triggers an IRQ + and the bus is free. + Since no interrupt stacking is implemented in the emulation, it + is also required that there are no pending interrupts waiting + for service from the device driver. */ + if (s->waiting == 1 || + (lsi_irq_on_rsl(s) && !(s->scntl1 & LSI_SCNTL1_CON) && + !(s->istat0 & (LSI_ISTAT0_SIP | LSI_ISTAT0_DIP)))) { + /* Reselect device. */ + lsi_reselect(s, p); + return 0; + } else { + DPRINTF("Queueing IO tag=0x%x\n", p->tag); + p->pending = len; + return 1; + } +} + + /* Callback to indicate that the SCSI layer has completed a command. */ +static void lsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid) +{ + LSIState *s = LSI53C895A(req->bus->qbus.parent); + int out; + + out = (s->sstat1 & PHASE_MASK) == PHASE_DO; + DPRINTF("Command complete status=%d\n", (int)status); + s->status = status; + s->command_complete = 2; + if (s->waiting && s->dbc != 0) { + /* Raise phase mismatch for short transfers. */ + lsi_bad_phase(s, out, PHASE_ST); + } else { + lsi_set_phase(s, PHASE_ST); + } + + if (req->hba_private == s->current) { + req->hba_private = NULL; + lsi_request_free(s, s->current); + scsi_req_unref(req); + } + lsi_resume_script(s); +} + + /* Callback to indicate that the SCSI layer has completed a transfer. */ +static void lsi_transfer_data(SCSIRequest *req, uint32_t len) +{ + LSIState *s = LSI53C895A(req->bus->qbus.parent); + int out; + + assert(req->hba_private); + if (s->waiting == 1 || req->hba_private != s->current || + (lsi_irq_on_rsl(s) && !(s->scntl1 & LSI_SCNTL1_CON))) { + if (lsi_queue_req(s, req, len)) { + return; + } + } + + out = (s->sstat1 & PHASE_MASK) == PHASE_DO; + + /* host adapter (re)connected */ + DPRINTF("Data ready tag=0x%x len=%d\n", req->tag, len); + s->current->dma_len = len; + s->command_complete = 1; + if (s->waiting) { + if (s->waiting == 1 || s->dbc == 0) { + lsi_resume_script(s); + } else { + lsi_do_dma(s, out); + } + } +} + +static void lsi_do_command(LSIState *s) +{ + SCSIDevice *dev; + uint8_t buf[16]; + uint32_t id; + int n; + + DPRINTF("Send command len=%d\n", s->dbc); + if (s->dbc > 16) + s->dbc = 16; + pci_dma_read(PCI_DEVICE(s), s->dnad, buf, s->dbc); + s->sfbr = buf[0]; + s->command_complete = 0; + + id = (s->select_tag >> 8) & 0xf; + dev = scsi_device_find(&s->bus, 0, id, s->current_lun); + if (!dev) { + lsi_bad_selection(s, id); + return; + } + + assert(s->current == NULL); + s->current = g_new0(lsi_request, 1); + s->current->tag = s->select_tag; + s->current->req = scsi_req_new(dev, s->current->tag, s->current_lun, buf, + s->current); + + n = scsi_req_enqueue(s->current->req); + if (n) { + if (n > 0) { + lsi_set_phase(s, PHASE_DI); + } else if (n < 0) { + lsi_set_phase(s, PHASE_DO); + } + scsi_req_continue(s->current->req); + } + if (!s->command_complete) { + if (n) { + /* Command did not complete immediately so disconnect. */ + lsi_add_msg_byte(s, 2); /* SAVE DATA POINTER */ + lsi_add_msg_byte(s, 4); /* DISCONNECT */ + /* wait data */ + lsi_set_phase(s, PHASE_MI); + s->msg_action = 1; + lsi_queue_command(s); + } else { + /* wait command complete */ + lsi_set_phase(s, PHASE_DI); + } + } +} + +static void lsi_do_status(LSIState *s) +{ + uint8_t status; + DPRINTF("Get status len=%d status=%d\n", s->dbc, s->status); + if (s->dbc != 1) + BADF("Bad Status move\n"); + s->dbc = 1; + status = s->status; + s->sfbr = status; + pci_dma_write(PCI_DEVICE(s), s->dnad, &status, 1); + lsi_set_phase(s, PHASE_MI); + s->msg_action = 1; + lsi_add_msg_byte(s, 0); /* COMMAND COMPLETE */ +} + +static void lsi_do_msgin(LSIState *s) +{ + int len; + DPRINTF("Message in len=%d/%d\n", s->dbc, s->msg_len); + s->sfbr = s->msg[0]; + len = s->msg_len; + if (len > s->dbc) + len = s->dbc; + pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len); + /* Linux drivers rely on the last byte being in the SIDL. */ + s->sidl = s->msg[len - 1]; + s->msg_len -= len; + if (s->msg_len) { + memmove(s->msg, s->msg + len, s->msg_len); + } else { + /* ??? Check if ATN (not yet implemented) is asserted and maybe + switch to PHASE_MO. */ + switch (s->msg_action) { + case 0: + lsi_set_phase(s, PHASE_CMD); + break; + case 1: + lsi_disconnect(s); + break; + case 2: + lsi_set_phase(s, PHASE_DO); + break; + case 3: + lsi_set_phase(s, PHASE_DI); + break; + default: + abort(); + } + } +} + +/* Read the next byte during a MSGOUT phase. */ +static uint8_t lsi_get_msgbyte(LSIState *s) +{ + uint8_t data; + pci_dma_read(PCI_DEVICE(s), s->dnad, &data, 1); + s->dnad++; + s->dbc--; + return data; +} + +/* Skip the next n bytes during a MSGOUT phase. */ +static void lsi_skip_msgbytes(LSIState *s, unsigned int n) +{ + s->dnad += n; + s->dbc -= n; +} + +static void lsi_do_msgout(LSIState *s) +{ + uint8_t msg; + int len; + uint32_t current_tag; + lsi_request *current_req, *p, *p_next; + + if (s->current) { + current_tag = s->current->tag; + current_req = s->current; + } else { + current_tag = s->select_tag; + current_req = lsi_find_by_tag(s, current_tag); + } + + DPRINTF("MSG out len=%d\n", s->dbc); + while (s->dbc) { + msg = lsi_get_msgbyte(s); + s->sfbr = msg; + + switch (msg) { + case 0x04: + DPRINTF("MSG: Disconnect\n"); + lsi_disconnect(s); + break; + case 0x08: + DPRINTF("MSG: No Operation\n"); + lsi_set_phase(s, PHASE_CMD); + break; + case 0x01: + len = lsi_get_msgbyte(s); + msg = lsi_get_msgbyte(s); + (void)len; /* avoid a warning about unused variable*/ + DPRINTF("Extended message 0x%x (len %d)\n", msg, len); + switch (msg) { + case 1: + DPRINTF("SDTR (ignored)\n"); + lsi_skip_msgbytes(s, 2); + break; + case 3: + DPRINTF("WDTR (ignored)\n"); + lsi_skip_msgbytes(s, 1); + break; + default: + goto bad; + } + break; + case 0x20: /* SIMPLE queue */ + s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID; + DPRINTF("SIMPLE queue tag=0x%x\n", s->select_tag & 0xff); + break; + case 0x21: /* HEAD of queue */ + BADF("HEAD queue not implemented\n"); + s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID; + break; + case 0x22: /* ORDERED queue */ + BADF("ORDERED queue not implemented\n"); + s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID; + break; + case 0x0d: + /* The ABORT TAG message clears the current I/O process only. */ + DPRINTF("MSG: ABORT TAG tag=0x%x\n", current_tag); + if (current_req) { + scsi_req_cancel(current_req->req); + } + lsi_disconnect(s); + break; + case 0x06: + case 0x0e: + case 0x0c: + /* The ABORT message clears all I/O processes for the selecting + initiator on the specified logical unit of the target. */ + if (msg == 0x06) { + DPRINTF("MSG: ABORT tag=0x%x\n", current_tag); + } + /* The CLEAR QUEUE message clears all I/O processes for all + initiators on the specified logical unit of the target. */ + if (msg == 0x0e) { + DPRINTF("MSG: CLEAR QUEUE tag=0x%x\n", current_tag); + } + /* The BUS DEVICE RESET message clears all I/O processes for all + initiators on all logical units of the target. */ + if (msg == 0x0c) { + DPRINTF("MSG: BUS DEVICE RESET tag=0x%x\n", current_tag); + } + + /* clear the current I/O process */ + if (s->current) { + scsi_req_cancel(s->current->req); + } + + /* As the current implemented devices scsi_disk and scsi_generic + only support one LUN, we don't need to keep track of LUNs. + Clearing I/O processes for other initiators could be possible + for scsi_generic by sending a SG_SCSI_RESET to the /dev/sgX + device, but this is currently not implemented (and seems not + to be really necessary). So let's simply clear all queued + commands for the current device: */ + QTAILQ_FOREACH_SAFE(p, &s->queue, next, p_next) { + if ((p->tag & 0x0000ff00) == (current_tag & 0x0000ff00)) { + scsi_req_cancel(p->req); + } + } + + lsi_disconnect(s); + break; + default: + if ((msg & 0x80) == 0) { + goto bad; + } + s->current_lun = msg & 7; + DPRINTF("Select LUN %d\n", s->current_lun); + lsi_set_phase(s, PHASE_CMD); + break; + } + } + return; +bad: + BADF("Unimplemented message 0x%02x\n", msg); + lsi_set_phase(s, PHASE_MI); + lsi_add_msg_byte(s, 7); /* MESSAGE REJECT */ + s->msg_action = 0; +} + +#define LSI_BUF_SIZE 4096 +static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) +{ + PCIDevice *d = PCI_DEVICE(s); + int n; + uint8_t buf[LSI_BUF_SIZE]; + + DPRINTF("memcpy dest 0x%08x src 0x%08x count %d\n", dest, src, count); + while (count) { + n = (count > LSI_BUF_SIZE) ? LSI_BUF_SIZE : count; + pci_dma_read(d, src, buf, n); + pci_dma_write(d, dest, buf, n); + src += n; + dest += n; + count -= n; + } +} + +static void lsi_wait_reselect(LSIState *s) +{ + lsi_request *p; + + DPRINTF("Wait Reselect\n"); + + QTAILQ_FOREACH(p, &s->queue, next) { + if (p->pending) { + lsi_reselect(s, p); + break; + } + } + if (s->current == NULL) { + s->waiting = 1; + } +} + +static void lsi_execute_script(LSIState *s) +{ + PCIDevice *pci_dev = PCI_DEVICE(s); + uint32_t insn; + uint32_t addr, addr_high; + int opcode; + int insn_processed = 0; + + s->istat1 |= LSI_ISTAT1_SRUN; +again: + insn_processed++; + insn = read_dword(s, s->dsp); + if (!insn) { + /* If we receive an empty opcode increment the DSP by 4 bytes + instead of 8 and execute the next opcode at that location */ + s->dsp += 4; + goto again; + } + addr = read_dword(s, s->dsp + 4); + addr_high = 0; + DPRINTF("SCRIPTS dsp=%08x opcode %08x arg %08x\n", s->dsp, insn, addr); + s->dsps = addr; + s->dcmd = insn >> 24; + s->dsp += 8; + switch (insn >> 30) { + case 0: /* Block move. */ + if (s->sist1 & LSI_SIST1_STO) { + DPRINTF("Delayed select timeout\n"); + lsi_stop_script(s); + break; + } + s->dbc = insn & 0xffffff; + s->rbc = s->dbc; + /* ??? Set ESA. */ + s->ia = s->dsp - 8; + if (insn & (1 << 29)) { + /* Indirect addressing. */ + addr = read_dword(s, addr); + } else if (insn & (1 << 28)) { + uint32_t buf[2]; + int32_t offset; + /* Table indirect addressing. */ + + /* 32-bit Table indirect */ + offset = sextract32(addr, 0, 24); + pci_dma_read(pci_dev, s->dsa + offset, buf, 8); + /* byte count is stored in bits 0:23 only */ + s->dbc = cpu_to_le32(buf[0]) & 0xffffff; + s->rbc = s->dbc; + addr = cpu_to_le32(buf[1]); + + /* 40-bit DMA, upper addr bits [39:32] stored in first DWORD of + * table, bits [31:24] */ + if (lsi_dma_40bit(s)) + addr_high = cpu_to_le32(buf[0]) >> 24; + else if (lsi_dma_ti64bit(s)) { + int selector = (cpu_to_le32(buf[0]) >> 24) & 0x1f; + switch (selector) { + case 0 ... 0x0f: + /* offset index into scratch registers since + * TI64 mode can use registers C to R */ + addr_high = s->scratch[2 + selector]; + break; + case 0x10: + addr_high = s->mmrs; + break; + case 0x11: + addr_high = s->mmws; + break; + case 0x12: + addr_high = s->sfs; + break; + case 0x13: + addr_high = s->drs; + break; + case 0x14: + addr_high = s->sbms; + break; + case 0x15: + addr_high = s->dbms; + break; + default: + BADF("Illegal selector specified (0x%x > 0x15)" + " for 64-bit DMA block move", selector); + break; + } + } + } else if (lsi_dma_64bit(s)) { + /* fetch a 3rd dword if 64-bit direct move is enabled and + only if we're not doing table indirect or indirect addressing */ + s->dbms = read_dword(s, s->dsp); + s->dsp += 4; + s->ia = s->dsp - 12; + } + if ((s->sstat1 & PHASE_MASK) != ((insn >> 24) & 7)) { + DPRINTF("Wrong phase got %d expected %d\n", + s->sstat1 & PHASE_MASK, (insn >> 24) & 7); + lsi_script_scsi_interrupt(s, LSI_SIST0_MA, 0); + break; + } + s->dnad = addr; + s->dnad64 = addr_high; + switch (s->sstat1 & 0x7) { + case PHASE_DO: + s->waiting = 2; + lsi_do_dma(s, 1); + if (s->waiting) + s->waiting = 3; + break; + case PHASE_DI: + s->waiting = 2; + lsi_do_dma(s, 0); + if (s->waiting) + s->waiting = 3; + break; + case PHASE_CMD: + lsi_do_command(s); + break; + case PHASE_ST: + lsi_do_status(s); + break; + case PHASE_MO: + lsi_do_msgout(s); + break; + case PHASE_MI: + lsi_do_msgin(s); + break; + default: + BADF("Unimplemented phase %d\n", s->sstat1 & PHASE_MASK); + exit(1); + } + s->dfifo = s->dbc & 0xff; + s->ctest5 = (s->ctest5 & 0xfc) | ((s->dbc >> 8) & 3); + s->sbc = s->dbc; + s->rbc -= s->dbc; + s->ua = addr + s->dbc; + break; + + case 1: /* IO or Read/Write instruction. */ + opcode = (insn >> 27) & 7; + if (opcode < 5) { + uint32_t id; + + if (insn & (1 << 25)) { + id = read_dword(s, s->dsa + sextract32(insn, 0, 24)); + } else { + id = insn; + } + id = (id >> 16) & 0xf; + if (insn & (1 << 26)) { + addr = s->dsp + sextract32(addr, 0, 24); + } + s->dnad = addr; + switch (opcode) { + case 0: /* Select */ + s->sdid = id; + if (s->scntl1 & LSI_SCNTL1_CON) { + DPRINTF("Already reselected, jumping to alternative address\n"); + s->dsp = s->dnad; + break; + } + s->sstat0 |= LSI_SSTAT0_WOA; + s->scntl1 &= ~LSI_SCNTL1_IARB; + if (!scsi_device_find(&s->bus, 0, id, 0)) { + lsi_bad_selection(s, id); + break; + } + DPRINTF("Selected target %d%s\n", + id, insn & (1 << 3) ? " ATN" : ""); + /* ??? Linux drivers compain when this is set. Maybe + it only applies in low-level mode (unimplemented). + lsi_script_scsi_interrupt(s, LSI_SIST0_CMP, 0); */ + s->select_tag = id << 8; + s->scntl1 |= LSI_SCNTL1_CON; + if (insn & (1 << 3)) { + s->socl |= LSI_SOCL_ATN; + } + lsi_set_phase(s, PHASE_MO); + break; + case 1: /* Disconnect */ + DPRINTF("Wait Disconnect\n"); + s->scntl1 &= ~LSI_SCNTL1_CON; + break; + case 2: /* Wait Reselect */ + if (!lsi_irq_on_rsl(s)) { + lsi_wait_reselect(s); + } + break; + case 3: /* Set */ + DPRINTF("Set%s%s%s%s\n", + insn & (1 << 3) ? " ATN" : "", + insn & (1 << 6) ? " ACK" : "", + insn & (1 << 9) ? " TM" : "", + insn & (1 << 10) ? " CC" : ""); + if (insn & (1 << 3)) { + s->socl |= LSI_SOCL_ATN; + lsi_set_phase(s, PHASE_MO); + } + if (insn & (1 << 9)) { + BADF("Target mode not implemented\n"); + exit(1); + } + if (insn & (1 << 10)) + s->carry = 1; + break; + case 4: /* Clear */ + DPRINTF("Clear%s%s%s%s\n", + insn & (1 << 3) ? " ATN" : "", + insn & (1 << 6) ? " ACK" : "", + insn & (1 << 9) ? " TM" : "", + insn & (1 << 10) ? " CC" : ""); + if (insn & (1 << 3)) { + s->socl &= ~LSI_SOCL_ATN; + } + if (insn & (1 << 10)) + s->carry = 0; + break; + } + } else { + uint8_t op0; + uint8_t op1; + uint8_t data8; + int reg; + int operator; +#ifdef DEBUG_LSI + static const char *opcode_names[3] = + {"Write", "Read", "Read-Modify-Write"}; + static const char *operator_names[8] = + {"MOV", "SHL", "OR", "XOR", "AND", "SHR", "ADD", "ADC"}; +#endif + + reg = ((insn >> 16) & 0x7f) | (insn & 0x80); + data8 = (insn >> 8) & 0xff; + opcode = (insn >> 27) & 7; + operator = (insn >> 24) & 7; + DPRINTF("%s reg 0x%x %s data8=0x%02x sfbr=0x%02x%s\n", + opcode_names[opcode - 5], reg, + operator_names[operator], data8, s->sfbr, + (insn & (1 << 23)) ? " SFBR" : ""); + op0 = op1 = 0; + switch (opcode) { + case 5: /* From SFBR */ + op0 = s->sfbr; + op1 = data8; + break; + case 6: /* To SFBR */ + if (operator) + op0 = lsi_reg_readb(s, reg); + op1 = data8; + break; + case 7: /* Read-modify-write */ + if (operator) + op0 = lsi_reg_readb(s, reg); + if (insn & (1 << 23)) { + op1 = s->sfbr; + } else { + op1 = data8; + } + break; + } + + switch (operator) { + case 0: /* move */ + op0 = op1; + break; + case 1: /* Shift left */ + op1 = op0 >> 7; + op0 = (op0 << 1) | s->carry; + s->carry = op1; + break; + case 2: /* OR */ + op0 |= op1; + break; + case 3: /* XOR */ + op0 ^= op1; + break; + case 4: /* AND */ + op0 &= op1; + break; + case 5: /* SHR */ + op1 = op0 & 1; + op0 = (op0 >> 1) | (s->carry << 7); + s->carry = op1; + break; + case 6: /* ADD */ + op0 += op1; + s->carry = op0 < op1; + break; + case 7: /* ADC */ + op0 += op1 + s->carry; + if (s->carry) + s->carry = op0 <= op1; + else + s->carry = op0 < op1; + break; + } + + switch (opcode) { + case 5: /* From SFBR */ + case 7: /* Read-modify-write */ + lsi_reg_writeb(s, reg, op0); + break; + case 6: /* To SFBR */ + s->sfbr = op0; + break; + } + } + break; + + case 2: /* Transfer Control. */ + { + int cond; + int jmp; + + if ((insn & 0x002e0000) == 0) { + DPRINTF("NOP\n"); + break; + } + if (s->sist1 & LSI_SIST1_STO) { + DPRINTF("Delayed select timeout\n"); + lsi_stop_script(s); + break; + } + cond = jmp = (insn & (1 << 19)) != 0; + if (cond == jmp && (insn & (1 << 21))) { + DPRINTF("Compare carry %d\n", s->carry == jmp); + cond = s->carry != 0; + } + if (cond == jmp && (insn & (1 << 17))) { + DPRINTF("Compare phase %d %c= %d\n", + (s->sstat1 & PHASE_MASK), + jmp ? '=' : '!', + ((insn >> 24) & 7)); + cond = (s->sstat1 & PHASE_MASK) == ((insn >> 24) & 7); + } + if (cond == jmp && (insn & (1 << 18))) { + uint8_t mask; + + mask = (~insn >> 8) & 0xff; + DPRINTF("Compare data 0x%x & 0x%x %c= 0x%x\n", + s->sfbr, mask, jmp ? '=' : '!', insn & mask); + cond = (s->sfbr & mask) == (insn & mask); + } + if (cond == jmp) { + if (insn & (1 << 23)) { + /* Relative address. */ + addr = s->dsp + sextract32(addr, 0, 24); + } + switch ((insn >> 27) & 7) { + case 0: /* Jump */ + DPRINTF("Jump to 0x%08x\n", addr); + s->adder = addr; + s->dsp = addr; + break; + case 1: /* Call */ + DPRINTF("Call 0x%08x\n", addr); + s->temp = s->dsp; + s->dsp = addr; + break; + case 2: /* Return */ + DPRINTF("Return to 0x%08x\n", s->temp); + s->dsp = s->temp; + break; + case 3: /* Interrupt */ + DPRINTF("Interrupt 0x%08x\n", s->dsps); + if ((insn & (1 << 20)) != 0) { + s->istat0 |= LSI_ISTAT0_INTF; + lsi_update_irq(s); + } else { + lsi_script_dma_interrupt(s, LSI_DSTAT_SIR); + } + break; + default: + DPRINTF("Illegal transfer control\n"); + lsi_script_dma_interrupt(s, LSI_DSTAT_IID); + break; + } + } else { + DPRINTF("Control condition failed\n"); + } + } + break; + + case 3: + if ((insn & (1 << 29)) == 0) { + /* Memory move. */ + uint32_t dest; + /* ??? The docs imply the destination address is loaded into + the TEMP register. However the Linux drivers rely on + the value being presrved. */ + dest = read_dword(s, s->dsp); + s->dsp += 4; + lsi_memcpy(s, dest, addr, insn & 0xffffff); + } else { + uint8_t data[7]; + int reg; + int n; + int i; + + if (insn & (1 << 28)) { + addr = s->dsa + sextract32(addr, 0, 24); + } + n = (insn & 7); + reg = (insn >> 16) & 0xff; + if (insn & (1 << 24)) { + pci_dma_read(pci_dev, addr, data, n); + DPRINTF("Load reg 0x%x size %d addr 0x%08x = %08x\n", reg, n, + addr, *(int *)data); + for (i = 0; i < n; i++) { + lsi_reg_writeb(s, reg + i, data[i]); + } + } else { + DPRINTF("Store reg 0x%x size %d addr 0x%08x\n", reg, n, addr); + for (i = 0; i < n; i++) { + data[i] = lsi_reg_readb(s, reg + i); + } + pci_dma_write(pci_dev, addr, data, n); + } + } + } + if (insn_processed > 10000 && !s->waiting) { + /* Some windows drivers make the device spin waiting for a memory + location to change. If we have been executed a lot of code then + assume this is the case and force an unexpected device disconnect. + This is apparently sufficient to beat the drivers into submission. + */ + if (!(s->sien0 & LSI_SIST0_UDC)) + fprintf(stderr, "inf. loop with UDC masked\n"); + lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); + lsi_disconnect(s); + } else if (s->istat1 & LSI_ISTAT1_SRUN && !s->waiting) { + if (s->dcntl & LSI_DCNTL_SSM) { + lsi_script_dma_interrupt(s, LSI_DSTAT_SSI); + } else { + goto again; + } + } + DPRINTF("SCRIPTS execution stopped\n"); +} + +static uint8_t lsi_reg_readb(LSIState *s, int offset) +{ + uint8_t tmp; +#define CASE_GET_REG24(name, addr) \ + case addr: return s->name & 0xff; \ + case addr + 1: return (s->name >> 8) & 0xff; \ + case addr + 2: return (s->name >> 16) & 0xff; + +#define CASE_GET_REG32(name, addr) \ + case addr: return s->name & 0xff; \ + case addr + 1: return (s->name >> 8) & 0xff; \ + case addr + 2: return (s->name >> 16) & 0xff; \ + case addr + 3: return (s->name >> 24) & 0xff; + +#ifdef DEBUG_LSI_REG + DPRINTF("Read reg %x\n", offset); +#endif + switch (offset) { + case 0x00: /* SCNTL0 */ + return s->scntl0; + case 0x01: /* SCNTL1 */ + return s->scntl1; + case 0x02: /* SCNTL2 */ + return s->scntl2; + case 0x03: /* SCNTL3 */ + return s->scntl3; + case 0x04: /* SCID */ + return s->scid; + case 0x05: /* SXFER */ + return s->sxfer; + case 0x06: /* SDID */ + return s->sdid; + case 0x07: /* GPREG0 */ + return 0x7f; + case 0x08: /* Revision ID */ + return 0x00; + case 0x09: /* SOCL */ + return s->socl; + case 0xa: /* SSID */ + return s->ssid; + case 0xb: /* SBCL */ + /* ??? This is not correct. However it's (hopefully) only + used for diagnostics, so should be ok. */ + return 0; + case 0xc: /* DSTAT */ + tmp = s->dstat | LSI_DSTAT_DFE; + if ((s->istat0 & LSI_ISTAT0_INTF) == 0) + s->dstat = 0; + lsi_update_irq(s); + return tmp; + case 0x0d: /* SSTAT0 */ + return s->sstat0; + case 0x0e: /* SSTAT1 */ + return s->sstat1; + case 0x0f: /* SSTAT2 */ + return s->scntl1 & LSI_SCNTL1_CON ? 0 : 2; + CASE_GET_REG32(dsa, 0x10) + case 0x14: /* ISTAT0 */ + return s->istat0; + case 0x15: /* ISTAT1 */ + return s->istat1; + case 0x16: /* MBOX0 */ + return s->mbox0; + case 0x17: /* MBOX1 */ + return s->mbox1; + case 0x18: /* CTEST0 */ + return 0xff; + case 0x19: /* CTEST1 */ + return 0; + case 0x1a: /* CTEST2 */ + tmp = s->ctest2 | LSI_CTEST2_DACK | LSI_CTEST2_CM; + if (s->istat0 & LSI_ISTAT0_SIGP) { + s->istat0 &= ~LSI_ISTAT0_SIGP; + tmp |= LSI_CTEST2_SIGP; + } + return tmp; + case 0x1b: /* CTEST3 */ + return s->ctest3; + CASE_GET_REG32(temp, 0x1c) + case 0x20: /* DFIFO */ + return 0; + case 0x21: /* CTEST4 */ + return s->ctest4; + case 0x22: /* CTEST5 */ + return s->ctest5; + case 0x23: /* CTEST6 */ + return 0; + CASE_GET_REG24(dbc, 0x24) + case 0x27: /* DCMD */ + return s->dcmd; + CASE_GET_REG32(dnad, 0x28) + CASE_GET_REG32(dsp, 0x2c) + CASE_GET_REG32(dsps, 0x30) + CASE_GET_REG32(scratch[0], 0x34) + case 0x38: /* DMODE */ + return s->dmode; + case 0x39: /* DIEN */ + return s->dien; + case 0x3a: /* SBR */ + return s->sbr; + case 0x3b: /* DCNTL */ + return s->dcntl; + /* ADDER Output (Debug of relative jump address) */ + CASE_GET_REG32(adder, 0x3c) + case 0x40: /* SIEN0 */ + return s->sien0; + case 0x41: /* SIEN1 */ + return s->sien1; + case 0x42: /* SIST0 */ + tmp = s->sist0; + s->sist0 = 0; + lsi_update_irq(s); + return tmp; + case 0x43: /* SIST1 */ + tmp = s->sist1; + s->sist1 = 0; + lsi_update_irq(s); + return tmp; + case 0x46: /* MACNTL */ + return 0x0f; + case 0x47: /* GPCNTL0 */ + return 0x0f; + case 0x48: /* STIME0 */ + return s->stime0; + case 0x4a: /* RESPID0 */ + return s->respid0; + case 0x4b: /* RESPID1 */ + return s->respid1; + case 0x4d: /* STEST1 */ + return s->stest1; + case 0x4e: /* STEST2 */ + return s->stest2; + case 0x4f: /* STEST3 */ + return s->stest3; + case 0x50: /* SIDL */ + /* This is needed by the linux drivers. We currently only update it + during the MSG IN phase. */ + return s->sidl; + case 0x52: /* STEST4 */ + return 0xe0; + case 0x56: /* CCNTL0 */ + return s->ccntl0; + case 0x57: /* CCNTL1 */ + return s->ccntl1; + case 0x58: /* SBDL */ + /* Some drivers peek at the data bus during the MSG IN phase. */ + if ((s->sstat1 & PHASE_MASK) == PHASE_MI) + return s->msg[0]; + return 0; + case 0x59: /* SBDL high */ + return 0; + CASE_GET_REG32(mmrs, 0xa0) + CASE_GET_REG32(mmws, 0xa4) + CASE_GET_REG32(sfs, 0xa8) + CASE_GET_REG32(drs, 0xac) + CASE_GET_REG32(sbms, 0xb0) + CASE_GET_REG32(dbms, 0xb4) + CASE_GET_REG32(dnad64, 0xb8) + CASE_GET_REG32(pmjad1, 0xc0) + CASE_GET_REG32(pmjad2, 0xc4) + CASE_GET_REG32(rbc, 0xc8) + CASE_GET_REG32(ua, 0xcc) + CASE_GET_REG32(ia, 0xd4) + CASE_GET_REG32(sbc, 0xd8) + CASE_GET_REG32(csbc, 0xdc) + } + if (offset >= 0x5c && offset < 0xa0) { + int n; + int shift; + n = (offset - 0x58) >> 2; + shift = (offset & 3) * 8; + return (s->scratch[n] >> shift) & 0xff; + } + BADF("readb 0x%x\n", offset); + exit(1); +#undef CASE_GET_REG24 +#undef CASE_GET_REG32 +} + +static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) +{ +#define CASE_SET_REG24(name, addr) \ + case addr : s->name &= 0xffffff00; s->name |= val; break; \ + case addr + 1: s->name &= 0xffff00ff; s->name |= val << 8; break; \ + case addr + 2: s->name &= 0xff00ffff; s->name |= val << 16; break; + +#define CASE_SET_REG32(name, addr) \ + case addr : s->name &= 0xffffff00; s->name |= val; break; \ + case addr + 1: s->name &= 0xffff00ff; s->name |= val << 8; break; \ + case addr + 2: s->name &= 0xff00ffff; s->name |= val << 16; break; \ + case addr + 3: s->name &= 0x00ffffff; s->name |= val << 24; break; + +#ifdef DEBUG_LSI_REG + DPRINTF("Write reg %x = %02x\n", offset, val); +#endif + switch (offset) { + case 0x00: /* SCNTL0 */ + s->scntl0 = val; + if (val & LSI_SCNTL0_START) { + BADF("Start sequence not implemented\n"); + } + break; + case 0x01: /* SCNTL1 */ + s->scntl1 = val & ~LSI_SCNTL1_SST; + if (val & LSI_SCNTL1_IARB) { + BADF("Immediate Arbritration not implemented\n"); + } + if (val & LSI_SCNTL1_RST) { + if (!(s->sstat0 & LSI_SSTAT0_RST)) { + qbus_reset_all(&s->bus.qbus); + s->sstat0 |= LSI_SSTAT0_RST; + lsi_script_scsi_interrupt(s, LSI_SIST0_RST, 0); + } + } else { + s->sstat0 &= ~LSI_SSTAT0_RST; + } + break; + case 0x02: /* SCNTL2 */ + val &= ~(LSI_SCNTL2_WSR | LSI_SCNTL2_WSS); + s->scntl2 = val; + break; + case 0x03: /* SCNTL3 */ + s->scntl3 = val; + break; + case 0x04: /* SCID */ + s->scid = val; + break; + case 0x05: /* SXFER */ + s->sxfer = val; + break; + case 0x06: /* SDID */ + if ((s->ssid & 0x80) && (val & 0xf) != (s->ssid & 0xf)) { + BADF("Destination ID does not match SSID\n"); + } + s->sdid = val & 0xf; + break; + case 0x07: /* GPREG0 */ + break; + case 0x08: /* SFBR */ + /* The CPU is not allowed to write to this register. However the + SCRIPTS register move instructions are. */ + s->sfbr = val; + break; + case 0x0a: case 0x0b: + /* Openserver writes to these readonly registers on startup */ + return; + case 0x0c: case 0x0d: case 0x0e: case 0x0f: + /* Linux writes to these readonly registers on startup. */ + return; + CASE_SET_REG32(dsa, 0x10) + case 0x14: /* ISTAT0 */ + s->istat0 = (s->istat0 & 0x0f) | (val & 0xf0); + if (val & LSI_ISTAT0_ABRT) { + lsi_script_dma_interrupt(s, LSI_DSTAT_ABRT); + } + if (val & LSI_ISTAT0_INTF) { + s->istat0 &= ~LSI_ISTAT0_INTF; + lsi_update_irq(s); + } + if (s->waiting == 1 && val & LSI_ISTAT0_SIGP) { + DPRINTF("Woken by SIGP\n"); + s->waiting = 0; + s->dsp = s->dnad; + lsi_execute_script(s); + } + if (val & LSI_ISTAT0_SRST) { + qdev_reset_all(DEVICE(s)); + } + break; + case 0x16: /* MBOX0 */ + s->mbox0 = val; + break; + case 0x17: /* MBOX1 */ + s->mbox1 = val; + break; + case 0x18: /* CTEST0 */ + /* nothing to do */ + break; + case 0x1a: /* CTEST2 */ + s->ctest2 = val & LSI_CTEST2_PCICIE; + break; + case 0x1b: /* CTEST3 */ + s->ctest3 = val & 0x0f; + break; + CASE_SET_REG32(temp, 0x1c) + case 0x21: /* CTEST4 */ + if (val & 7) { + BADF("Unimplemented CTEST4-FBL 0x%x\n", val); + } + s->ctest4 = val; + break; + case 0x22: /* CTEST5 */ + if (val & (LSI_CTEST5_ADCK | LSI_CTEST5_BBCK)) { + BADF("CTEST5 DMA increment not implemented\n"); + } + s->ctest5 = val; + break; + CASE_SET_REG24(dbc, 0x24) + CASE_SET_REG32(dnad, 0x28) + case 0x2c: /* DSP[0:7] */ + s->dsp &= 0xffffff00; + s->dsp |= val; + break; + case 0x2d: /* DSP[8:15] */ + s->dsp &= 0xffff00ff; + s->dsp |= val << 8; + break; + case 0x2e: /* DSP[16:23] */ + s->dsp &= 0xff00ffff; + s->dsp |= val << 16; + break; + case 0x2f: /* DSP[24:31] */ + s->dsp &= 0x00ffffff; + s->dsp |= val << 24; + if ((s->dmode & LSI_DMODE_MAN) == 0 + && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); + break; + CASE_SET_REG32(dsps, 0x30) + CASE_SET_REG32(scratch[0], 0x34) + case 0x38: /* DMODE */ + if (val & (LSI_DMODE_SIOM | LSI_DMODE_DIOM)) { + BADF("IO mappings not implemented\n"); + } + s->dmode = val; + break; + case 0x39: /* DIEN */ + s->dien = val; + lsi_update_irq(s); + break; + case 0x3a: /* SBR */ + s->sbr = val; + break; + case 0x3b: /* DCNTL */ + s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD); + if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); + break; + case 0x40: /* SIEN0 */ + s->sien0 = val; + lsi_update_irq(s); + break; + case 0x41: /* SIEN1 */ + s->sien1 = val; + lsi_update_irq(s); + break; + case 0x47: /* GPCNTL0 */ + break; + case 0x48: /* STIME0 */ + s->stime0 = val; + break; + case 0x49: /* STIME1 */ + if (val & 0xf) { + DPRINTF("General purpose timer not implemented\n"); + /* ??? Raising the interrupt immediately seems to be sufficient + to keep the FreeBSD driver happy. */ + lsi_script_scsi_interrupt(s, 0, LSI_SIST1_GEN); + } + break; + case 0x4a: /* RESPID0 */ + s->respid0 = val; + break; + case 0x4b: /* RESPID1 */ + s->respid1 = val; + break; + case 0x4d: /* STEST1 */ + s->stest1 = val; + break; + case 0x4e: /* STEST2 */ + if (val & 1) { + BADF("Low level mode not implemented\n"); + } + s->stest2 = val; + break; + case 0x4f: /* STEST3 */ + if (val & 0x41) { + BADF("SCSI FIFO test mode not implemented\n"); + } + s->stest3 = val; + break; + case 0x56: /* CCNTL0 */ + s->ccntl0 = val; + break; + case 0x57: /* CCNTL1 */ + s->ccntl1 = val; + break; + CASE_SET_REG32(mmrs, 0xa0) + CASE_SET_REG32(mmws, 0xa4) + CASE_SET_REG32(sfs, 0xa8) + CASE_SET_REG32(drs, 0xac) + CASE_SET_REG32(sbms, 0xb0) + CASE_SET_REG32(dbms, 0xb4) + CASE_SET_REG32(dnad64, 0xb8) + CASE_SET_REG32(pmjad1, 0xc0) + CASE_SET_REG32(pmjad2, 0xc4) + CASE_SET_REG32(rbc, 0xc8) + CASE_SET_REG32(ua, 0xcc) + CASE_SET_REG32(ia, 0xd4) + CASE_SET_REG32(sbc, 0xd8) + CASE_SET_REG32(csbc, 0xdc) + default: + if (offset >= 0x5c && offset < 0xa0) { + int n; + int shift; + n = (offset - 0x58) >> 2; + shift = (offset & 3) * 8; + s->scratch[n] = deposit32(s->scratch[n], shift, 8, val); + } else { + BADF("Unhandled writeb 0x%x = 0x%x\n", offset, val); + } + } +#undef CASE_SET_REG24 +#undef CASE_SET_REG32 +} + +static void lsi_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + LSIState *s = opaque; + + lsi_reg_writeb(s, addr & 0xff, val); +} + +static uint64_t lsi_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + LSIState *s = opaque; + + return lsi_reg_readb(s, addr & 0xff); +} + +static const MemoryRegionOps lsi_mmio_ops = { + .read = lsi_mmio_read, + .write = lsi_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void lsi_ram_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + LSIState *s = opaque; + uint32_t newval; + uint32_t mask; + int shift; + + newval = s->script_ram[addr >> 2]; + shift = (addr & 3) * 8; + mask = ((uint64_t)1 << (size * 8)) - 1; + newval &= ~(mask << shift); + newval |= val << shift; + s->script_ram[addr >> 2] = newval; +} + +static uint64_t lsi_ram_read(void *opaque, hwaddr addr, + unsigned size) +{ + LSIState *s = opaque; + uint32_t val; + uint32_t mask; + + val = s->script_ram[addr >> 2]; + mask = ((uint64_t)1 << (size * 8)) - 1; + val >>= (addr & 3) * 8; + return val & mask; +} + +static const MemoryRegionOps lsi_ram_ops = { + .read = lsi_ram_read, + .write = lsi_ram_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static uint64_t lsi_io_read(void *opaque, hwaddr addr, + unsigned size) +{ + LSIState *s = opaque; + return lsi_reg_readb(s, addr & 0xff); +} + +static void lsi_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + LSIState *s = opaque; + lsi_reg_writeb(s, addr & 0xff, val); +} + +static const MemoryRegionOps lsi_io_ops = { + .read = lsi_io_read, + .write = lsi_io_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void lsi_scsi_reset(DeviceState *dev) +{ + LSIState *s = LSI53C895A(dev); + + lsi_soft_reset(s); +} + +static void lsi_pre_save(void *opaque) +{ + LSIState *s = opaque; + + if (s->current) { + assert(s->current->dma_buf == NULL); + assert(s->current->dma_len == 0); + } + assert(QTAILQ_EMPTY(&s->queue)); +} + +static const VMStateDescription vmstate_lsi_scsi = { + .name = "lsiscsi", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = lsi_pre_save, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, LSIState), + + VMSTATE_INT32(carry, LSIState), + VMSTATE_INT32(status, LSIState), + VMSTATE_INT32(msg_action, LSIState), + VMSTATE_INT32(msg_len, LSIState), + VMSTATE_BUFFER(msg, LSIState), + VMSTATE_INT32(waiting, LSIState), + + VMSTATE_UINT32(dsa, LSIState), + VMSTATE_UINT32(temp, LSIState), + VMSTATE_UINT32(dnad, LSIState), + VMSTATE_UINT32(dbc, LSIState), + VMSTATE_UINT8(istat0, LSIState), + VMSTATE_UINT8(istat1, LSIState), + VMSTATE_UINT8(dcmd, LSIState), + VMSTATE_UINT8(dstat, LSIState), + VMSTATE_UINT8(dien, LSIState), + VMSTATE_UINT8(sist0, LSIState), + VMSTATE_UINT8(sist1, LSIState), + VMSTATE_UINT8(sien0, LSIState), + VMSTATE_UINT8(sien1, LSIState), + VMSTATE_UINT8(mbox0, LSIState), + VMSTATE_UINT8(mbox1, LSIState), + VMSTATE_UINT8(dfifo, LSIState), + VMSTATE_UINT8(ctest2, LSIState), + VMSTATE_UINT8(ctest3, LSIState), + VMSTATE_UINT8(ctest4, LSIState), + VMSTATE_UINT8(ctest5, LSIState), + VMSTATE_UINT8(ccntl0, LSIState), + VMSTATE_UINT8(ccntl1, LSIState), + VMSTATE_UINT32(dsp, LSIState), + VMSTATE_UINT32(dsps, LSIState), + VMSTATE_UINT8(dmode, LSIState), + VMSTATE_UINT8(dcntl, LSIState), + VMSTATE_UINT8(scntl0, LSIState), + VMSTATE_UINT8(scntl1, LSIState), + VMSTATE_UINT8(scntl2, LSIState), + VMSTATE_UINT8(scntl3, LSIState), + VMSTATE_UINT8(sstat0, LSIState), + VMSTATE_UINT8(sstat1, LSIState), + VMSTATE_UINT8(scid, LSIState), + VMSTATE_UINT8(sxfer, LSIState), + VMSTATE_UINT8(socl, LSIState), + VMSTATE_UINT8(sdid, LSIState), + VMSTATE_UINT8(ssid, LSIState), + VMSTATE_UINT8(sfbr, LSIState), + VMSTATE_UINT8(stest1, LSIState), + VMSTATE_UINT8(stest2, LSIState), + VMSTATE_UINT8(stest3, LSIState), + VMSTATE_UINT8(sidl, LSIState), + VMSTATE_UINT8(stime0, LSIState), + VMSTATE_UINT8(respid0, LSIState), + VMSTATE_UINT8(respid1, LSIState), + VMSTATE_UINT32(mmrs, LSIState), + VMSTATE_UINT32(mmws, LSIState), + VMSTATE_UINT32(sfs, LSIState), + VMSTATE_UINT32(drs, LSIState), + VMSTATE_UINT32(sbms, LSIState), + VMSTATE_UINT32(dbms, LSIState), + VMSTATE_UINT32(dnad64, LSIState), + VMSTATE_UINT32(pmjad1, LSIState), + VMSTATE_UINT32(pmjad2, LSIState), + VMSTATE_UINT32(rbc, LSIState), + VMSTATE_UINT32(ua, LSIState), + VMSTATE_UINT32(ia, LSIState), + VMSTATE_UINT32(sbc, LSIState), + VMSTATE_UINT32(csbc, LSIState), + VMSTATE_BUFFER_UNSAFE(scratch, LSIState, 0, 18 * sizeof(uint32_t)), + VMSTATE_UINT8(sbr, LSIState), + + VMSTATE_BUFFER_UNSAFE(script_ram, LSIState, 0, 2048 * sizeof(uint32_t)), + VMSTATE_END_OF_LIST() + } +}; + +static const struct SCSIBusInfo lsi_scsi_info = { + .tcq = true, + .max_target = LSI_MAX_DEVS, + .max_lun = 0, /* LUN support is buggy */ + + .transfer_data = lsi_transfer_data, + .complete = lsi_command_complete, + .cancel = lsi_request_cancelled +}; + +static void lsi_scsi_realize(PCIDevice *dev, Error **errp) +{ + LSIState *s = LSI53C895A(dev); + DeviceState *d = DEVICE(dev); + uint8_t *pci_conf; + + pci_conf = dev->config; + + /* PCI latency timer = 255 */ + pci_conf[PCI_LATENCY_TIMER] = 0xff; + /* Interrupt pin A */ + pci_conf[PCI_INTERRUPT_PIN] = 0x01; + + memory_region_init_io(&s->mmio_io, OBJECT(s), &lsi_mmio_ops, s, + "lsi-mmio", 0x400); + memory_region_init_io(&s->ram_io, OBJECT(s), &lsi_ram_ops, s, + "lsi-ram", 0x2000); + memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, + "lsi-io", 256); + + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_io); + pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio_io); + pci_register_bar(dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io); + QTAILQ_INIT(&s->queue); + + scsi_bus_new(&s->bus, sizeof(s->bus), d, &lsi_scsi_info, NULL); + if (!d->hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus, errp); + } +} + +static void lsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = lsi_scsi_realize; + k->vendor_id = PCI_VENDOR_ID_LSI_LOGIC; + k->device_id = PCI_DEVICE_ID_LSI_53C895A; + k->class_id = PCI_CLASS_STORAGE_SCSI; + k->subsystem_id = 0x1000; + dc->reset = lsi_scsi_reset; + dc->vmsd = &vmstate_lsi_scsi; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo lsi_info = { + .name = TYPE_LSI53C895A, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(LSIState), + .class_init = lsi_class_init, +}; + +static void lsi53c810_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->device_id = PCI_DEVICE_ID_LSI_53C810; +} + +static TypeInfo lsi53c810_info = { + .name = TYPE_LSI53C810, + .parent = TYPE_LSI53C895A, + .class_init = lsi53c810_class_init, +}; + +static void lsi53c895a_register_types(void) +{ + type_register_static(&lsi_info); + type_register_static(&lsi53c810_info); +} + +type_init(lsi53c895a_register_types) diff --git a/qemu/hw/scsi/megasas.c b/qemu/hw/scsi/megasas.c new file mode 100644 index 000000000..a04369c5a --- /dev/null +++ b/qemu/hw/scsi/megasas.c @@ -0,0 +1,2547 @@ +/* + * QEMU MegaRAID SAS 8708EM2 Host Bus Adapter emulation + * Based on the linux driver code at drivers/scsi/megaraid + * + * Copyright (c) 2009-2012 Hannes Reinecke, SUSE Labs + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "sysemu/dma.h" +#include "sysemu/block-backend.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "qemu/iov.h" +#include "hw/scsi/scsi.h" +#include "block/scsi.h" +#include "trace.h" + +#include "mfi.h" + +#define MEGASAS_VERSION_GEN1 "1.70" +#define MEGASAS_VERSION_GEN2 "1.80" +#define MEGASAS_MAX_FRAMES 2048 /* Firmware limit at 65535 */ +#define MEGASAS_DEFAULT_FRAMES 1000 /* Windows requires this */ +#define MEGASAS_GEN2_DEFAULT_FRAMES 1008 /* Windows requires this */ +#define MEGASAS_MAX_SGE 128 /* Firmware limit */ +#define MEGASAS_DEFAULT_SGE 80 +#define MEGASAS_MAX_SECTORS 0xFFFF /* No real limit */ +#define MEGASAS_MAX_ARRAYS 128 + +#define MEGASAS_HBA_SERIAL "QEMU123456" +#define NAA_LOCALLY_ASSIGNED_ID 0x3ULL +#define IEEE_COMPANY_LOCALLY_ASSIGNED 0x525400 + +#define MEGASAS_FLAG_USE_JBOD 0 +#define MEGASAS_MASK_USE_JBOD (1 << MEGASAS_FLAG_USE_JBOD) +#define MEGASAS_FLAG_USE_MSI 1 +#define MEGASAS_MASK_USE_MSI (1 << MEGASAS_FLAG_USE_MSI) +#define MEGASAS_FLAG_USE_MSIX 2 +#define MEGASAS_MASK_USE_MSIX (1 << MEGASAS_FLAG_USE_MSIX) +#define MEGASAS_FLAG_USE_QUEUE64 3 +#define MEGASAS_MASK_USE_QUEUE64 (1 << MEGASAS_FLAG_USE_QUEUE64) + +static const char *mfi_frame_desc[] = { + "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", + "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"}; + +typedef struct MegasasCmd { + uint32_t index; + uint16_t flags; + uint16_t count; + uint64_t context; + + hwaddr pa; + hwaddr pa_size; + union mfi_frame *frame; + SCSIRequest *req; + QEMUSGList qsg; + void *iov_buf; + size_t iov_size; + size_t iov_offset; + struct MegasasState *state; +} MegasasCmd; + +typedef struct MegasasState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion mmio_io; + MemoryRegion port_io; + MemoryRegion queue_io; + uint32_t frame_hi; + + int fw_state; + uint32_t fw_sge; + uint32_t fw_cmds; + uint32_t flags; + int fw_luns; + int intr_mask; + int doorbell; + int busy; + int diag; + int adp_reset; + + MegasasCmd *event_cmd; + int event_locale; + int event_class; + int event_count; + int shutdown_event; + int boot_event; + + uint64_t sas_addr; + char *hba_serial; + + uint64_t reply_queue_pa; + void *reply_queue; + int reply_queue_len; + int reply_queue_head; + int reply_queue_tail; + uint64_t consumer_pa; + uint64_t producer_pa; + + MegasasCmd frames[MEGASAS_MAX_FRAMES]; + DECLARE_BITMAP(frame_map, MEGASAS_MAX_FRAMES); + SCSIBus bus; +} MegasasState; + +typedef struct MegasasBaseClass { + PCIDeviceClass parent_class; + const char *product_name; + const char *product_version; + int mmio_bar; + int ioport_bar; + int osts; +} MegasasBaseClass; + +#define TYPE_MEGASAS_BASE "megasas-base" +#define TYPE_MEGASAS_GEN1 "megasas" +#define TYPE_MEGASAS_GEN2 "megasas-gen2" + +#define MEGASAS(obj) \ + OBJECT_CHECK(MegasasState, (obj), TYPE_MEGASAS_BASE) + +#define MEGASAS_DEVICE_CLASS(oc) \ + OBJECT_CLASS_CHECK(MegasasBaseClass, (oc), TYPE_MEGASAS_BASE) +#define MEGASAS_DEVICE_GET_CLASS(oc) \ + OBJECT_GET_CLASS(MegasasBaseClass, (oc), TYPE_MEGASAS_BASE) + +#define MEGASAS_INTR_DISABLED_MASK 0xFFFFFFFF + +static bool megasas_intr_enabled(MegasasState *s) +{ + if ((s->intr_mask & MEGASAS_INTR_DISABLED_MASK) != + MEGASAS_INTR_DISABLED_MASK) { + return true; + } + return false; +} + +static bool megasas_use_queue64(MegasasState *s) +{ + return s->flags & MEGASAS_MASK_USE_QUEUE64; +} + +static bool megasas_use_msi(MegasasState *s) +{ + return s->flags & MEGASAS_MASK_USE_MSI; +} + +static bool megasas_use_msix(MegasasState *s) +{ + return s->flags & MEGASAS_MASK_USE_MSIX; +} + +static bool megasas_is_jbod(MegasasState *s) +{ + return s->flags & MEGASAS_MASK_USE_JBOD; +} + +static void megasas_frame_set_cmd_status(MegasasState *s, + unsigned long frame, uint8_t v) +{ + PCIDevice *pci = &s->parent_obj; + stb_pci_dma(pci, frame + offsetof(struct mfi_frame_header, cmd_status), v); +} + +static void megasas_frame_set_scsi_status(MegasasState *s, + unsigned long frame, uint8_t v) +{ + PCIDevice *pci = &s->parent_obj; + stb_pci_dma(pci, frame + offsetof(struct mfi_frame_header, scsi_status), v); +} + +/* + * Context is considered opaque, but the HBA firmware is running + * in little endian mode. So convert it to little endian, too. + */ +static uint64_t megasas_frame_get_context(MegasasState *s, + unsigned long frame) +{ + PCIDevice *pci = &s->parent_obj; + return ldq_le_pci_dma(pci, frame + offsetof(struct mfi_frame_header, context)); +} + +static bool megasas_frame_is_ieee_sgl(MegasasCmd *cmd) +{ + return cmd->flags & MFI_FRAME_IEEE_SGL; +} + +static bool megasas_frame_is_sgl64(MegasasCmd *cmd) +{ + return cmd->flags & MFI_FRAME_SGL64; +} + +static bool megasas_frame_is_sense64(MegasasCmd *cmd) +{ + return cmd->flags & MFI_FRAME_SENSE64; +} + +static uint64_t megasas_sgl_get_addr(MegasasCmd *cmd, + union mfi_sgl *sgl) +{ + uint64_t addr; + + if (megasas_frame_is_ieee_sgl(cmd)) { + addr = le64_to_cpu(sgl->sg_skinny->addr); + } else if (megasas_frame_is_sgl64(cmd)) { + addr = le64_to_cpu(sgl->sg64->addr); + } else { + addr = le32_to_cpu(sgl->sg32->addr); + } + return addr; +} + +static uint32_t megasas_sgl_get_len(MegasasCmd *cmd, + union mfi_sgl *sgl) +{ + uint32_t len; + + if (megasas_frame_is_ieee_sgl(cmd)) { + len = le32_to_cpu(sgl->sg_skinny->len); + } else if (megasas_frame_is_sgl64(cmd)) { + len = le32_to_cpu(sgl->sg64->len); + } else { + len = le32_to_cpu(sgl->sg32->len); + } + return len; +} + +static union mfi_sgl *megasas_sgl_next(MegasasCmd *cmd, + union mfi_sgl *sgl) +{ + uint8_t *next = (uint8_t *)sgl; + + if (megasas_frame_is_ieee_sgl(cmd)) { + next += sizeof(struct mfi_sg_skinny); + } else if (megasas_frame_is_sgl64(cmd)) { + next += sizeof(struct mfi_sg64); + } else { + next += sizeof(struct mfi_sg32); + } + + if (next >= (uint8_t *)cmd->frame + cmd->pa_size) { + return NULL; + } + return (union mfi_sgl *)next; +} + +static void megasas_soft_reset(MegasasState *s); + +static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl) +{ + int i; + int iov_count = 0; + size_t iov_size = 0; + + cmd->flags = le16_to_cpu(cmd->frame->header.flags); + iov_count = cmd->frame->header.sge_count; + if (iov_count > MEGASAS_MAX_SGE) { + trace_megasas_iovec_sgl_overflow(cmd->index, iov_count, + MEGASAS_MAX_SGE); + return iov_count; + } + pci_dma_sglist_init(&cmd->qsg, PCI_DEVICE(s), iov_count); + for (i = 0; i < iov_count; i++) { + dma_addr_t iov_pa, iov_size_p; + + if (!sgl) { + trace_megasas_iovec_sgl_underflow(cmd->index, i); + goto unmap; + } + iov_pa = megasas_sgl_get_addr(cmd, sgl); + iov_size_p = megasas_sgl_get_len(cmd, sgl); + if (!iov_pa || !iov_size_p) { + trace_megasas_iovec_sgl_invalid(cmd->index, i, + iov_pa, iov_size_p); + goto unmap; + } + qemu_sglist_add(&cmd->qsg, iov_pa, iov_size_p); + sgl = megasas_sgl_next(cmd, sgl); + iov_size += (size_t)iov_size_p; + } + if (cmd->iov_size > iov_size) { + trace_megasas_iovec_overflow(cmd->index, iov_size, cmd->iov_size); + } else if (cmd->iov_size < iov_size) { + trace_megasas_iovec_underflow(cmd->iov_size, iov_size, cmd->iov_size); + } + cmd->iov_offset = 0; + return 0; +unmap: + qemu_sglist_destroy(&cmd->qsg); + return iov_count - i; +} + +static void megasas_unmap_sgl(MegasasCmd *cmd) +{ + qemu_sglist_destroy(&cmd->qsg); + cmd->iov_offset = 0; +} + +/* + * passthrough sense and io sense are at the same offset + */ +static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr, + uint8_t sense_len) +{ + PCIDevice *pcid = PCI_DEVICE(cmd->state); + uint32_t pa_hi = 0, pa_lo; + hwaddr pa; + + if (sense_len > cmd->frame->header.sense_len) { + sense_len = cmd->frame->header.sense_len; + } + if (sense_len) { + pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo); + if (megasas_frame_is_sense64(cmd)) { + pa_hi = le32_to_cpu(cmd->frame->pass.sense_addr_hi); + } + pa = ((uint64_t) pa_hi << 32) | pa_lo; + pci_dma_write(pcid, pa, sense_ptr, sense_len); + cmd->frame->header.sense_len = sense_len; + } + return sense_len; +} + +static void megasas_write_sense(MegasasCmd *cmd, SCSISense sense) +{ + uint8_t sense_buf[SCSI_SENSE_BUF_SIZE]; + uint8_t sense_len = 18; + + memset(sense_buf, 0, sense_len); + sense_buf[0] = 0xf0; + sense_buf[2] = sense.key; + sense_buf[7] = 10; + sense_buf[12] = sense.asc; + sense_buf[13] = sense.ascq; + megasas_build_sense(cmd, sense_buf, sense_len); +} + +static void megasas_copy_sense(MegasasCmd *cmd) +{ + uint8_t sense_buf[SCSI_SENSE_BUF_SIZE]; + uint8_t sense_len; + + sense_len = scsi_req_get_sense(cmd->req, sense_buf, + SCSI_SENSE_BUF_SIZE); + megasas_build_sense(cmd, sense_buf, sense_len); +} + +/* + * Format an INQUIRY CDB + */ +static int megasas_setup_inquiry(uint8_t *cdb, int pg, int len) +{ + memset(cdb, 0, 6); + cdb[0] = INQUIRY; + if (pg > 0) { + cdb[1] = 0x1; + cdb[2] = pg; + } + cdb[3] = (len >> 8) & 0xff; + cdb[4] = (len & 0xff); + return len; +} + +/* + * Encode lba and len into a READ_16/WRITE_16 CDB + */ +static void megasas_encode_lba(uint8_t *cdb, uint64_t lba, + uint32_t len, bool is_write) +{ + memset(cdb, 0x0, 16); + if (is_write) { + cdb[0] = WRITE_16; + } else { + cdb[0] = READ_16; + } + cdb[2] = (lba >> 56) & 0xff; + cdb[3] = (lba >> 48) & 0xff; + cdb[4] = (lba >> 40) & 0xff; + cdb[5] = (lba >> 32) & 0xff; + cdb[6] = (lba >> 24) & 0xff; + cdb[7] = (lba >> 16) & 0xff; + cdb[8] = (lba >> 8) & 0xff; + cdb[9] = (lba) & 0xff; + cdb[10] = (len >> 24) & 0xff; + cdb[11] = (len >> 16) & 0xff; + cdb[12] = (len >> 8) & 0xff; + cdb[13] = (len) & 0xff; +} + +/* + * Utility functions + */ +static uint64_t megasas_fw_time(void) +{ + struct tm curtime; + uint64_t bcd_time; + + qemu_get_timedate(&curtime, 0); + bcd_time = ((uint64_t)curtime.tm_sec & 0xff) << 48 | + ((uint64_t)curtime.tm_min & 0xff) << 40 | + ((uint64_t)curtime.tm_hour & 0xff) << 32 | + ((uint64_t)curtime.tm_mday & 0xff) << 24 | + ((uint64_t)curtime.tm_mon & 0xff) << 16 | + ((uint64_t)(curtime.tm_year + 1900) & 0xffff); + + return bcd_time; +} + +/* + * Default disk sata address + * 0x1221 is the magic number as + * present in real hardware, + * so use it here, too. + */ +static uint64_t megasas_get_sata_addr(uint16_t id) +{ + uint64_t addr = (0x1221ULL << 48); + return addr & (id << 24); +} + +/* + * Frame handling + */ +static int megasas_next_index(MegasasState *s, int index, int limit) +{ + index++; + if (index == limit) { + index = 0; + } + return index; +} + +static MegasasCmd *megasas_lookup_frame(MegasasState *s, + hwaddr frame) +{ + MegasasCmd *cmd = NULL; + int num = 0, index; + + index = s->reply_queue_head; + + while (num < s->fw_cmds) { + if (s->frames[index].pa && s->frames[index].pa == frame) { + cmd = &s->frames[index]; + break; + } + index = megasas_next_index(s, index, s->fw_cmds); + num++; + } + + return cmd; +} + +static void megasas_unmap_frame(MegasasState *s, MegasasCmd *cmd) +{ + PCIDevice *p = PCI_DEVICE(s); + + pci_dma_unmap(p, cmd->frame, cmd->pa_size, 0, 0); + cmd->frame = NULL; + cmd->pa = 0; + clear_bit(cmd->index, s->frame_map); +} + +/* + * This absolutely needs to be locked if + * qemu ever goes multithreaded. + */ +static MegasasCmd *megasas_enqueue_frame(MegasasState *s, + hwaddr frame, uint64_t context, int count) +{ + PCIDevice *pcid = PCI_DEVICE(s); + MegasasCmd *cmd = NULL; + int frame_size = MFI_FRAME_SIZE * 16; + hwaddr frame_size_p = frame_size; + unsigned long index; + + index = 0; + while (index < s->fw_cmds) { + index = find_next_zero_bit(s->frame_map, s->fw_cmds, index); + if (!s->frames[index].pa) + break; + /* Busy frame found */ + trace_megasas_qf_mapped(index); + } + if (index >= s->fw_cmds) { + /* All frames busy */ + trace_megasas_qf_busy(frame); + return NULL; + } + cmd = &s->frames[index]; + set_bit(index, s->frame_map); + trace_megasas_qf_new(index, frame); + + cmd->pa = frame; + /* Map all possible frames */ + cmd->frame = pci_dma_map(pcid, frame, &frame_size_p, 0); + if (frame_size_p != frame_size) { + trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame); + if (cmd->frame) { + megasas_unmap_frame(s, cmd); + } + s->event_count++; + return NULL; + } + cmd->pa_size = frame_size_p; + cmd->context = context; + if (!megasas_use_queue64(s)) { + cmd->context &= (uint64_t)0xFFFFFFFF; + } + cmd->count = count; + s->busy++; + + if (s->consumer_pa) { + s->reply_queue_tail = ldl_le_pci_dma(pcid, s->consumer_pa); + } + trace_megasas_qf_enqueue(cmd->index, cmd->count, cmd->context, + s->reply_queue_head, s->reply_queue_tail, s->busy); + + return cmd; +} + +static void megasas_complete_frame(MegasasState *s, uint64_t context) +{ + PCIDevice *pci_dev = PCI_DEVICE(s); + int tail, queue_offset; + + /* Decrement busy count */ + s->busy--; + if (s->reply_queue_pa) { + /* + * Put command on the reply queue. + * Context is opaque, but emulation is running in + * little endian. So convert it. + */ + if (megasas_use_queue64(s)) { + queue_offset = s->reply_queue_head * sizeof(uint64_t); + stq_le_pci_dma(pci_dev, s->reply_queue_pa + queue_offset, context); + } else { + queue_offset = s->reply_queue_head * sizeof(uint32_t); + stl_le_pci_dma(pci_dev, s->reply_queue_pa + queue_offset, context); + } + s->reply_queue_tail = ldl_le_pci_dma(pci_dev, s->consumer_pa); + trace_megasas_qf_complete(context, s->reply_queue_head, + s->reply_queue_tail, s->busy); + } + + if (megasas_intr_enabled(s)) { + /* Update reply queue pointer */ + s->reply_queue_tail = ldl_le_pci_dma(pci_dev, s->consumer_pa); + tail = s->reply_queue_head; + s->reply_queue_head = megasas_next_index(s, tail, s->fw_cmds); + trace_megasas_qf_update(s->reply_queue_head, s->reply_queue_tail, + s->busy); + stl_le_pci_dma(pci_dev, s->producer_pa, s->reply_queue_head); + /* Notify HBA */ + if (msix_enabled(pci_dev)) { + trace_megasas_msix_raise(0); + msix_notify(pci_dev, 0); + } else if (msi_enabled(pci_dev)) { + trace_megasas_msi_raise(0); + msi_notify(pci_dev, 0); + } else { + s->doorbell++; + if (s->doorbell == 1) { + trace_megasas_irq_raise(); + pci_irq_assert(pci_dev); + } + } + } else { + trace_megasas_qf_complete_noirq(context); + } +} + +static void megasas_reset_frames(MegasasState *s) +{ + int i; + MegasasCmd *cmd; + + for (i = 0; i < s->fw_cmds; i++) { + cmd = &s->frames[i]; + if (cmd->pa) { + megasas_unmap_frame(s, cmd); + } + } + bitmap_zero(s->frame_map, MEGASAS_MAX_FRAMES); +} + +static void megasas_abort_command(MegasasCmd *cmd) +{ + if (cmd->req) { + scsi_req_cancel(cmd->req); + cmd->req = NULL; + } +} + +static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd) +{ + PCIDevice *pcid = PCI_DEVICE(s); + uint32_t pa_hi, pa_lo; + hwaddr iq_pa, initq_size = sizeof(struct mfi_init_qinfo); + struct mfi_init_qinfo *initq = NULL; + uint32_t flags; + int ret = MFI_STAT_OK; + + if (s->reply_queue_pa) { + trace_megasas_initq_mapped(s->reply_queue_pa); + goto out; + } + pa_lo = le32_to_cpu(cmd->frame->init.qinfo_new_addr_lo); + pa_hi = le32_to_cpu(cmd->frame->init.qinfo_new_addr_hi); + iq_pa = (((uint64_t) pa_hi << 32) | pa_lo); + trace_megasas_init_firmware((uint64_t)iq_pa); + initq = pci_dma_map(pcid, iq_pa, &initq_size, 0); + if (!initq || initq_size != sizeof(*initq)) { + trace_megasas_initq_map_failed(cmd->index); + s->event_count++; + ret = MFI_STAT_MEMORY_NOT_AVAILABLE; + goto out; + } + s->reply_queue_len = le32_to_cpu(initq->rq_entries) & 0xFFFF; + if (s->reply_queue_len > s->fw_cmds) { + trace_megasas_initq_mismatch(s->reply_queue_len, s->fw_cmds); + s->event_count++; + ret = MFI_STAT_INVALID_PARAMETER; + goto out; + } + pa_lo = le32_to_cpu(initq->rq_addr_lo); + pa_hi = le32_to_cpu(initq->rq_addr_hi); + s->reply_queue_pa = ((uint64_t) pa_hi << 32) | pa_lo; + pa_lo = le32_to_cpu(initq->ci_addr_lo); + pa_hi = le32_to_cpu(initq->ci_addr_hi); + s->consumer_pa = ((uint64_t) pa_hi << 32) | pa_lo; + pa_lo = le32_to_cpu(initq->pi_addr_lo); + pa_hi = le32_to_cpu(initq->pi_addr_hi); + s->producer_pa = ((uint64_t) pa_hi << 32) | pa_lo; + s->reply_queue_head = ldl_le_pci_dma(pcid, s->producer_pa); + s->reply_queue_tail = ldl_le_pci_dma(pcid, s->consumer_pa); + flags = le32_to_cpu(initq->flags); + if (flags & MFI_QUEUE_FLAG_CONTEXT64) { + s->flags |= MEGASAS_MASK_USE_QUEUE64; + } + trace_megasas_init_queue((unsigned long)s->reply_queue_pa, + s->reply_queue_len, s->reply_queue_head, + s->reply_queue_tail, flags); + megasas_reset_frames(s); + s->fw_state = MFI_FWSTATE_OPERATIONAL; +out: + if (initq) { + pci_dma_unmap(pcid, initq, initq_size, 0, 0); + } + return ret; +} + +static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd) +{ + dma_addr_t iov_pa, iov_size; + + cmd->flags = le16_to_cpu(cmd->frame->header.flags); + if (!cmd->frame->header.sge_count) { + trace_megasas_dcmd_zero_sge(cmd->index); + cmd->iov_size = 0; + return 0; + } else if (cmd->frame->header.sge_count > 1) { + trace_megasas_dcmd_invalid_sge(cmd->index, + cmd->frame->header.sge_count); + cmd->iov_size = 0; + return -1; + } + iov_pa = megasas_sgl_get_addr(cmd, &cmd->frame->dcmd.sgl); + iov_size = megasas_sgl_get_len(cmd, &cmd->frame->dcmd.sgl); + pci_dma_sglist_init(&cmd->qsg, PCI_DEVICE(s), 1); + qemu_sglist_add(&cmd->qsg, iov_pa, iov_size); + cmd->iov_size = iov_size; + return cmd->iov_size; +} + +static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size) +{ + trace_megasas_finish_dcmd(cmd->index, iov_size); + + if (cmd->frame->header.sge_count) { + qemu_sglist_destroy(&cmd->qsg); + } + if (iov_size > cmd->iov_size) { + if (megasas_frame_is_ieee_sgl(cmd)) { + cmd->frame->dcmd.sgl.sg_skinny->len = cpu_to_le32(iov_size); + } else if (megasas_frame_is_sgl64(cmd)) { + cmd->frame->dcmd.sgl.sg64->len = cpu_to_le32(iov_size); + } else { + cmd->frame->dcmd.sgl.sg32->len = cpu_to_le32(iov_size); + } + } + cmd->iov_size = 0; +} + +static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) +{ + PCIDevice *pci_dev = PCI_DEVICE(s); + PCIDeviceClass *pci_class = PCI_DEVICE_GET_CLASS(pci_dev); + MegasasBaseClass *base_class = MEGASAS_DEVICE_GET_CLASS(s); + struct mfi_ctrl_info info; + size_t dcmd_size = sizeof(info); + BusChild *kid; + int num_pd_disks = 0; + + memset(&info, 0x0, cmd->iov_size); + if (cmd->iov_size < dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + + info.pci.vendor = cpu_to_le16(pci_class->vendor_id); + info.pci.device = cpu_to_le16(pci_class->device_id); + info.pci.subvendor = cpu_to_le16(pci_class->subsystem_vendor_id); + info.pci.subdevice = cpu_to_le16(pci_class->subsystem_id); + + /* + * For some reason the firmware supports + * only up to 8 device ports. + * Despite supporting a far larger number + * of devices for the physical devices. + * So just display the first 8 devices + * in the device port list, independent + * of how many logical devices are actually + * present. + */ + info.host.type = MFI_INFO_HOST_PCIE; + info.device.type = MFI_INFO_DEV_SAS3G; + info.device.port_count = 8; + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + uint16_t pd_id; + + if (num_pd_disks < 8) { + pd_id = ((sdev->id & 0xFF) << 8) | (sdev->lun & 0xFF); + info.device.port_addr[num_pd_disks] = + cpu_to_le64(megasas_get_sata_addr(pd_id)); + } + num_pd_disks++; + } + + memcpy(info.product_name, base_class->product_name, 24); + snprintf(info.serial_number, 32, "%s", s->hba_serial); + snprintf(info.package_version, 0x60, "%s-QEMU", QEMU_VERSION); + memcpy(info.image_component[0].name, "APP", 3); + snprintf(info.image_component[0].version, 10, "%s-QEMU", + base_class->product_version); + memcpy(info.image_component[0].build_date, "Apr 1 2014", 11); + memcpy(info.image_component[0].build_time, "12:34:56", 8); + info.image_component_count = 1; + if (pci_dev->has_rom) { + uint8_t biosver[32]; + uint8_t *ptr; + + ptr = memory_region_get_ram_ptr(&pci_dev->rom); + memcpy(biosver, ptr + 0x41, 31); + memcpy(info.image_component[1].name, "BIOS", 4); + memcpy(info.image_component[1].version, biosver, + strlen((const char *)biosver)); + info.image_component_count++; + } + info.current_fw_time = cpu_to_le32(megasas_fw_time()); + info.max_arms = 32; + info.max_spans = 8; + info.max_arrays = MEGASAS_MAX_ARRAYS; + info.max_lds = MFI_MAX_LD; + info.max_cmds = cpu_to_le16(s->fw_cmds); + info.max_sg_elements = cpu_to_le16(s->fw_sge); + info.max_request_size = cpu_to_le32(MEGASAS_MAX_SECTORS); + if (!megasas_is_jbod(s)) + info.lds_present = cpu_to_le16(num_pd_disks); + info.pd_present = cpu_to_le16(num_pd_disks); + info.pd_disks_present = cpu_to_le16(num_pd_disks); + info.hw_present = cpu_to_le32(MFI_INFO_HW_NVRAM | + MFI_INFO_HW_MEM | + MFI_INFO_HW_FLASH); + info.memory_size = cpu_to_le16(512); + info.nvram_size = cpu_to_le16(32); + info.flash_size = cpu_to_le16(16); + info.raid_levels = cpu_to_le32(MFI_INFO_RAID_0); + info.adapter_ops = cpu_to_le32(MFI_INFO_AOPS_RBLD_RATE | + MFI_INFO_AOPS_SELF_DIAGNOSTIC | + MFI_INFO_AOPS_MIXED_ARRAY); + info.ld_ops = cpu_to_le32(MFI_INFO_LDOPS_DISK_CACHE_POLICY | + MFI_INFO_LDOPS_ACCESS_POLICY | + MFI_INFO_LDOPS_IO_POLICY | + MFI_INFO_LDOPS_WRITE_POLICY | + MFI_INFO_LDOPS_READ_POLICY); + info.max_strips_per_io = cpu_to_le16(s->fw_sge); + info.stripe_sz_ops.min = 3; + info.stripe_sz_ops.max = ctz32(MEGASAS_MAX_SECTORS + 1); + info.properties.pred_fail_poll_interval = cpu_to_le16(300); + info.properties.intr_throttle_cnt = cpu_to_le16(16); + info.properties.intr_throttle_timeout = cpu_to_le16(50); + info.properties.rebuild_rate = 30; + info.properties.patrol_read_rate = 30; + info.properties.bgi_rate = 30; + info.properties.cc_rate = 30; + info.properties.recon_rate = 30; + info.properties.cache_flush_interval = 4; + info.properties.spinup_drv_cnt = 2; + info.properties.spinup_delay = 6; + info.properties.ecc_bucket_size = 15; + info.properties.ecc_bucket_leak_rate = cpu_to_le16(1440); + info.properties.expose_encl_devices = 1; + info.properties.OnOffProperties = cpu_to_le32(MFI_CTRL_PROP_EnableJBOD); + info.pd_ops = cpu_to_le32(MFI_INFO_PDOPS_FORCE_ONLINE | + MFI_INFO_PDOPS_FORCE_OFFLINE); + info.pd_mix_support = cpu_to_le32(MFI_INFO_PDMIX_SAS | + MFI_INFO_PDMIX_SATA | + MFI_INFO_PDMIX_LD); + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_mfc_get_defaults(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_defaults info; + size_t dcmd_size = sizeof(struct mfi_defaults); + + memset(&info, 0x0, dcmd_size); + if (cmd->iov_size < dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + + info.sas_addr = cpu_to_le64(s->sas_addr); + info.stripe_size = 3; + info.flush_time = 4; + info.background_rate = 30; + info.allow_mix_in_enclosure = 1; + info.allow_mix_in_ld = 1; + info.direct_pd_mapping = 1; + /* Enable for BIOS support */ + info.bios_enumerate_lds = 1; + info.disable_ctrl_r = 1; + info.expose_enclosure_devices = 1; + info.disable_preboot_cli = 1; + info.cluster_disable = 1; + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_dcmd_get_bios_info(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_bios_data info; + size_t dcmd_size = sizeof(info); + + memset(&info, 0x0, dcmd_size); + if (cmd->iov_size < dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + info.continue_on_error = 1; + info.verbose = 1; + if (megasas_is_jbod(s)) { + info.expose_all_drives = 1; + } + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_dcmd_get_fw_time(MegasasState *s, MegasasCmd *cmd) +{ + uint64_t fw_time; + size_t dcmd_size = sizeof(fw_time); + + fw_time = cpu_to_le64(megasas_fw_time()); + + cmd->iov_size -= dma_buf_read((uint8_t *)&fw_time, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_dcmd_set_fw_time(MegasasState *s, MegasasCmd *cmd) +{ + uint64_t fw_time; + + /* This is a dummy; setting of firmware time is not allowed */ + memcpy(&fw_time, cmd->frame->dcmd.mbox, sizeof(fw_time)); + + trace_megasas_dcmd_set_fw_time(cmd->index, fw_time); + fw_time = cpu_to_le64(megasas_fw_time()); + return MFI_STAT_OK; +} + +static int megasas_event_info(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_evt_log_state info; + size_t dcmd_size = sizeof(info); + + memset(&info, 0, dcmd_size); + + info.newest_seq_num = cpu_to_le32(s->event_count); + info.shutdown_seq_num = cpu_to_le32(s->shutdown_event); + info.boot_seq_num = cpu_to_le32(s->boot_event); + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd) +{ + union mfi_evt event; + + if (cmd->iov_size < sizeof(struct mfi_evt_detail)) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + sizeof(struct mfi_evt_detail)); + return MFI_STAT_INVALID_PARAMETER; + } + s->event_count = cpu_to_le32(cmd->frame->dcmd.mbox[0]); + event.word = cpu_to_le32(cmd->frame->dcmd.mbox[4]); + s->event_locale = event.members.locale; + s->event_class = event.members.class; + s->event_cmd = cmd; + /* Decrease busy count; event frame doesn't count here */ + s->busy--; + cmd->iov_size = sizeof(struct mfi_evt_detail); + return MFI_STAT_INVALID_STATUS; +} + +static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_pd_list info; + size_t dcmd_size = sizeof(info); + BusChild *kid; + uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks; + + memset(&info, 0, dcmd_size); + offset = 8; + dcmd_limit = offset + sizeof(struct mfi_pd_address); + if (cmd->iov_size < dcmd_limit) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_limit); + return MFI_STAT_INVALID_PARAMETER; + } + + max_pd_disks = (cmd->iov_size - offset) / sizeof(struct mfi_pd_address); + if (max_pd_disks > MFI_MAX_SYS_PDS) { + max_pd_disks = MFI_MAX_SYS_PDS; + } + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + uint16_t pd_id; + + if (num_pd_disks >= max_pd_disks) + break; + + pd_id = ((sdev->id & 0xFF) << 8) | (sdev->lun & 0xFF); + info.addr[num_pd_disks].device_id = cpu_to_le16(pd_id); + info.addr[num_pd_disks].encl_device_id = 0xFFFF; + info.addr[num_pd_disks].encl_index = 0; + info.addr[num_pd_disks].slot_number = sdev->id & 0xFF; + info.addr[num_pd_disks].scsi_dev_type = sdev->type; + info.addr[num_pd_disks].connect_port_bitmap = 0x1; + info.addr[num_pd_disks].sas_addr[0] = + cpu_to_le64(megasas_get_sata_addr(pd_id)); + num_pd_disks++; + offset += sizeof(struct mfi_pd_address); + } + trace_megasas_dcmd_pd_get_list(cmd->index, num_pd_disks, + max_pd_disks, offset); + + info.size = cpu_to_le32(offset); + info.count = cpu_to_le32(num_pd_disks); + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, offset, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_dcmd_pd_list_query(MegasasState *s, MegasasCmd *cmd) +{ + uint16_t flags; + + /* mbox0 contains flags */ + flags = le16_to_cpu(cmd->frame->dcmd.mbox[0]); + trace_megasas_dcmd_pd_list_query(cmd->index, flags); + if (flags == MR_PD_QUERY_TYPE_ALL || + megasas_is_jbod(s)) { + return megasas_dcmd_pd_get_list(s, cmd); + } + + return MFI_STAT_OK; +} + +static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, + MegasasCmd *cmd) +{ + struct mfi_pd_info *info = cmd->iov_buf; + size_t dcmd_size = sizeof(struct mfi_pd_info); + uint64_t pd_size; + uint16_t pd_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); + uint8_t cmdbuf[6]; + SCSIRequest *req; + size_t len, resid; + + if (!cmd->iov_buf) { + cmd->iov_buf = g_malloc0(dcmd_size); + info = cmd->iov_buf; + info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */ + info->vpd_page83[0] = 0x7f; + megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data)); + req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!req) { + trace_megasas_dcmd_req_alloc_failed(cmd->index, + "PD get info std inquiry"); + g_free(cmd->iov_buf); + cmd->iov_buf = NULL; + return MFI_STAT_FLASH_ALLOC_FAIL; + } + trace_megasas_dcmd_internal_submit(cmd->index, + "PD get info std inquiry", lun); + len = scsi_req_enqueue(req); + if (len > 0) { + cmd->iov_size = len; + scsi_req_continue(req); + } + return MFI_STAT_INVALID_STATUS; + } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) { + megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83)); + req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!req) { + trace_megasas_dcmd_req_alloc_failed(cmd->index, + "PD get info vpd inquiry"); + return MFI_STAT_FLASH_ALLOC_FAIL; + } + trace_megasas_dcmd_internal_submit(cmd->index, + "PD get info vpd inquiry", lun); + len = scsi_req_enqueue(req); + if (len > 0) { + cmd->iov_size = len; + scsi_req_continue(req); + } + return MFI_STAT_INVALID_STATUS; + } + /* Finished, set FW state */ + if ((info->inquiry_data[0] >> 5) == 0) { + if (megasas_is_jbod(cmd->state)) { + info->fw_state = cpu_to_le16(MFI_PD_STATE_SYSTEM); + } else { + info->fw_state = cpu_to_le16(MFI_PD_STATE_ONLINE); + } + } else { + info->fw_state = cpu_to_le16(MFI_PD_STATE_OFFLINE); + } + + info->ref.v.device_id = cpu_to_le16(pd_id); + info->state.ddf.pd_type = cpu_to_le16(MFI_PD_DDF_TYPE_IN_VD| + MFI_PD_DDF_TYPE_INTF_SAS); + blk_get_geometry(sdev->conf.blk, &pd_size); + info->raw_size = cpu_to_le64(pd_size); + info->non_coerced_size = cpu_to_le64(pd_size); + info->coerced_size = cpu_to_le64(pd_size); + info->encl_device_id = 0xFFFF; + info->slot_number = (sdev->id & 0xFF); + info->path_info.count = 1; + info->path_info.sas_addr[0] = + cpu_to_le64(megasas_get_sata_addr(pd_id)); + info->connected_port_bitmap = 0x1; + info->device_speed = 1; + info->link_speed = 1; + resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg); + g_free(cmd->iov_buf); + cmd->iov_size = dcmd_size - resid; + cmd->iov_buf = NULL; + return MFI_STAT_OK; +} + +static int megasas_dcmd_pd_get_info(MegasasState *s, MegasasCmd *cmd) +{ + size_t dcmd_size = sizeof(struct mfi_pd_info); + uint16_t pd_id; + uint8_t target_id, lun_id; + SCSIDevice *sdev = NULL; + int retval = MFI_STAT_DEVICE_NOT_FOUND; + + if (cmd->iov_size < dcmd_size) { + return MFI_STAT_INVALID_PARAMETER; + } + + /* mbox0 has the ID */ + pd_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]); + target_id = (pd_id >> 8) & 0xFF; + lun_id = pd_id & 0xFF; + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); + trace_megasas_dcmd_pd_get_info(cmd->index, pd_id); + + if (sdev) { + /* Submit inquiry */ + retval = megasas_pd_get_info_submit(sdev, pd_id, cmd); + } + + return retval; +} + +static int megasas_dcmd_ld_get_list(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_ld_list info; + size_t dcmd_size = sizeof(info), resid; + uint32_t num_ld_disks = 0, max_ld_disks; + uint64_t ld_size; + BusChild *kid; + + memset(&info, 0, dcmd_size); + if (cmd->iov_size > dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + + max_ld_disks = (cmd->iov_size - 8) / 16; + if (megasas_is_jbod(s)) { + max_ld_disks = 0; + } + if (max_ld_disks > MFI_MAX_LD) { + max_ld_disks = MFI_MAX_LD; + } + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + + if (num_ld_disks >= max_ld_disks) { + break; + } + /* Logical device size is in blocks */ + blk_get_geometry(sdev->conf.blk, &ld_size); + info.ld_list[num_ld_disks].ld.v.target_id = sdev->id; + info.ld_list[num_ld_disks].state = MFI_LD_STATE_OPTIMAL; + info.ld_list[num_ld_disks].size = cpu_to_le64(ld_size); + num_ld_disks++; + } + info.ld_count = cpu_to_le32(num_ld_disks); + trace_megasas_dcmd_ld_get_list(cmd->index, num_ld_disks, max_ld_disks); + + resid = dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + cmd->iov_size = dcmd_size - resid; + return MFI_STAT_OK; +} + +static int megasas_dcmd_ld_list_query(MegasasState *s, MegasasCmd *cmd) +{ + uint16_t flags; + struct mfi_ld_targetid_list info; + size_t dcmd_size = sizeof(info), resid; + uint32_t num_ld_disks = 0, max_ld_disks = s->fw_luns; + BusChild *kid; + + /* mbox0 contains flags */ + flags = le16_to_cpu(cmd->frame->dcmd.mbox[0]); + trace_megasas_dcmd_ld_list_query(cmd->index, flags); + if (flags != MR_LD_QUERY_TYPE_ALL && + flags != MR_LD_QUERY_TYPE_EXPOSED_TO_HOST) { + max_ld_disks = 0; + } + + memset(&info, 0, dcmd_size); + if (cmd->iov_size < 12) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + dcmd_size = sizeof(uint32_t) * 2 + 3; + max_ld_disks = cmd->iov_size - dcmd_size; + if (megasas_is_jbod(s)) { + max_ld_disks = 0; + } + if (max_ld_disks > MFI_MAX_LD) { + max_ld_disks = MFI_MAX_LD; + } + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + + if (num_ld_disks >= max_ld_disks) { + break; + } + info.targetid[num_ld_disks] = sdev->lun; + num_ld_disks++; + dcmd_size++; + } + info.ld_count = cpu_to_le32(num_ld_disks); + info.size = dcmd_size; + trace_megasas_dcmd_ld_get_list(cmd->index, num_ld_disks, max_ld_disks); + + resid = dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + cmd->iov_size = dcmd_size - resid; + return MFI_STAT_OK; +} + +static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, + MegasasCmd *cmd) +{ + struct mfi_ld_info *info = cmd->iov_buf; + size_t dcmd_size = sizeof(struct mfi_ld_info); + uint8_t cdb[6]; + SCSIRequest *req; + ssize_t len, resid; + uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); + uint64_t ld_size; + + if (!cmd->iov_buf) { + cmd->iov_buf = g_malloc0(dcmd_size); + info = cmd->iov_buf; + megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83)); + req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); + if (!req) { + trace_megasas_dcmd_req_alloc_failed(cmd->index, + "LD get info vpd inquiry"); + g_free(cmd->iov_buf); + cmd->iov_buf = NULL; + return MFI_STAT_FLASH_ALLOC_FAIL; + } + trace_megasas_dcmd_internal_submit(cmd->index, + "LD get info vpd inquiry", lun); + len = scsi_req_enqueue(req); + if (len > 0) { + cmd->iov_size = len; + scsi_req_continue(req); + } + return MFI_STAT_INVALID_STATUS; + } + + info->ld_config.params.state = MFI_LD_STATE_OPTIMAL; + info->ld_config.properties.ld.v.target_id = lun; + info->ld_config.params.stripe_size = 3; + info->ld_config.params.num_drives = 1; + info->ld_config.params.is_consistent = 1; + /* Logical device size is in blocks */ + blk_get_geometry(sdev->conf.blk, &ld_size); + info->size = cpu_to_le64(ld_size); + memset(info->ld_config.span, 0, sizeof(info->ld_config.span)); + info->ld_config.span[0].start_block = 0; + info->ld_config.span[0].num_blocks = info->size; + info->ld_config.span[0].array_ref = cpu_to_le16(sdev_id); + + resid = dma_buf_read(cmd->iov_buf, dcmd_size, &cmd->qsg); + g_free(cmd->iov_buf); + cmd->iov_size = dcmd_size - resid; + cmd->iov_buf = NULL; + return MFI_STAT_OK; +} + +static int megasas_dcmd_ld_get_info(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_ld_info info; + size_t dcmd_size = sizeof(info); + uint16_t ld_id; + uint32_t max_ld_disks = s->fw_luns; + SCSIDevice *sdev = NULL; + int retval = MFI_STAT_DEVICE_NOT_FOUND; + + if (cmd->iov_size < dcmd_size) { + return MFI_STAT_INVALID_PARAMETER; + } + + /* mbox0 has the ID */ + ld_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]); + trace_megasas_dcmd_ld_get_info(cmd->index, ld_id); + + if (megasas_is_jbod(s)) { + return MFI_STAT_DEVICE_NOT_FOUND; + } + + if (ld_id < max_ld_disks) { + sdev = scsi_device_find(&s->bus, 0, ld_id, 0); + } + + if (sdev) { + retval = megasas_ld_get_info_submit(sdev, ld_id, cmd); + } + + return retval; +} + +static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd) +{ + uint8_t data[4096]; + struct mfi_config_data *info; + int num_pd_disks = 0, array_offset, ld_offset; + BusChild *kid; + + if (cmd->iov_size > 4096) { + return MFI_STAT_INVALID_PARAMETER; + } + + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + num_pd_disks++; + } + info = (struct mfi_config_data *)&data; + /* + * Array mapping: + * - One array per SCSI device + * - One logical drive per SCSI device + * spanning the entire device + */ + info->array_count = num_pd_disks; + info->array_size = sizeof(struct mfi_array) * num_pd_disks; + info->log_drv_count = num_pd_disks; + info->log_drv_size = sizeof(struct mfi_ld_config) * num_pd_disks; + info->spares_count = 0; + info->spares_size = sizeof(struct mfi_spare); + info->size = sizeof(struct mfi_config_data) + info->array_size + + info->log_drv_size; + if (info->size > 4096) { + return MFI_STAT_INVALID_PARAMETER; + } + + array_offset = sizeof(struct mfi_config_data); + ld_offset = array_offset + sizeof(struct mfi_array) * num_pd_disks; + + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (sdev->lun & 0xFF); + struct mfi_array *array; + struct mfi_ld_config *ld; + uint64_t pd_size; + int i; + + array = (struct mfi_array *)(data + array_offset); + blk_get_geometry(sdev->conf.blk, &pd_size); + array->size = cpu_to_le64(pd_size); + array->num_drives = 1; + array->array_ref = cpu_to_le16(sdev_id); + array->pd[0].ref.v.device_id = cpu_to_le16(sdev_id); + array->pd[0].ref.v.seq_num = 0; + array->pd[0].fw_state = MFI_PD_STATE_ONLINE; + array->pd[0].encl.pd = 0xFF; + array->pd[0].encl.slot = (sdev->id & 0xFF); + for (i = 1; i < MFI_MAX_ROW_SIZE; i++) { + array->pd[i].ref.v.device_id = 0xFFFF; + array->pd[i].ref.v.seq_num = 0; + array->pd[i].fw_state = MFI_PD_STATE_UNCONFIGURED_GOOD; + array->pd[i].encl.pd = 0xFF; + array->pd[i].encl.slot = 0xFF; + } + array_offset += sizeof(struct mfi_array); + ld = (struct mfi_ld_config *)(data + ld_offset); + memset(ld, 0, sizeof(struct mfi_ld_config)); + ld->properties.ld.v.target_id = sdev->id; + ld->properties.default_cache_policy = MR_LD_CACHE_READ_AHEAD | + MR_LD_CACHE_READ_ADAPTIVE; + ld->properties.current_cache_policy = MR_LD_CACHE_READ_AHEAD | + MR_LD_CACHE_READ_ADAPTIVE; + ld->params.state = MFI_LD_STATE_OPTIMAL; + ld->params.stripe_size = 3; + ld->params.num_drives = 1; + ld->params.span_depth = 1; + ld->params.is_consistent = 1; + ld->span[0].start_block = 0; + ld->span[0].num_blocks = cpu_to_le64(pd_size); + ld->span[0].array_ref = cpu_to_le16(sdev_id); + ld_offset += sizeof(struct mfi_ld_config); + } + + cmd->iov_size -= dma_buf_read((uint8_t *)data, info->size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_ctrl_props info; + size_t dcmd_size = sizeof(info); + + memset(&info, 0x0, dcmd_size); + if (cmd->iov_size < dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + info.pred_fail_poll_interval = cpu_to_le16(300); + info.intr_throttle_cnt = cpu_to_le16(16); + info.intr_throttle_timeout = cpu_to_le16(50); + info.rebuild_rate = 30; + info.patrol_read_rate = 30; + info.bgi_rate = 30; + info.cc_rate = 30; + info.recon_rate = 30; + info.cache_flush_interval = 4; + info.spinup_drv_cnt = 2; + info.spinup_delay = 6; + info.ecc_bucket_size = 15; + info.ecc_bucket_leak_rate = cpu_to_le16(1440); + info.expose_encl_devices = 1; + + cmd->iov_size -= dma_buf_read((uint8_t *)&info, dcmd_size, &cmd->qsg); + return MFI_STAT_OK; +} + +static int megasas_cache_flush(MegasasState *s, MegasasCmd *cmd) +{ + blk_drain_all(); + return MFI_STAT_OK; +} + +static int megasas_ctrl_shutdown(MegasasState *s, MegasasCmd *cmd) +{ + s->fw_state = MFI_FWSTATE_READY; + return MFI_STAT_OK; +} + +/* Some implementations use CLUSTER RESET LD to simulate a device reset */ +static int megasas_cluster_reset_ld(MegasasState *s, MegasasCmd *cmd) +{ + uint16_t target_id; + int i; + + /* mbox0 contains the device index */ + target_id = le16_to_cpu(cmd->frame->dcmd.mbox[0]); + trace_megasas_dcmd_reset_ld(cmd->index, target_id); + for (i = 0; i < s->fw_cmds; i++) { + MegasasCmd *tmp_cmd = &s->frames[i]; + if (tmp_cmd->req && tmp_cmd->req->dev->id == target_id) { + SCSIDevice *d = tmp_cmd->req->dev; + qdev_reset_all(&d->qdev); + } + } + return MFI_STAT_OK; +} + +static int megasas_dcmd_set_properties(MegasasState *s, MegasasCmd *cmd) +{ + struct mfi_ctrl_props info; + size_t dcmd_size = sizeof(info); + + if (cmd->iov_size < dcmd_size) { + trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, + dcmd_size); + return MFI_STAT_INVALID_PARAMETER; + } + dma_buf_write((uint8_t *)&info, cmd->iov_size, &cmd->qsg); + trace_megasas_dcmd_unsupported(cmd->index, cmd->iov_size); + return MFI_STAT_OK; +} + +static int megasas_dcmd_dummy(MegasasState *s, MegasasCmd *cmd) +{ + trace_megasas_dcmd_dummy(cmd->index, cmd->iov_size); + return MFI_STAT_OK; +} + +static const struct dcmd_cmd_tbl_t { + int opcode; + const char *desc; + int (*func)(MegasasState *s, MegasasCmd *cmd); +} dcmd_cmd_tbl[] = { + { MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC, "CTRL_HOST_MEM_ALLOC", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_GET_INFO, "CTRL_GET_INFO", + megasas_ctrl_get_info }, + { MFI_DCMD_CTRL_GET_PROPERTIES, "CTRL_GET_PROPERTIES", + megasas_dcmd_get_properties }, + { MFI_DCMD_CTRL_SET_PROPERTIES, "CTRL_SET_PROPERTIES", + megasas_dcmd_set_properties }, + { MFI_DCMD_CTRL_ALARM_GET, "CTRL_ALARM_GET", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_ALARM_ENABLE, "CTRL_ALARM_ENABLE", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_ALARM_DISABLE, "CTRL_ALARM_DISABLE", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_ALARM_SILENCE, "CTRL_ALARM_SILENCE", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_ALARM_TEST, "CTRL_ALARM_TEST", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_EVENT_GETINFO, "CTRL_EVENT_GETINFO", + megasas_event_info }, + { MFI_DCMD_CTRL_EVENT_GET, "CTRL_EVENT_GET", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_EVENT_WAIT, "CTRL_EVENT_WAIT", + megasas_event_wait }, + { MFI_DCMD_CTRL_SHUTDOWN, "CTRL_SHUTDOWN", + megasas_ctrl_shutdown }, + { MFI_DCMD_HIBERNATE_STANDBY, "CTRL_STANDBY", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_GET_TIME, "CTRL_GET_TIME", + megasas_dcmd_get_fw_time }, + { MFI_DCMD_CTRL_SET_TIME, "CTRL_SET_TIME", + megasas_dcmd_set_fw_time }, + { MFI_DCMD_CTRL_BIOS_DATA_GET, "CTRL_BIOS_DATA_GET", + megasas_dcmd_get_bios_info }, + { MFI_DCMD_CTRL_FACTORY_DEFAULTS, "CTRL_FACTORY_DEFAULTS", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_MFC_DEFAULTS_GET, "CTRL_MFC_DEFAULTS_GET", + megasas_mfc_get_defaults }, + { MFI_DCMD_CTRL_MFC_DEFAULTS_SET, "CTRL_MFC_DEFAULTS_SET", + megasas_dcmd_dummy }, + { MFI_DCMD_CTRL_CACHE_FLUSH, "CTRL_CACHE_FLUSH", + megasas_cache_flush }, + { MFI_DCMD_PD_GET_LIST, "PD_GET_LIST", + megasas_dcmd_pd_get_list }, + { MFI_DCMD_PD_LIST_QUERY, "PD_LIST_QUERY", + megasas_dcmd_pd_list_query }, + { MFI_DCMD_PD_GET_INFO, "PD_GET_INFO", + megasas_dcmd_pd_get_info }, + { MFI_DCMD_PD_STATE_SET, "PD_STATE_SET", + megasas_dcmd_dummy }, + { MFI_DCMD_PD_REBUILD, "PD_REBUILD", + megasas_dcmd_dummy }, + { MFI_DCMD_PD_BLINK, "PD_BLINK", + megasas_dcmd_dummy }, + { MFI_DCMD_PD_UNBLINK, "PD_UNBLINK", + megasas_dcmd_dummy }, + { MFI_DCMD_LD_GET_LIST, "LD_GET_LIST", + megasas_dcmd_ld_get_list}, + { MFI_DCMD_LD_LIST_QUERY, "LD_LIST_QUERY", + megasas_dcmd_ld_list_query }, + { MFI_DCMD_LD_GET_INFO, "LD_GET_INFO", + megasas_dcmd_ld_get_info }, + { MFI_DCMD_LD_GET_PROP, "LD_GET_PROP", + megasas_dcmd_dummy }, + { MFI_DCMD_LD_SET_PROP, "LD_SET_PROP", + megasas_dcmd_dummy }, + { MFI_DCMD_LD_DELETE, "LD_DELETE", + megasas_dcmd_dummy }, + { MFI_DCMD_CFG_READ, "CFG_READ", + megasas_dcmd_cfg_read }, + { MFI_DCMD_CFG_ADD, "CFG_ADD", + megasas_dcmd_dummy }, + { MFI_DCMD_CFG_CLEAR, "CFG_CLEAR", + megasas_dcmd_dummy }, + { MFI_DCMD_CFG_FOREIGN_READ, "CFG_FOREIGN_READ", + megasas_dcmd_dummy }, + { MFI_DCMD_CFG_FOREIGN_IMPORT, "CFG_FOREIGN_IMPORT", + megasas_dcmd_dummy }, + { MFI_DCMD_BBU_STATUS, "BBU_STATUS", + megasas_dcmd_dummy }, + { MFI_DCMD_BBU_CAPACITY_INFO, "BBU_CAPACITY_INFO", + megasas_dcmd_dummy }, + { MFI_DCMD_BBU_DESIGN_INFO, "BBU_DESIGN_INFO", + megasas_dcmd_dummy }, + { MFI_DCMD_BBU_PROP_GET, "BBU_PROP_GET", + megasas_dcmd_dummy }, + { MFI_DCMD_CLUSTER, "CLUSTER", + megasas_dcmd_dummy }, + { MFI_DCMD_CLUSTER_RESET_ALL, "CLUSTER_RESET_ALL", + megasas_dcmd_dummy }, + { MFI_DCMD_CLUSTER_RESET_LD, "CLUSTER_RESET_LD", + megasas_cluster_reset_ld }, + { -1, NULL, NULL } +}; + +static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) +{ + int opcode, len; + int retval = 0; + const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl; + + opcode = le32_to_cpu(cmd->frame->dcmd.opcode); + trace_megasas_handle_dcmd(cmd->index, opcode); + len = megasas_map_dcmd(s, cmd); + if (len < 0) { + return MFI_STAT_MEMORY_NOT_AVAILABLE; + } + while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) { + cmdptr++; + } + if (cmdptr->opcode == -1) { + trace_megasas_dcmd_unhandled(cmd->index, opcode, len); + retval = megasas_dcmd_dummy(s, cmd); + } else { + trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len); + retval = cmdptr->func(s, cmd); + } + if (retval != MFI_STAT_INVALID_STATUS) { + megasas_finish_dcmd(cmd, len); + } + return retval; +} + +static int megasas_finish_internal_dcmd(MegasasCmd *cmd, + SCSIRequest *req) +{ + int opcode; + int retval = MFI_STAT_OK; + int lun = req->lun; + + opcode = le32_to_cpu(cmd->frame->dcmd.opcode); + scsi_req_unref(req); + trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun); + switch (opcode) { + case MFI_DCMD_PD_GET_INFO: + retval = megasas_pd_get_info_submit(req->dev, lun, cmd); + break; + case MFI_DCMD_LD_GET_INFO: + retval = megasas_ld_get_info_submit(req->dev, lun, cmd); + break; + default: + trace_megasas_dcmd_internal_invalid(cmd->index, opcode); + retval = MFI_STAT_INVALID_DCMD; + break; + } + if (retval != MFI_STAT_INVALID_STATUS) { + megasas_finish_dcmd(cmd, cmd->iov_size); + } + return retval; +} + +static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write) +{ + int len; + + len = scsi_req_enqueue(cmd->req); + if (len < 0) { + len = -len; + } + if (len > 0) { + if (len > cmd->iov_size) { + if (is_write) { + trace_megasas_iov_write_overflow(cmd->index, len, + cmd->iov_size); + } else { + trace_megasas_iov_read_overflow(cmd->index, len, + cmd->iov_size); + } + } + if (len < cmd->iov_size) { + if (is_write) { + trace_megasas_iov_write_underflow(cmd->index, len, + cmd->iov_size); + } else { + trace_megasas_iov_read_underflow(cmd->index, len, + cmd->iov_size); + } + cmd->iov_size = len; + } + scsi_req_continue(cmd->req); + } + return len; +} + +static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, + bool is_logical) +{ + uint8_t *cdb; + bool is_write; + struct SCSIDevice *sdev = NULL; + + cdb = cmd->frame->pass.cdb; + + if (is_logical) { + if (cmd->frame->header.target_id >= MFI_MAX_LD || + cmd->frame->header.lun_id != 0) { + trace_megasas_scsi_target_not_present( + mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + cmd->frame->header.target_id, cmd->frame->header.lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + } + sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, + cmd->frame->header.lun_id); + + cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); + trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd], + is_logical, cmd->frame->header.target_id, + cmd->frame->header.lun_id, sdev, cmd->iov_size); + + if (!sdev || (megasas_is_jbod(s) && is_logical)) { + trace_megasas_scsi_target_not_present( + mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + cmd->frame->header.target_id, cmd->frame->header.lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + + if (cmd->frame->header.cdb_len > 16) { + trace_megasas_scsi_invalid_cdb_len( + mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + cmd->frame->header.target_id, cmd->frame->header.lun_id, + cmd->frame->header.cdb_len); + megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + + if (megasas_map_sgl(s, cmd, &cmd->frame->pass.sgl)) { + megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + + cmd->req = scsi_req_new(sdev, cmd->index, + cmd->frame->header.lun_id, cdb, cmd); + if (!cmd->req) { + trace_megasas_scsi_req_alloc_failed( + mfi_frame_desc[cmd->frame->header.frame_cmd], + cmd->frame->header.target_id, cmd->frame->header.lun_id); + megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); + cmd->frame->header.scsi_status = BUSY; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + + is_write = (cmd->req->cmd.mode == SCSI_XFER_TO_DEV); + if (cmd->iov_size) { + if (is_write) { + trace_megasas_scsi_write_start(cmd->index, cmd->iov_size); + } else { + trace_megasas_scsi_read_start(cmd->index, cmd->iov_size); + } + } else { + trace_megasas_scsi_nodata(cmd->index); + } + megasas_enqueue_req(cmd, is_write); + return MFI_STAT_INVALID_STATUS; +} + +static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) +{ + uint32_t lba_count, lba_start_hi, lba_start_lo; + uint64_t lba_start; + bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE); + uint8_t cdb[16]; + int len; + struct SCSIDevice *sdev = NULL; + + lba_count = le32_to_cpu(cmd->frame->io.header.data_len); + lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo); + lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi); + lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo; + + if (cmd->frame->header.target_id < MFI_MAX_LD && + cmd->frame->header.lun_id == 0) { + sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, + cmd->frame->header.lun_id); + } + + trace_megasas_handle_io(cmd->index, + mfi_frame_desc[cmd->frame->header.frame_cmd], + cmd->frame->header.target_id, + cmd->frame->header.lun_id, + (unsigned long)lba_start, (unsigned long)lba_count); + if (!sdev) { + trace_megasas_io_target_not_present(cmd->index, + mfi_frame_desc[cmd->frame->header.frame_cmd], + cmd->frame->header.target_id, cmd->frame->header.lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + + if (cmd->frame->header.cdb_len > 16) { + trace_megasas_scsi_invalid_cdb_len( + mfi_frame_desc[cmd->frame->header.frame_cmd], 1, + cmd->frame->header.target_id, cmd->frame->header.lun_id, + cmd->frame->header.cdb_len); + megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + + cmd->iov_size = lba_count * sdev->blocksize; + if (megasas_map_sgl(s, cmd, &cmd->frame->io.sgl)) { + megasas_write_sense(cmd, SENSE_CODE(TARGET_FAILURE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + + megasas_encode_lba(cdb, lba_start, lba_count, is_write); + cmd->req = scsi_req_new(sdev, cmd->index, + cmd->frame->header.lun_id, cdb, cmd); + if (!cmd->req) { + trace_megasas_scsi_req_alloc_failed( + mfi_frame_desc[cmd->frame->header.frame_cmd], + cmd->frame->header.target_id, cmd->frame->header.lun_id); + megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); + cmd->frame->header.scsi_status = BUSY; + s->event_count++; + return MFI_STAT_SCSI_DONE_WITH_ERROR; + } + len = megasas_enqueue_req(cmd, is_write); + if (len > 0) { + if (is_write) { + trace_megasas_io_write_start(cmd->index, lba_start, lba_count, len); + } else { + trace_megasas_io_read_start(cmd->index, lba_start, lba_count, len); + } + } + return MFI_STAT_INVALID_STATUS; +} + +static int megasas_finish_internal_command(MegasasCmd *cmd, + SCSIRequest *req, size_t resid) +{ + int retval = MFI_STAT_INVALID_CMD; + + if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { + cmd->iov_size -= resid; + retval = megasas_finish_internal_dcmd(cmd, req); + } + return retval; +} + +static QEMUSGList *megasas_get_sg_list(SCSIRequest *req) +{ + MegasasCmd *cmd = req->hba_private; + + if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { + return NULL; + } else { + return &cmd->qsg; + } +} + +static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) +{ + MegasasCmd *cmd = req->hba_private; + uint8_t *buf; + uint32_t opcode; + + trace_megasas_io_complete(cmd->index, len); + + if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) { + scsi_req_continue(req); + return; + } + + buf = scsi_req_get_buf(req); + opcode = le32_to_cpu(cmd->frame->dcmd.opcode); + if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) { + struct mfi_pd_info *info = cmd->iov_buf; + + if (info->inquiry_data[0] == 0x7f) { + memset(info->inquiry_data, 0, sizeof(info->inquiry_data)); + memcpy(info->inquiry_data, buf, len); + } else if (info->vpd_page83[0] == 0x7f) { + memset(info->vpd_page83, 0, sizeof(info->vpd_page83)); + memcpy(info->vpd_page83, buf, len); + } + scsi_req_continue(req); + } else if (opcode == MFI_DCMD_LD_GET_INFO) { + struct mfi_ld_info *info = cmd->iov_buf; + + if (cmd->iov_buf) { + memcpy(info->vpd_page83, buf, sizeof(info->vpd_page83)); + scsi_req_continue(req); + } + } +} + +static void megasas_command_complete(SCSIRequest *req, uint32_t status, + size_t resid) +{ + MegasasCmd *cmd = req->hba_private; + uint8_t cmd_status = MFI_STAT_OK; + + trace_megasas_command_complete(cmd->index, status, resid); + + if (cmd->req != req) { + /* + * Internal command complete + */ + cmd_status = megasas_finish_internal_command(cmd, req, resid); + if (cmd_status == MFI_STAT_INVALID_STATUS) { + return; + } + } else { + req->status = status; + trace_megasas_scsi_complete(cmd->index, req->status, + cmd->iov_size, req->cmd.xfer); + if (req->status != GOOD) { + cmd_status = MFI_STAT_SCSI_DONE_WITH_ERROR; + } + if (req->status == CHECK_CONDITION) { + megasas_copy_sense(cmd); + } + + megasas_unmap_sgl(cmd); + cmd->frame->header.scsi_status = req->status; + scsi_req_unref(cmd->req); + cmd->req = NULL; + } + cmd->frame->header.cmd_status = cmd_status; + megasas_unmap_frame(cmd->state, cmd); + megasas_complete_frame(cmd->state, cmd->context); +} + +static void megasas_command_cancel(SCSIRequest *req) +{ + MegasasCmd *cmd = req->hba_private; + + if (cmd) { + megasas_abort_command(cmd); + } else { + scsi_req_unref(req); + } +} + +static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd) +{ + uint64_t abort_ctx = le64_to_cpu(cmd->frame->abort.abort_context); + hwaddr abort_addr, addr_hi, addr_lo; + MegasasCmd *abort_cmd; + + addr_hi = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_hi); + addr_lo = le32_to_cpu(cmd->frame->abort.abort_mfi_addr_lo); + abort_addr = ((uint64_t)addr_hi << 32) | addr_lo; + + abort_cmd = megasas_lookup_frame(s, abort_addr); + if (!abort_cmd) { + trace_megasas_abort_no_cmd(cmd->index, abort_ctx); + s->event_count++; + return MFI_STAT_OK; + } + if (!megasas_use_queue64(s)) { + abort_ctx &= (uint64_t)0xFFFFFFFF; + } + if (abort_cmd->context != abort_ctx) { + trace_megasas_abort_invalid_context(cmd->index, abort_cmd->index, + abort_cmd->context); + s->event_count++; + return MFI_STAT_ABORT_NOT_POSSIBLE; + } + trace_megasas_abort_frame(cmd->index, abort_cmd->index); + megasas_abort_command(abort_cmd); + if (!s->event_cmd || abort_cmd != s->event_cmd) { + s->event_cmd = NULL; + } + s->event_count++; + return MFI_STAT_OK; +} + +static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, + uint32_t frame_count) +{ + uint8_t frame_status = MFI_STAT_INVALID_CMD; + uint64_t frame_context; + MegasasCmd *cmd; + + /* + * Always read 64bit context, top bits will be + * masked out if required in megasas_enqueue_frame() + */ + frame_context = megasas_frame_get_context(s, frame_addr); + + cmd = megasas_enqueue_frame(s, frame_addr, frame_context, frame_count); + if (!cmd) { + /* reply queue full */ + trace_megasas_frame_busy(frame_addr); + megasas_frame_set_scsi_status(s, frame_addr, BUSY); + megasas_frame_set_cmd_status(s, frame_addr, MFI_STAT_SCSI_DONE_WITH_ERROR); + megasas_complete_frame(s, frame_context); + s->event_count++; + return; + } + switch (cmd->frame->header.frame_cmd) { + case MFI_CMD_INIT: + frame_status = megasas_init_firmware(s, cmd); + break; + case MFI_CMD_DCMD: + frame_status = megasas_handle_dcmd(s, cmd); + break; + case MFI_CMD_ABORT: + frame_status = megasas_handle_abort(s, cmd); + break; + case MFI_CMD_PD_SCSI_IO: + frame_status = megasas_handle_scsi(s, cmd, 0); + break; + case MFI_CMD_LD_SCSI_IO: + frame_status = megasas_handle_scsi(s, cmd, 1); + break; + case MFI_CMD_LD_READ: + case MFI_CMD_LD_WRITE: + frame_status = megasas_handle_io(s, cmd); + break; + default: + trace_megasas_unhandled_frame_cmd(cmd->index, + cmd->frame->header.frame_cmd); + s->event_count++; + break; + } + if (frame_status != MFI_STAT_INVALID_STATUS) { + if (cmd->frame) { + cmd->frame->header.cmd_status = frame_status; + } else { + megasas_frame_set_cmd_status(s, frame_addr, frame_status); + } + megasas_unmap_frame(s, cmd); + megasas_complete_frame(s, cmd->context); + } +} + +static uint64_t megasas_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + MegasasState *s = opaque; + PCIDevice *pci_dev = PCI_DEVICE(s); + MegasasBaseClass *base_class = MEGASAS_DEVICE_GET_CLASS(s); + uint32_t retval = 0; + + switch (addr) { + case MFI_IDB: + retval = 0; + trace_megasas_mmio_readl("MFI_IDB", retval); + break; + case MFI_OMSG0: + case MFI_OSP0: + retval = (msix_present(pci_dev) ? MFI_FWSTATE_MSIX_SUPPORTED : 0) | + (s->fw_state & MFI_FWSTATE_MASK) | + ((s->fw_sge & 0xff) << 16) | + (s->fw_cmds & 0xFFFF); + trace_megasas_mmio_readl(addr == MFI_OMSG0 ? "MFI_OMSG0" : "MFI_OSP0", + retval); + break; + case MFI_OSTS: + if (megasas_intr_enabled(s) && s->doorbell) { + retval = base_class->osts; + } + trace_megasas_mmio_readl("MFI_OSTS", retval); + break; + case MFI_OMSK: + retval = s->intr_mask; + trace_megasas_mmio_readl("MFI_OMSK", retval); + break; + case MFI_ODCR0: + retval = s->doorbell ? 1 : 0; + trace_megasas_mmio_readl("MFI_ODCR0", retval); + break; + case MFI_DIAG: + retval = s->diag; + trace_megasas_mmio_readl("MFI_DIAG", retval); + break; + case MFI_OSP1: + retval = 15; + trace_megasas_mmio_readl("MFI_OSP1", retval); + break; + default: + trace_megasas_mmio_invalid_readl(addr); + break; + } + return retval; +} + +static int adp_reset_seq[] = {0x00, 0x04, 0x0b, 0x02, 0x07, 0x0d}; + +static void megasas_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + MegasasState *s = opaque; + PCIDevice *pci_dev = PCI_DEVICE(s); + uint64_t frame_addr; + uint32_t frame_count; + int i; + + switch (addr) { + case MFI_IDB: + trace_megasas_mmio_writel("MFI_IDB", val); + if (val & MFI_FWINIT_ABORT) { + /* Abort all pending cmds */ + for (i = 0; i < s->fw_cmds; i++) { + megasas_abort_command(&s->frames[i]); + } + } + if (val & MFI_FWINIT_READY) { + /* move to FW READY */ + megasas_soft_reset(s); + } + if (val & MFI_FWINIT_MFIMODE) { + /* discard MFIs */ + } + if (val & MFI_FWINIT_STOP_ADP) { + /* Terminal error, stop processing */ + s->fw_state = MFI_FWSTATE_FAULT; + } + break; + case MFI_OMSK: + trace_megasas_mmio_writel("MFI_OMSK", val); + s->intr_mask = val; + if (!megasas_intr_enabled(s) && + !msi_enabled(pci_dev) && + !msix_enabled(pci_dev)) { + trace_megasas_irq_lower(); + pci_irq_deassert(pci_dev); + } + if (megasas_intr_enabled(s)) { + if (msix_enabled(pci_dev)) { + trace_megasas_msix_enabled(0); + } else if (msi_enabled(pci_dev)) { + trace_megasas_msi_enabled(0); + } else { + trace_megasas_intr_enabled(); + } + } else { + trace_megasas_intr_disabled(); + megasas_soft_reset(s); + } + break; + case MFI_ODCR0: + trace_megasas_mmio_writel("MFI_ODCR0", val); + s->doorbell = 0; + if (megasas_intr_enabled(s)) { + if (!msix_enabled(pci_dev) && !msi_enabled(pci_dev)) { + trace_megasas_irq_lower(); + pci_irq_deassert(pci_dev); + } + } + break; + case MFI_IQPH: + trace_megasas_mmio_writel("MFI_IQPH", val); + /* Received high 32 bits of a 64 bit MFI frame address */ + s->frame_hi = val; + break; + case MFI_IQPL: + trace_megasas_mmio_writel("MFI_IQPL", val); + /* Received low 32 bits of a 64 bit MFI frame address */ + /* Fallthrough */ + case MFI_IQP: + if (addr == MFI_IQP) { + trace_megasas_mmio_writel("MFI_IQP", val); + /* Received 64 bit MFI frame address */ + s->frame_hi = 0; + } + frame_addr = (val & ~0x1F); + /* Add possible 64 bit offset */ + frame_addr |= ((uint64_t)s->frame_hi << 32); + s->frame_hi = 0; + frame_count = (val >> 1) & 0xF; + megasas_handle_frame(s, frame_addr, frame_count); + break; + case MFI_SEQ: + trace_megasas_mmio_writel("MFI_SEQ", val); + /* Magic sequence to start ADP reset */ + if (adp_reset_seq[s->adp_reset] == val) { + s->adp_reset++; + } else { + s->adp_reset = 0; + s->diag = 0; + } + if (s->adp_reset == 6) { + s->diag = MFI_DIAG_WRITE_ENABLE; + } + break; + case MFI_DIAG: + trace_megasas_mmio_writel("MFI_DIAG", val); + /* ADP reset */ + if ((s->diag & MFI_DIAG_WRITE_ENABLE) && + (val & MFI_DIAG_RESET_ADP)) { + s->diag |= MFI_DIAG_RESET_ADP; + megasas_soft_reset(s); + s->adp_reset = 0; + s->diag = 0; + } + break; + default: + trace_megasas_mmio_invalid_writel(addr, val); + break; + } +} + +static const MemoryRegionOps megasas_mmio_ops = { + .read = megasas_mmio_read, + .write = megasas_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + } +}; + +static uint64_t megasas_port_read(void *opaque, hwaddr addr, + unsigned size) +{ + return megasas_mmio_read(opaque, addr & 0xff, size); +} + +static void megasas_port_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + megasas_mmio_write(opaque, addr & 0xff, val, size); +} + +static const MemoryRegionOps megasas_port_ops = { + .read = megasas_port_read, + .write = megasas_port_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + } +}; + +static uint64_t megasas_queue_read(void *opaque, hwaddr addr, + unsigned size) +{ + return 0; +} + +static void megasas_queue_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + return; +} + +static const MemoryRegionOps megasas_queue_ops = { + .read = megasas_queue_read, + .write = megasas_queue_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + } +}; + +static void megasas_soft_reset(MegasasState *s) +{ + int i; + MegasasCmd *cmd; + + trace_megasas_reset(s->fw_state); + for (i = 0; i < s->fw_cmds; i++) { + cmd = &s->frames[i]; + megasas_abort_command(cmd); + } + if (s->fw_state == MFI_FWSTATE_READY) { + BusChild *kid; + + /* + * The EFI firmware doesn't handle UA, + * so we need to clear the Power On/Reset UA + * after the initial reset. + */ + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + + sdev->unit_attention = SENSE_CODE(NO_SENSE); + scsi_device_unit_attention_reported(sdev); + } + } + megasas_reset_frames(s); + s->reply_queue_len = s->fw_cmds; + s->reply_queue_pa = 0; + s->consumer_pa = 0; + s->producer_pa = 0; + s->fw_state = MFI_FWSTATE_READY; + s->doorbell = 0; + s->intr_mask = MEGASAS_INTR_DISABLED_MASK; + s->frame_hi = 0; + s->flags &= ~MEGASAS_MASK_USE_QUEUE64; + s->event_count++; + s->boot_event = s->event_count; +} + +static void megasas_scsi_reset(DeviceState *dev) +{ + MegasasState *s = MEGASAS(dev); + + megasas_soft_reset(s); +} + +static const VMStateDescription vmstate_megasas_gen1 = { + .name = "megasas", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, MegasasState), + VMSTATE_MSIX(parent_obj, MegasasState), + + VMSTATE_INT32(fw_state, MegasasState), + VMSTATE_INT32(intr_mask, MegasasState), + VMSTATE_INT32(doorbell, MegasasState), + VMSTATE_UINT64(reply_queue_pa, MegasasState), + VMSTATE_UINT64(consumer_pa, MegasasState), + VMSTATE_UINT64(producer_pa, MegasasState), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_megasas_gen2 = { + .name = "megasas-gen2", + .version_id = 0, + .minimum_version_id = 0, + .minimum_version_id_old = 0, + .fields = (VMStateField[]) { + VMSTATE_PCIE_DEVICE(parent_obj, MegasasState), + VMSTATE_MSIX(parent_obj, MegasasState), + + VMSTATE_INT32(fw_state, MegasasState), + VMSTATE_INT32(intr_mask, MegasasState), + VMSTATE_INT32(doorbell, MegasasState), + VMSTATE_UINT64(reply_queue_pa, MegasasState), + VMSTATE_UINT64(consumer_pa, MegasasState), + VMSTATE_UINT64(producer_pa, MegasasState), + VMSTATE_END_OF_LIST() + } +}; + +static void megasas_scsi_uninit(PCIDevice *d) +{ + MegasasState *s = MEGASAS(d); + + if (megasas_use_msix(s)) { + msix_uninit(d, &s->mmio_io, &s->mmio_io); + } + if (megasas_use_msi(s)) { + msi_uninit(d); + } +} + +static const struct SCSIBusInfo megasas_scsi_info = { + .tcq = true, + .max_target = MFI_MAX_LD, + .max_lun = 255, + + .transfer_data = megasas_xfer_complete, + .get_sg_list = megasas_get_sg_list, + .complete = megasas_command_complete, + .cancel = megasas_command_cancel, +}; + +static void megasas_scsi_realize(PCIDevice *dev, Error **errp) +{ + DeviceState *d = DEVICE(dev); + MegasasState *s = MEGASAS(dev); + MegasasBaseClass *b = MEGASAS_DEVICE_GET_CLASS(s); + uint8_t *pci_conf; + int i, bar_type; + + pci_conf = dev->config; + + /* PCI latency timer = 0 */ + pci_conf[PCI_LATENCY_TIMER] = 0; + /* Interrupt pin 1 */ + pci_conf[PCI_INTERRUPT_PIN] = 0x01; + + memory_region_init_io(&s->mmio_io, OBJECT(s), &megasas_mmio_ops, s, + "megasas-mmio", 0x4000); + memory_region_init_io(&s->port_io, OBJECT(s), &megasas_port_ops, s, + "megasas-io", 256); + memory_region_init_io(&s->queue_io, OBJECT(s), &megasas_queue_ops, s, + "megasas-queue", 0x40000); + + if (megasas_use_msi(s) && + msi_init(dev, 0x50, 1, true, false)) { + s->flags &= ~MEGASAS_MASK_USE_MSI; + } + if (megasas_use_msix(s) && + msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000, + &s->mmio_io, b->mmio_bar, 0x3800, 0x68)) { + s->flags &= ~MEGASAS_MASK_USE_MSIX; + } + if (pci_is_express(dev)) { + pcie_endpoint_cap_init(dev, 0xa0); + } + + bar_type = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64; + pci_register_bar(dev, b->ioport_bar, + PCI_BASE_ADDRESS_SPACE_IO, &s->port_io); + pci_register_bar(dev, b->mmio_bar, bar_type, &s->mmio_io); + pci_register_bar(dev, 3, bar_type, &s->queue_io); + + if (megasas_use_msix(s)) { + msix_vector_use(dev, 0); + } + + s->fw_state = MFI_FWSTATE_READY; + if (!s->sas_addr) { + s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) | + IEEE_COMPANY_LOCALLY_ASSIGNED) << 36; + s->sas_addr |= (pci_bus_num(dev->bus) << 16); + s->sas_addr |= (PCI_SLOT(dev->devfn) << 8); + s->sas_addr |= PCI_FUNC(dev->devfn); + } + if (!s->hba_serial) { + s->hba_serial = g_strdup(MEGASAS_HBA_SERIAL); + } + if (s->fw_sge >= MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE) { + s->fw_sge = MEGASAS_MAX_SGE - MFI_PASS_FRAME_SIZE; + } else if (s->fw_sge >= 128 - MFI_PASS_FRAME_SIZE) { + s->fw_sge = 128 - MFI_PASS_FRAME_SIZE; + } else { + s->fw_sge = 64 - MFI_PASS_FRAME_SIZE; + } + if (s->fw_cmds > MEGASAS_MAX_FRAMES) { + s->fw_cmds = MEGASAS_MAX_FRAMES; + } + trace_megasas_init(s->fw_sge, s->fw_cmds, + megasas_is_jbod(s) ? "jbod" : "raid"); + + if (megasas_is_jbod(s)) { + s->fw_luns = MFI_MAX_SYS_PDS; + } else { + s->fw_luns = MFI_MAX_LD; + } + s->producer_pa = 0; + s->consumer_pa = 0; + for (i = 0; i < s->fw_cmds; i++) { + s->frames[i].index = i; + s->frames[i].context = -1; + s->frames[i].pa = 0; + s->frames[i].state = s; + } + + scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), + &megasas_scsi_info, NULL); + if (!d->hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus, errp); + } +} + +static Property megasas_properties_gen1[] = { + DEFINE_PROP_UINT32("max_sge", MegasasState, fw_sge, + MEGASAS_DEFAULT_SGE), + DEFINE_PROP_UINT32("max_cmds", MegasasState, fw_cmds, + MEGASAS_DEFAULT_FRAMES), + DEFINE_PROP_STRING("hba_serial", MegasasState, hba_serial), + DEFINE_PROP_UINT64("sas_address", MegasasState, sas_addr, 0), + DEFINE_PROP_BIT("use_msi", MegasasState, flags, + MEGASAS_FLAG_USE_MSI, false), + DEFINE_PROP_BIT("use_msix", MegasasState, flags, + MEGASAS_FLAG_USE_MSIX, false), + DEFINE_PROP_BIT("use_jbod", MegasasState, flags, + MEGASAS_FLAG_USE_JBOD, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static Property megasas_properties_gen2[] = { + DEFINE_PROP_UINT32("max_sge", MegasasState, fw_sge, + MEGASAS_DEFAULT_SGE), + DEFINE_PROP_UINT32("max_cmds", MegasasState, fw_cmds, + MEGASAS_GEN2_DEFAULT_FRAMES), + DEFINE_PROP_STRING("hba_serial", MegasasState, hba_serial), + DEFINE_PROP_UINT64("sas_address", MegasasState, sas_addr, 0), + DEFINE_PROP_BIT("use_msi", MegasasState, flags, + MEGASAS_FLAG_USE_MSI, true), + DEFINE_PROP_BIT("use_msix", MegasasState, flags, + MEGASAS_FLAG_USE_MSIX, true), + DEFINE_PROP_BIT("use_jbod", MegasasState, flags, + MEGASAS_FLAG_USE_JBOD, false), + DEFINE_PROP_END_OF_LIST(), +}; + +typedef struct MegasasInfo { + const char *name; + const char *desc; + const char *product_name; + const char *product_version; + uint16_t device_id; + uint16_t subsystem_id; + int ioport_bar; + int mmio_bar; + bool is_express; + int osts; + const VMStateDescription *vmsd; + Property *props; +} MegasasInfo; + +static struct MegasasInfo megasas_devices[] = { + { + .name = TYPE_MEGASAS_GEN1, + .desc = "LSI MegaRAID SAS 1078", + .product_name = "LSI MegaRAID SAS 8708EM2", + .product_version = MEGASAS_VERSION_GEN1, + .device_id = PCI_DEVICE_ID_LSI_SAS1078, + .subsystem_id = 0x1013, + .ioport_bar = 2, + .mmio_bar = 0, + .osts = MFI_1078_RM | 1, + .is_express = false, + .vmsd = &vmstate_megasas_gen1, + .props = megasas_properties_gen1, + },{ + .name = TYPE_MEGASAS_GEN2, + .desc = "LSI MegaRAID SAS 2108", + .product_name = "LSI MegaRAID SAS 9260-8i", + .product_version = MEGASAS_VERSION_GEN2, + .device_id = PCI_DEVICE_ID_LSI_SAS0079, + .subsystem_id = 0x9261, + .ioport_bar = 0, + .mmio_bar = 1, + .osts = MFI_GEN2_RM, + .is_express = true, + .vmsd = &vmstate_megasas_gen2, + .props = megasas_properties_gen2, + } +}; + +static void megasas_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + MegasasBaseClass *e = MEGASAS_DEVICE_CLASS(oc); + const MegasasInfo *info = data; + + pc->realize = megasas_scsi_realize; + pc->exit = megasas_scsi_uninit; + pc->vendor_id = PCI_VENDOR_ID_LSI_LOGIC; + pc->device_id = info->device_id; + pc->subsystem_vendor_id = PCI_VENDOR_ID_LSI_LOGIC; + pc->subsystem_id = info->subsystem_id; + pc->class_id = PCI_CLASS_STORAGE_RAID; + pc->is_express = info->is_express; + e->mmio_bar = info->mmio_bar; + e->ioport_bar = info->ioport_bar; + e->osts = info->osts; + e->product_name = info->product_name; + e->product_version = info->product_version; + dc->props = info->props; + dc->reset = megasas_scsi_reset; + dc->vmsd = info->vmsd; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = info->desc; +} + +static const TypeInfo megasas_info = { + .name = TYPE_MEGASAS_BASE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(MegasasState), + .class_size = sizeof(MegasasBaseClass), + .abstract = true, +}; + +static void megasas_register_types(void) +{ + int i; + + type_register_static(&megasas_info); + for (i = 0; i < ARRAY_SIZE(megasas_devices); i++) { + const MegasasInfo *info = &megasas_devices[i]; + TypeInfo type_info = {}; + + type_info.name = info->name; + type_info.parent = TYPE_MEGASAS_BASE; + type_info.class_data = (void *)info; + type_info.class_init = megasas_class_init; + + type_register(&type_info); + } +} + +type_init(megasas_register_types) diff --git a/qemu/hw/scsi/mfi.h b/qemu/hw/scsi/mfi.h new file mode 100644 index 000000000..29d41775d --- /dev/null +++ b/qemu/hw/scsi/mfi.h @@ -0,0 +1,1272 @@ +/* + * NetBSD header file, copied from + * http://gitorious.org/freebsd/freebsd/blobs/HEAD/sys/dev/mfi/mfireg.h + */ +/*- + * Copyright (c) 2006 IronPort Systems + * Copyright (c) 2007 LSI Corp. + * Copyright (c) 2007 Rajesh Prabhakaran. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef MFI_REG_H +#define MFI_REG_H + +/* + * MegaRAID SAS MFI firmware definitions + */ + +/* + * Start with the register set. All registers are 32 bits wide. + * The usual Intel IOP style setup. + */ +#define MFI_IMSG0 0x10 /* Inbound message 0 */ +#define MFI_IMSG1 0x14 /* Inbound message 1 */ +#define MFI_OMSG0 0x18 /* Outbound message 0 */ +#define MFI_OMSG1 0x1c /* Outbound message 1 */ +#define MFI_IDB 0x20 /* Inbound doorbell */ +#define MFI_ISTS 0x24 /* Inbound interrupt status */ +#define MFI_IMSK 0x28 /* Inbound interrupt mask */ +#define MFI_ODB 0x2c /* Outbound doorbell */ +#define MFI_OSTS 0x30 /* Outbound interrupt status */ +#define MFI_OMSK 0x34 /* Outbound interrupt mask */ +#define MFI_IQP 0x40 /* Inbound queue port */ +#define MFI_OQP 0x44 /* Outbound queue port */ + +/* + * 1078 specific related register + */ +#define MFI_ODR0 0x9c /* outbound doorbell register0 */ +#define MFI_ODCR0 0xa0 /* outbound doorbell clear register0 */ +#define MFI_OSP0 0xb0 /* outbound scratch pad0 */ +#define MFI_OSP1 0xb4 /* outbound scratch pad1 */ +#define MFI_IQPL 0xc0 /* Inbound queue port (low bytes) */ +#define MFI_IQPH 0xc4 /* Inbound queue port (high bytes) */ +#define MFI_DIAG 0xf8 /* Host diag */ +#define MFI_SEQ 0xfc /* Sequencer offset */ +#define MFI_1078_EIM 0x80000004 /* 1078 enable intrrupt mask */ +#define MFI_RMI 0x2 /* reply message interrupt */ +#define MFI_1078_RM 0x80000000 /* reply 1078 message interrupt */ +#define MFI_ODC 0x4 /* outbound doorbell change interrupt */ + +/* + * gen2 specific changes + */ +#define MFI_GEN2_EIM 0x00000005 /* gen2 enable interrupt mask */ +#define MFI_GEN2_RM 0x00000001 /* reply gen2 message interrupt */ + +/* + * skinny specific changes + */ +#define MFI_SKINNY_IDB 0x00 /* Inbound doorbell is at 0x00 for skinny */ +#define MFI_SKINNY_RM 0x00000001 /* reply skinny message interrupt */ + +/* Bits for MFI_OSTS */ +#define MFI_OSTS_INTR_VALID 0x00000002 + +/* + * Firmware state values. Found in OMSG0 during initialization. + */ +#define MFI_FWSTATE_MASK 0xf0000000 +#define MFI_FWSTATE_UNDEFINED 0x00000000 +#define MFI_FWSTATE_BB_INIT 0x10000000 +#define MFI_FWSTATE_FW_INIT 0x40000000 +#define MFI_FWSTATE_WAIT_HANDSHAKE 0x60000000 +#define MFI_FWSTATE_FW_INIT_2 0x70000000 +#define MFI_FWSTATE_DEVICE_SCAN 0x80000000 +#define MFI_FWSTATE_BOOT_MSG_PENDING 0x90000000 +#define MFI_FWSTATE_FLUSH_CACHE 0xa0000000 +#define MFI_FWSTATE_READY 0xb0000000 +#define MFI_FWSTATE_OPERATIONAL 0xc0000000 +#define MFI_FWSTATE_FAULT 0xf0000000 +#define MFI_FWSTATE_MAXSGL_MASK 0x00ff0000 +#define MFI_FWSTATE_MAXCMD_MASK 0x0000ffff +#define MFI_FWSTATE_MSIX_SUPPORTED 0x04000000 +#define MFI_FWSTATE_HOSTMEMREQD_MASK 0x08000000 + +/* + * Control bits to drive the card to ready state. These go into the IDB + * register. + */ +#define MFI_FWINIT_ABORT 0x00000001 /* Abort all pending commands */ +#define MFI_FWINIT_READY 0x00000002 /* Move from operational to ready */ +#define MFI_FWINIT_MFIMODE 0x00000004 /* unknown */ +#define MFI_FWINIT_CLEAR_HANDSHAKE 0x00000008 /* Respond to WAIT_HANDSHAKE */ +#define MFI_FWINIT_HOTPLUG 0x00000010 +#define MFI_FWINIT_STOP_ADP 0x00000020 /* Move to operational, stop */ +#define MFI_FWINIT_ADP_RESET 0x00000040 /* Reset ADP */ + +/* + * Control bits for the DIAG register + */ +#define MFI_DIAG_WRITE_ENABLE 0x00000080 +#define MFI_DIAG_RESET_ADP 0x00000004 + +/* MFI Commands */ +typedef enum { + MFI_CMD_INIT = 0x00, + MFI_CMD_LD_READ, + MFI_CMD_LD_WRITE, + MFI_CMD_LD_SCSI_IO, + MFI_CMD_PD_SCSI_IO, + MFI_CMD_DCMD, + MFI_CMD_ABORT, + MFI_CMD_SMP, + MFI_CMD_STP +} mfi_cmd_t; + +/* Direct commands */ +typedef enum { + MFI_DCMD_CTRL_MFI_HOST_MEM_ALLOC = 0x0100e100, + MFI_DCMD_CTRL_GET_INFO = 0x01010000, + MFI_DCMD_CTRL_GET_PROPERTIES = 0x01020100, + MFI_DCMD_CTRL_SET_PROPERTIES = 0x01020200, + MFI_DCMD_CTRL_ALARM = 0x01030000, + MFI_DCMD_CTRL_ALARM_GET = 0x01030100, + MFI_DCMD_CTRL_ALARM_ENABLE = 0x01030200, + MFI_DCMD_CTRL_ALARM_DISABLE = 0x01030300, + MFI_DCMD_CTRL_ALARM_SILENCE = 0x01030400, + MFI_DCMD_CTRL_ALARM_TEST = 0x01030500, + MFI_DCMD_CTRL_EVENT_GETINFO = 0x01040100, + MFI_DCMD_CTRL_EVENT_CLEAR = 0x01040200, + MFI_DCMD_CTRL_EVENT_GET = 0x01040300, + MFI_DCMD_CTRL_EVENT_COUNT = 0x01040400, + MFI_DCMD_CTRL_EVENT_WAIT = 0x01040500, + MFI_DCMD_CTRL_SHUTDOWN = 0x01050000, + MFI_DCMD_HIBERNATE_STANDBY = 0x01060000, + MFI_DCMD_CTRL_GET_TIME = 0x01080101, + MFI_DCMD_CTRL_SET_TIME = 0x01080102, + MFI_DCMD_CTRL_BIOS_DATA_GET = 0x010c0100, + MFI_DCMD_CTRL_BIOS_DATA_SET = 0x010c0200, + MFI_DCMD_CTRL_FACTORY_DEFAULTS = 0x010d0000, + MFI_DCMD_CTRL_MFC_DEFAULTS_GET = 0x010e0201, + MFI_DCMD_CTRL_MFC_DEFAULTS_SET = 0x010e0202, + MFI_DCMD_CTRL_CACHE_FLUSH = 0x01101000, + MFI_DCMD_PD_GET_LIST = 0x02010000, + MFI_DCMD_PD_LIST_QUERY = 0x02010100, + MFI_DCMD_PD_GET_INFO = 0x02020000, + MFI_DCMD_PD_STATE_SET = 0x02030100, + MFI_DCMD_PD_REBUILD = 0x02040100, + MFI_DCMD_PD_BLINK = 0x02070100, + MFI_DCMD_PD_UNBLINK = 0x02070200, + MFI_DCMD_LD_GET_LIST = 0x03010000, + MFI_DCMD_LD_LIST_QUERY = 0x03010100, + MFI_DCMD_LD_GET_INFO = 0x03020000, + MFI_DCMD_LD_GET_PROP = 0x03030000, + MFI_DCMD_LD_SET_PROP = 0x03040000, + MFI_DCMD_LD_DELETE = 0x03090000, + MFI_DCMD_CFG_READ = 0x04010000, + MFI_DCMD_CFG_ADD = 0x04020000, + MFI_DCMD_CFG_CLEAR = 0x04030000, + MFI_DCMD_CFG_FOREIGN_READ = 0x04060100, + MFI_DCMD_CFG_FOREIGN_IMPORT = 0x04060400, + MFI_DCMD_BBU_STATUS = 0x05010000, + MFI_DCMD_BBU_CAPACITY_INFO = 0x05020000, + MFI_DCMD_BBU_DESIGN_INFO = 0x05030000, + MFI_DCMD_BBU_PROP_GET = 0x05050100, + MFI_DCMD_CLUSTER = 0x08000000, + MFI_DCMD_CLUSTER_RESET_ALL = 0x08010100, + MFI_DCMD_CLUSTER_RESET_LD = 0x08010200 +} mfi_dcmd_t; + +/* Modifiers for MFI_DCMD_CTRL_FLUSHCACHE */ +#define MFI_FLUSHCACHE_CTRL 0x01 +#define MFI_FLUSHCACHE_DISK 0x02 + +/* Modifiers for MFI_DCMD_CTRL_SHUTDOWN */ +#define MFI_SHUTDOWN_SPINDOWN 0x01 + +/* + * MFI Frame flags + */ +typedef enum { + MFI_FRAME_DONT_POST_IN_REPLY_QUEUE = 0x0001, + MFI_FRAME_SGL64 = 0x0002, + MFI_FRAME_SENSE64 = 0x0004, + MFI_FRAME_DIR_WRITE = 0x0008, + MFI_FRAME_DIR_READ = 0x0010, + MFI_FRAME_IEEE_SGL = 0x0020, +} mfi_frame_flags; + +/* MFI Status codes */ +typedef enum { + MFI_STAT_OK = 0x00, + MFI_STAT_INVALID_CMD, + MFI_STAT_INVALID_DCMD, + MFI_STAT_INVALID_PARAMETER, + MFI_STAT_INVALID_SEQUENCE_NUMBER, + MFI_STAT_ABORT_NOT_POSSIBLE, + MFI_STAT_APP_HOST_CODE_NOT_FOUND, + MFI_STAT_APP_IN_USE, + MFI_STAT_APP_NOT_INITIALIZED, + MFI_STAT_ARRAY_INDEX_INVALID, + MFI_STAT_ARRAY_ROW_NOT_EMPTY, + MFI_STAT_CONFIG_RESOURCE_CONFLICT, + MFI_STAT_DEVICE_NOT_FOUND, + MFI_STAT_DRIVE_TOO_SMALL, + MFI_STAT_FLASH_ALLOC_FAIL, + MFI_STAT_FLASH_BUSY, + MFI_STAT_FLASH_ERROR = 0x10, + MFI_STAT_FLASH_IMAGE_BAD, + MFI_STAT_FLASH_IMAGE_INCOMPLETE, + MFI_STAT_FLASH_NOT_OPEN, + MFI_STAT_FLASH_NOT_STARTED, + MFI_STAT_FLUSH_FAILED, + MFI_STAT_HOST_CODE_NOT_FOUNT, + MFI_STAT_LD_CC_IN_PROGRESS, + MFI_STAT_LD_INIT_IN_PROGRESS, + MFI_STAT_LD_LBA_OUT_OF_RANGE, + MFI_STAT_LD_MAX_CONFIGURED, + MFI_STAT_LD_NOT_OPTIMAL, + MFI_STAT_LD_RBLD_IN_PROGRESS, + MFI_STAT_LD_RECON_IN_PROGRESS, + MFI_STAT_LD_WRONG_RAID_LEVEL, + MFI_STAT_MAX_SPARES_EXCEEDED, + MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20, + MFI_STAT_MFC_HW_ERROR, + MFI_STAT_NO_HW_PRESENT, + MFI_STAT_NOT_FOUND, + MFI_STAT_NOT_IN_ENCL, + MFI_STAT_PD_CLEAR_IN_PROGRESS, + MFI_STAT_PD_TYPE_WRONG, + MFI_STAT_PR_DISABLED, + MFI_STAT_ROW_INDEX_INVALID, + MFI_STAT_SAS_CONFIG_INVALID_ACTION, + MFI_STAT_SAS_CONFIG_INVALID_DATA, + MFI_STAT_SAS_CONFIG_INVALID_PAGE, + MFI_STAT_SAS_CONFIG_INVALID_TYPE, + MFI_STAT_SCSI_DONE_WITH_ERROR, + MFI_STAT_SCSI_IO_FAILED, + MFI_STAT_SCSI_RESERVATION_CONFLICT, + MFI_STAT_SHUTDOWN_FAILED = 0x30, + MFI_STAT_TIME_NOT_SET, + MFI_STAT_WRONG_STATE, + MFI_STAT_LD_OFFLINE, + MFI_STAT_PEER_NOTIFICATION_REJECTED, + MFI_STAT_PEER_NOTIFICATION_FAILED, + MFI_STAT_RESERVATION_IN_PROGRESS, + MFI_STAT_I2C_ERRORS_DETECTED, + MFI_STAT_PCI_ERRORS_DETECTED, + MFI_STAT_DIAG_FAILED, + MFI_STAT_BOOT_MSG_PENDING, + MFI_STAT_FOREIGN_CONFIG_INCOMPLETE, + MFI_STAT_INVALID_SGL, + MFI_STAT_UNSUPPORTED_HW, + MFI_STAT_CC_SCHEDULE_DISABLED, + MFI_STAT_PD_COPYBACK_IN_PROGRESS, + MFI_STAT_MULTIPLE_PDS_IN_ARRAY = 0x40, + MFI_STAT_FW_DOWNLOAD_ERROR, + MFI_STAT_FEATURE_SECURITY_NOT_ENABLED, + MFI_STAT_LOCK_KEY_ALREADY_EXISTS, + MFI_STAT_LOCK_KEY_BACKUP_NOT_ALLOWED, + MFI_STAT_LOCK_KEY_VERIFY_NOT_ALLOWED, + MFI_STAT_LOCK_KEY_VERIFY_FAILED, + MFI_STAT_LOCK_KEY_REKEY_NOT_ALLOWED, + MFI_STAT_LOCK_KEY_INVALID, + MFI_STAT_LOCK_KEY_ESCROW_INVALID, + MFI_STAT_LOCK_KEY_BACKUP_REQUIRED, + MFI_STAT_SECURE_LD_EXISTS, + MFI_STAT_LD_SECURE_NOT_ALLOWED, + MFI_STAT_REPROVISION_NOT_ALLOWED, + MFI_STAT_PD_SECURITY_TYPE_WRONG, + MFI_STAT_LD_ENCRYPTION_TYPE_INVALID, + MFI_STAT_CONFIG_FDE_NON_FDE_MIX_NOT_ALLOWED = 0x50, + MFI_STAT_CONFIG_LD_ENCRYPTION_TYPE_MIX_NOT_ALLOWED, + MFI_STAT_SECRET_KEY_NOT_ALLOWED, + MFI_STAT_PD_HW_ERRORS_DETECTED, + MFI_STAT_LD_CACHE_PINNED, + MFI_STAT_POWER_STATE_SET_IN_PROGRESS, + MFI_STAT_POWER_STATE_SET_BUSY, + MFI_STAT_POWER_STATE_WRONG, + MFI_STAT_PR_NO_AVAILABLE_PD_FOUND, + MFI_STAT_CTRL_RESET_REQUIRED, + MFI_STAT_LOCK_KEY_EKM_NO_BOOT_AGENT, + MFI_STAT_SNAP_NO_SPACE, + MFI_STAT_SNAP_PARTIAL_FAILURE, + MFI_STAT_UPGRADE_KEY_INCOMPATIBLE, + MFI_STAT_PFK_INCOMPATIBLE, + MFI_STAT_PD_MAX_UNCONFIGURED, + MFI_STAT_IO_METRICS_DISABLED = 0x60, + MFI_STAT_AEC_NOT_STOPPED, + MFI_STAT_PI_TYPE_WRONG, + MFI_STAT_LD_PD_PI_INCOMPATIBLE, + MFI_STAT_PI_NOT_ENABLED, + MFI_STAT_LD_BLOCK_SIZE_MISMATCH, + MFI_STAT_INVALID_STATUS = 0xFF +} mfi_status_t; + +/* Event classes */ +typedef enum { + MFI_EVT_CLASS_DEBUG = -2, + MFI_EVT_CLASS_PROGRESS = -1, + MFI_EVT_CLASS_INFO = 0, + MFI_EVT_CLASS_WARNING = 1, + MFI_EVT_CLASS_CRITICAL = 2, + MFI_EVT_CLASS_FATAL = 3, + MFI_EVT_CLASS_DEAD = 4 +} mfi_evt_class_t; + +/* Event locales */ +typedef enum { + MFI_EVT_LOCALE_LD = 0x0001, + MFI_EVT_LOCALE_PD = 0x0002, + MFI_EVT_LOCALE_ENCL = 0x0004, + MFI_EVT_LOCALE_BBU = 0x0008, + MFI_EVT_LOCALE_SAS = 0x0010, + MFI_EVT_LOCALE_CTRL = 0x0020, + MFI_EVT_LOCALE_CONFIG = 0x0040, + MFI_EVT_LOCALE_CLUSTER = 0x0080, + MFI_EVT_LOCALE_ALL = 0xffff +} mfi_evt_locale_t; + +/* Event args */ +typedef enum { + MR_EVT_ARGS_NONE = 0x00, + MR_EVT_ARGS_CDB_SENSE, + MR_EVT_ARGS_LD, + MR_EVT_ARGS_LD_COUNT, + MR_EVT_ARGS_LD_LBA, + MR_EVT_ARGS_LD_OWNER, + MR_EVT_ARGS_LD_LBA_PD_LBA, + MR_EVT_ARGS_LD_PROG, + MR_EVT_ARGS_LD_STATE, + MR_EVT_ARGS_LD_STRIP, + MR_EVT_ARGS_PD, + MR_EVT_ARGS_PD_ERR, + MR_EVT_ARGS_PD_LBA, + MR_EVT_ARGS_PD_LBA_LD, + MR_EVT_ARGS_PD_PROG, + MR_EVT_ARGS_PD_STATE, + MR_EVT_ARGS_PCI, + MR_EVT_ARGS_RATE, + MR_EVT_ARGS_STR, + MR_EVT_ARGS_TIME, + MR_EVT_ARGS_ECC, + MR_EVT_ARGS_LD_PROP, + MR_EVT_ARGS_PD_SPARE, + MR_EVT_ARGS_PD_INDEX, + MR_EVT_ARGS_DIAG_PASS, + MR_EVT_ARGS_DIAG_FAIL, + MR_EVT_ARGS_PD_LBA_LBA, + MR_EVT_ARGS_PORT_PHY, + MR_EVT_ARGS_PD_MISSING, + MR_EVT_ARGS_PD_ADDRESS, + MR_EVT_ARGS_BITMAP, + MR_EVT_ARGS_CONNECTOR, + MR_EVT_ARGS_PD_PD, + MR_EVT_ARGS_PD_FRU, + MR_EVT_ARGS_PD_PATHINFO, + MR_EVT_ARGS_PD_POWER_STATE, + MR_EVT_ARGS_GENERIC, +} mfi_evt_args; + +/* Event codes */ +#define MR_EVT_CFG_CLEARED 0x0004 +#define MR_EVT_CTRL_SHUTDOWN 0x002a +#define MR_EVT_LD_STATE_CHANGE 0x0051 +#define MR_EVT_PD_INSERTED 0x005b +#define MR_EVT_PD_REMOVED 0x0070 +#define MR_EVT_PD_STATE_CHANGED 0x0072 +#define MR_EVT_LD_CREATED 0x008a +#define MR_EVT_LD_DELETED 0x008b +#define MR_EVT_FOREIGN_CFG_IMPORTED 0x00db +#define MR_EVT_LD_OFFLINE 0x00fc +#define MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED 0x0152 + +typedef enum { + MR_LD_CACHE_WRITE_BACK = 0x01, + MR_LD_CACHE_WRITE_ADAPTIVE = 0x02, + MR_LD_CACHE_READ_AHEAD = 0x04, + MR_LD_CACHE_READ_ADAPTIVE = 0x08, + MR_LD_CACHE_WRITE_CACHE_BAD_BBU = 0x10, + MR_LD_CACHE_ALLOW_WRITE_CACHE = 0x20, + MR_LD_CACHE_ALLOW_READ_CACHE = 0x40 +} mfi_ld_cache; + +typedef enum { + MR_PD_CACHE_UNCHANGED = 0, + MR_PD_CACHE_ENABLE = 1, + MR_PD_CACHE_DISABLE = 2 +} mfi_pd_cache; + +typedef enum { + MR_PD_QUERY_TYPE_ALL = 0, + MR_PD_QUERY_TYPE_STATE = 1, + MR_PD_QUERY_TYPE_POWER_STATE = 2, + MR_PD_QUERY_TYPE_MEDIA_TYPE = 3, + MR_PD_QUERY_TYPE_SPEED = 4, + MR_PD_QUERY_TYPE_EXPOSED_TO_HOST = 5, /*query for system drives */ +} mfi_pd_query_type; + +typedef enum { + MR_LD_QUERY_TYPE_ALL = 0, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST = 1, + MR_LD_QUERY_TYPE_USED_TGT_IDS = 2, + MR_LD_QUERY_TYPE_CLUSTER_ACCESS = 3, + MR_LD_QUERY_TYPE_CLUSTER_LOCALE = 4, +} mfi_ld_query_type; + +/* + * Other propertities and definitions + */ +#define MFI_MAX_PD_CHANNELS 2 +#define MFI_MAX_LD_CHANNELS 2 +#define MFI_MAX_CHANNELS (MFI_MAX_PD_CHANNELS + MFI_MAX_LD_CHANNELS) +#define MFI_MAX_CHANNEL_DEVS 128 +#define MFI_DEFAULT_ID -1 +#define MFI_MAX_LUN 8 +#define MFI_MAX_LD 64 + +#define MFI_FRAME_SIZE 64 +#define MFI_MBOX_SIZE 12 + +/* Firmware flashing can take 40s */ +#define MFI_POLL_TIMEOUT_SECS 50 + +/* Allow for speedier math calculations */ +#define MFI_SECTOR_LEN 512 + +/* Scatter Gather elements */ +struct mfi_sg32 { + uint32_t addr; + uint32_t len; +} QEMU_PACKED; + +struct mfi_sg64 { + uint64_t addr; + uint32_t len; +} QEMU_PACKED; + +struct mfi_sg_skinny { + uint64_t addr; + uint32_t len; + uint32_t flag; +} QEMU_PACKED; + +union mfi_sgl { + struct mfi_sg32 sg32[1]; + struct mfi_sg64 sg64[1]; + struct mfi_sg_skinny sg_skinny[1]; +} QEMU_PACKED; + +/* Message frames. All messages have a common header */ +struct mfi_frame_header { + uint8_t frame_cmd; + uint8_t sense_len; + uint8_t cmd_status; + uint8_t scsi_status; + uint8_t target_id; + uint8_t lun_id; + uint8_t cdb_len; + uint8_t sge_count; + uint64_t context; + uint16_t flags; + uint16_t timeout; + uint32_t data_len; +} QEMU_PACKED; + +struct mfi_init_frame { + struct mfi_frame_header header; + uint32_t qinfo_new_addr_lo; + uint32_t qinfo_new_addr_hi; + uint32_t qinfo_old_addr_lo; + uint32_t qinfo_old_addr_hi; + uint32_t reserved[6]; +}; + +#define MFI_IO_FRAME_SIZE 40 +struct mfi_io_frame { + struct mfi_frame_header header; + uint32_t sense_addr_lo; + uint32_t sense_addr_hi; + uint32_t lba_lo; + uint32_t lba_hi; + union mfi_sgl sgl; +} QEMU_PACKED; + +#define MFI_PASS_FRAME_SIZE 48 +struct mfi_pass_frame { + struct mfi_frame_header header; + uint32_t sense_addr_lo; + uint32_t sense_addr_hi; + uint8_t cdb[16]; + union mfi_sgl sgl; +} QEMU_PACKED; + +#define MFI_DCMD_FRAME_SIZE 40 +struct mfi_dcmd_frame { + struct mfi_frame_header header; + uint32_t opcode; + uint8_t mbox[MFI_MBOX_SIZE]; + union mfi_sgl sgl; +} QEMU_PACKED; + +struct mfi_abort_frame { + struct mfi_frame_header header; + uint64_t abort_context; + uint32_t abort_mfi_addr_lo; + uint32_t abort_mfi_addr_hi; + uint32_t reserved1[6]; +} QEMU_PACKED; + +struct mfi_smp_frame { + struct mfi_frame_header header; + uint64_t sas_addr; + union { + struct mfi_sg32 sg32[2]; + struct mfi_sg64 sg64[2]; + } sgl; +} QEMU_PACKED; + +struct mfi_stp_frame { + struct mfi_frame_header header; + uint16_t fis[10]; + uint32_t stp_flags; + union { + struct mfi_sg32 sg32[2]; + struct mfi_sg64 sg64[2]; + } sgl; +} QEMU_PACKED; + +union mfi_frame { + struct mfi_frame_header header; + struct mfi_init_frame init; + struct mfi_io_frame io; + struct mfi_pass_frame pass; + struct mfi_dcmd_frame dcmd; + struct mfi_abort_frame abort; + struct mfi_smp_frame smp; + struct mfi_stp_frame stp; + uint64_t raw[8]; + uint8_t bytes[MFI_FRAME_SIZE]; +}; + +#define MFI_SENSE_LEN 128 +struct mfi_sense { + uint8_t data[MFI_SENSE_LEN]; +}; + +#define MFI_QUEUE_FLAG_CONTEXT64 0x00000002 + +/* The queue init structure that is passed with the init message */ +struct mfi_init_qinfo { + uint32_t flags; + uint32_t rq_entries; + uint32_t rq_addr_lo; + uint32_t rq_addr_hi; + uint32_t pi_addr_lo; + uint32_t pi_addr_hi; + uint32_t ci_addr_lo; + uint32_t ci_addr_hi; +} QEMU_PACKED; + +/* Controller properties */ +struct mfi_ctrl_props { + uint16_t seq_num; + uint16_t pred_fail_poll_interval; + uint16_t intr_throttle_cnt; + uint16_t intr_throttle_timeout; + uint8_t rebuild_rate; + uint8_t patrol_read_rate; + uint8_t bgi_rate; + uint8_t cc_rate; + uint8_t recon_rate; + uint8_t cache_flush_interval; + uint8_t spinup_drv_cnt; + uint8_t spinup_delay; + uint8_t cluster_enable; + uint8_t coercion_mode; + uint8_t alarm_enable; + uint8_t disable_auto_rebuild; + uint8_t disable_battery_warn; + uint8_t ecc_bucket_size; + uint16_t ecc_bucket_leak_rate; + uint8_t restore_hotspare_on_insertion; + uint8_t expose_encl_devices; + uint8_t maintainPdFailHistory; + uint8_t disallowHostRequestReordering; + uint8_t abortCCOnError; + uint8_t loadBalanceMode; + uint8_t disableAutoDetectBackplane; + uint8_t snapVDSpace; + uint32_t OnOffProperties; +/* set TRUE to disable copyBack (0=copyback enabled) */ +#define MFI_CTRL_PROP_CopyBackDisabled (1 << 0) +#define MFI_CTRL_PROP_SMARTerEnabled (1 << 1) +#define MFI_CTRL_PROP_PRCorrectUnconfiguredAreas (1 << 2) +#define MFI_CTRL_PROP_UseFdeOnly (1 << 3) +#define MFI_CTRL_PROP_DisableNCQ (1 << 4) +#define MFI_CTRL_PROP_SSDSMARTerEnabled (1 << 5) +#define MFI_CTRL_PROP_SSDPatrolReadEnabled (1 << 6) +#define MFI_CTRL_PROP_EnableSpinDownUnconfigured (1 << 7) +#define MFI_CTRL_PROP_AutoEnhancedImport (1 << 8) +#define MFI_CTRL_PROP_EnableSecretKeyControl (1 << 9) +#define MFI_CTRL_PROP_DisableOnlineCtrlReset (1 << 10) +#define MFI_CTRL_PROP_AllowBootWithPinnedCache (1 << 11) +#define MFI_CTRL_PROP_DisableSpinDownHS (1 << 12) +#define MFI_CTRL_PROP_EnableJBOD (1 << 13) + + uint8_t autoSnapVDSpace; /* % of source LD to be + * reserved for auto snapshot + * in snapshot repository, for + * metadata and user data + * 1=5%, 2=10%, 3=15% and so on + */ + uint8_t viewSpace; /* snapshot writeable VIEWs + * capacity as a % of source LD + * capacity. 0=READ only + * 1=5%, 2=10%, 3=15% and so on + */ + uint16_t spinDownTime; /* # of idle minutes before device + * is spun down (0=use FW defaults) + */ + uint8_t reserved[24]; +} QEMU_PACKED; + +/* PCI information about the card. */ +struct mfi_info_pci { + uint16_t vendor; + uint16_t device; + uint16_t subvendor; + uint16_t subdevice; + uint8_t reserved[24]; +} QEMU_PACKED; + +/* Host (front end) interface information */ +struct mfi_info_host { + uint8_t type; +#define MFI_INFO_HOST_PCIX 0x01 +#define MFI_INFO_HOST_PCIE 0x02 +#define MFI_INFO_HOST_ISCSI 0x04 +#define MFI_INFO_HOST_SAS3G 0x08 + uint8_t reserved[6]; + uint8_t port_count; + uint64_t port_addr[8]; +} QEMU_PACKED; + +/* Device (back end) interface information */ +struct mfi_info_device { + uint8_t type; +#define MFI_INFO_DEV_SPI 0x01 +#define MFI_INFO_DEV_SAS3G 0x02 +#define MFI_INFO_DEV_SATA1 0x04 +#define MFI_INFO_DEV_SATA3G 0x08 +#define MFI_INFO_DEV_PCIE 0x10 + uint8_t reserved[6]; + uint8_t port_count; + uint64_t port_addr[8]; +} QEMU_PACKED; + +/* Firmware component information */ +struct mfi_info_component { + char name[8]; + char version[32]; + char build_date[16]; + char build_time[16]; +} QEMU_PACKED; + +/* Controller default settings */ +struct mfi_defaults { + uint64_t sas_addr; + uint8_t phy_polarity; + uint8_t background_rate; + uint8_t stripe_size; + uint8_t flush_time; + uint8_t write_back; + uint8_t read_ahead; + uint8_t cache_when_bbu_bad; + uint8_t cached_io; + uint8_t smart_mode; + uint8_t alarm_disable; + uint8_t coercion; + uint8_t zrc_config; + uint8_t dirty_led_shows_drive_activity; + uint8_t bios_continue_on_error; + uint8_t spindown_mode; + uint8_t allowed_device_types; + uint8_t allow_mix_in_enclosure; + uint8_t allow_mix_in_ld; + uint8_t allow_sata_in_cluster; + uint8_t max_chained_enclosures; + uint8_t disable_ctrl_r; + uint8_t enable_web_bios; + uint8_t phy_polarity_split; + uint8_t direct_pd_mapping; + uint8_t bios_enumerate_lds; + uint8_t restored_hot_spare_on_insertion; + uint8_t expose_enclosure_devices; + uint8_t maintain_pd_fail_history; + uint8_t disable_puncture; + uint8_t zero_based_enumeration; + uint8_t disable_preboot_cli; + uint8_t show_drive_led_on_activity; + uint8_t cluster_disable; + uint8_t sas_disable; + uint8_t auto_detect_backplane; + uint8_t fde_only; + uint8_t delay_during_post; + uint8_t resv[19]; +} QEMU_PACKED; + +/* Controller default settings */ +struct mfi_bios_data { + uint16_t boot_target_id; + uint8_t do_not_int_13; + uint8_t continue_on_error; + uint8_t verbose; + uint8_t geometry; + uint8_t expose_all_drives; + uint8_t reserved[56]; + uint8_t check_sum; +} QEMU_PACKED; + +/* SAS (?) controller info, returned from MFI_DCMD_CTRL_GETINFO. */ +struct mfi_ctrl_info { + struct mfi_info_pci pci; + struct mfi_info_host host; + struct mfi_info_device device; + + /* Firmware components that are present and active. */ + uint32_t image_check_word; + uint32_t image_component_count; + struct mfi_info_component image_component[8]; + + /* Firmware components that have been flashed but are inactive */ + uint32_t pending_image_component_count; + struct mfi_info_component pending_image_component[8]; + + uint8_t max_arms; + uint8_t max_spans; + uint8_t max_arrays; + uint8_t max_lds; + char product_name[80]; + char serial_number[32]; + uint32_t hw_present; +#define MFI_INFO_HW_BBU 0x01 +#define MFI_INFO_HW_ALARM 0x02 +#define MFI_INFO_HW_NVRAM 0x04 +#define MFI_INFO_HW_UART 0x08 +#define MFI_INFO_HW_MEM 0x10 +#define MFI_INFO_HW_FLASH 0x20 + uint32_t current_fw_time; + uint16_t max_cmds; + uint16_t max_sg_elements; + uint32_t max_request_size; + uint16_t lds_present; + uint16_t lds_degraded; + uint16_t lds_offline; + uint16_t pd_present; + uint16_t pd_disks_present; + uint16_t pd_disks_pred_failure; + uint16_t pd_disks_failed; + uint16_t nvram_size; + uint16_t memory_size; + uint16_t flash_size; + uint16_t ram_correctable_errors; + uint16_t ram_uncorrectable_errors; + uint8_t cluster_allowed; + uint8_t cluster_active; + uint16_t max_strips_per_io; + + uint32_t raid_levels; +#define MFI_INFO_RAID_0 0x01 +#define MFI_INFO_RAID_1 0x02 +#define MFI_INFO_RAID_5 0x04 +#define MFI_INFO_RAID_1E 0x08 +#define MFI_INFO_RAID_6 0x10 + + uint32_t adapter_ops; +#define MFI_INFO_AOPS_RBLD_RATE 0x0001 +#define MFI_INFO_AOPS_CC_RATE 0x0002 +#define MFI_INFO_AOPS_BGI_RATE 0x0004 +#define MFI_INFO_AOPS_RECON_RATE 0x0008 +#define MFI_INFO_AOPS_PATROL_RATE 0x0010 +#define MFI_INFO_AOPS_ALARM_CONTROL 0x0020 +#define MFI_INFO_AOPS_CLUSTER_SUPPORTED 0x0040 +#define MFI_INFO_AOPS_BBU 0x0080 +#define MFI_INFO_AOPS_SPANNING_ALLOWED 0x0100 +#define MFI_INFO_AOPS_DEDICATED_SPARES 0x0200 +#define MFI_INFO_AOPS_REVERTIBLE_SPARES 0x0400 +#define MFI_INFO_AOPS_FOREIGN_IMPORT 0x0800 +#define MFI_INFO_AOPS_SELF_DIAGNOSTIC 0x1000 +#define MFI_INFO_AOPS_MIXED_ARRAY 0x2000 +#define MFI_INFO_AOPS_GLOBAL_SPARES 0x4000 + + uint32_t ld_ops; +#define MFI_INFO_LDOPS_READ_POLICY 0x01 +#define MFI_INFO_LDOPS_WRITE_POLICY 0x02 +#define MFI_INFO_LDOPS_IO_POLICY 0x04 +#define MFI_INFO_LDOPS_ACCESS_POLICY 0x08 +#define MFI_INFO_LDOPS_DISK_CACHE_POLICY 0x10 + + struct { + uint8_t min; + uint8_t max; + uint8_t reserved[2]; + } QEMU_PACKED stripe_sz_ops; + + uint32_t pd_ops; +#define MFI_INFO_PDOPS_FORCE_ONLINE 0x01 +#define MFI_INFO_PDOPS_FORCE_OFFLINE 0x02 +#define MFI_INFO_PDOPS_FORCE_REBUILD 0x04 + + uint32_t pd_mix_support; +#define MFI_INFO_PDMIX_SAS 0x01 +#define MFI_INFO_PDMIX_SATA 0x02 +#define MFI_INFO_PDMIX_ENCL 0x04 +#define MFI_INFO_PDMIX_LD 0x08 +#define MFI_INFO_PDMIX_SATA_CLUSTER 0x10 + + uint8_t ecc_bucket_count; + uint8_t reserved2[11]; + struct mfi_ctrl_props properties; + char package_version[0x60]; + uint8_t pad[0x800 - 0x6a0]; +} QEMU_PACKED; + +/* keep track of an event. */ +union mfi_evt { + struct { + uint16_t locale; + uint8_t reserved; + int8_t class; + } members; + uint32_t word; +} QEMU_PACKED; + +/* event log state. */ +struct mfi_evt_log_state { + uint32_t newest_seq_num; + uint32_t oldest_seq_num; + uint32_t clear_seq_num; + uint32_t shutdown_seq_num; + uint32_t boot_seq_num; +} QEMU_PACKED; + +struct mfi_progress { + uint16_t progress; + uint16_t elapsed_seconds; +} QEMU_PACKED; + +struct mfi_evt_ld { + uint16_t target_id; + uint8_t ld_index; + uint8_t reserved; +} QEMU_PACKED; + +struct mfi_evt_pd { + uint16_t device_id; + uint8_t enclosure_index; + uint8_t slot_number; +} QEMU_PACKED; + +/* event detail, returned from MFI_DCMD_CTRL_EVENT_WAIT. */ +struct mfi_evt_detail { + uint32_t seq; + uint32_t time; + uint32_t code; + union mfi_evt class; + uint8_t arg_type; + uint8_t reserved1[15]; + + union { + struct { + struct mfi_evt_pd pd; + uint8_t cdb_len; + uint8_t sense_len; + uint8_t reserved[2]; + uint8_t cdb[16]; + uint8_t sense[64]; + } cdb_sense; + + struct mfi_evt_ld ld; + + struct { + struct mfi_evt_ld ld; + uint64_t count; + } ld_count; + + struct { + uint64_t lba; + struct mfi_evt_ld ld; + } ld_lba; + + struct { + struct mfi_evt_ld ld; + uint32_t pre_owner; + uint32_t new_owner; + } ld_owner; + + struct { + uint64_t ld_lba; + uint64_t pd_lba; + struct mfi_evt_ld ld; + struct mfi_evt_pd pd; + } ld_lba_pd_lba; + + struct { + struct mfi_evt_ld ld; + struct mfi_progress prog; + } ld_prog; + + struct { + struct mfi_evt_ld ld; + uint32_t prev_state; + uint32_t new_state; + } ld_state; + + struct { + uint64_t strip; + struct mfi_evt_ld ld; + } ld_strip; + + struct mfi_evt_pd pd; + + struct { + struct mfi_evt_pd pd; + uint32_t err; + } pd_err; + + struct { + uint64_t lba; + struct mfi_evt_pd pd; + } pd_lba; + + struct { + uint64_t lba; + struct mfi_evt_pd pd; + struct mfi_evt_ld ld; + } pd_lba_ld; + + struct { + struct mfi_evt_pd pd; + struct mfi_progress prog; + } pd_prog; + + struct { + struct mfi_evt_pd ld; + uint32_t prev_state; + uint32_t new_state; + } pd_state; + + struct { + uint16_t venderId; + uint16_t deviceId; + uint16_t subVenderId; + uint16_t subDeviceId; + } pci; + + uint32_t rate; + + char str[96]; + + struct { + uint32_t rtc; + uint16_t elapsedSeconds; + } time; + + struct { + uint32_t ecar; + uint32_t elog; + char str[64]; + } ecc; + + uint8_t b[96]; + uint16_t s[48]; + uint32_t w[24]; + uint64_t d[12]; + } args; + + char description[128]; +} QEMU_PACKED; + +struct mfi_evt_list { + uint32_t count; + uint32_t reserved; + struct mfi_evt_detail event[1]; +} QEMU_PACKED; + +union mfi_pd_ref { + struct { + uint16_t device_id; + uint16_t seq_num; + } v; + uint32_t ref; +} QEMU_PACKED; + +union mfi_pd_ddf_type { + struct { + uint16_t pd_type; +#define MFI_PD_DDF_TYPE_FORCED_PD_GUID (1 << 0) +#define MFI_PD_DDF_TYPE_IN_VD (1 << 1) +#define MFI_PD_DDF_TYPE_IS_GLOBAL_SPARE (1 << 2) +#define MFI_PD_DDF_TYPE_IS_SPARE (1 << 3) +#define MFI_PD_DDF_TYPE_IS_FOREIGN (1 << 4) +#define MFI_PD_DDF_TYPE_INTF_SPI (1 << 12) +#define MFI_PD_DDF_TYPE_INTF_SAS (1 << 13) +#define MFI_PD_DDF_TYPE_INTF_SATA1 (1 << 14) +#define MFI_PD_DDF_TYPE_INTF_SATA3G (1 << 15) + uint16_t reserved; + } ddf; + struct { + uint32_t reserved; + } non_disk; + uint32_t type; +} QEMU_PACKED; + +struct mfi_pd_progress { + uint32_t active; +#define PD_PROGRESS_ACTIVE_REBUILD (1 << 0) +#define PD_PROGRESS_ACTIVE_PATROL (1 << 1) +#define PD_PROGRESS_ACTIVE_CLEAR (1 << 2) + struct mfi_progress rbld; + struct mfi_progress patrol; + struct mfi_progress clear; + struct mfi_progress reserved[4]; +} QEMU_PACKED; + +struct mfi_pd_info { + union mfi_pd_ref ref; + uint8_t inquiry_data[96]; + uint8_t vpd_page83[64]; + uint8_t not_supported; + uint8_t scsi_dev_type; + uint8_t connected_port_bitmap; + uint8_t device_speed; + uint32_t media_err_count; + uint32_t other_err_count; + uint32_t pred_fail_count; + uint32_t last_pred_fail_event_seq_num; + uint16_t fw_state; + uint8_t disable_for_removal; + uint8_t link_speed; + union mfi_pd_ddf_type state; + struct { + uint8_t count; + uint8_t is_path_broken; + uint8_t reserved[6]; + uint64_t sas_addr[4]; + } path_info; + uint64_t raw_size; + uint64_t non_coerced_size; + uint64_t coerced_size; + uint16_t encl_device_id; + uint8_t encl_index; + uint8_t slot_number; + struct mfi_pd_progress prog_info; + uint8_t bad_block_table_full; + uint8_t unusable_in_current_config; + uint8_t vpd_page83_ext[64]; + uint8_t reserved[512-358]; +} QEMU_PACKED; + +struct mfi_pd_address { + uint16_t device_id; + uint16_t encl_device_id; + uint8_t encl_index; + uint8_t slot_number; + uint8_t scsi_dev_type; + uint8_t connect_port_bitmap; + uint64_t sas_addr[2]; +} QEMU_PACKED; + +#define MFI_MAX_SYS_PDS 240 +struct mfi_pd_list { + uint32_t size; + uint32_t count; + struct mfi_pd_address addr[MFI_MAX_SYS_PDS]; +} QEMU_PACKED; + +union mfi_ld_ref { + struct { + uint8_t target_id; + uint8_t reserved; + uint16_t seq; + } v; + uint32_t ref; +} QEMU_PACKED; + +struct mfi_ld_list { + uint32_t ld_count; + uint32_t reserved1; + struct { + union mfi_ld_ref ld; + uint8_t state; + uint8_t reserved2[3]; + uint64_t size; + } ld_list[MFI_MAX_LD]; +} QEMU_PACKED; + +struct mfi_ld_targetid_list { + uint32_t size; + uint32_t ld_count; + uint8_t pad[3]; + uint8_t targetid[MFI_MAX_LD]; +} QEMU_PACKED; + +enum mfi_ld_access { + MFI_LD_ACCESS_RW = 0, + MFI_LD_ACCSSS_RO = 2, + MFI_LD_ACCESS_BLOCKED = 3, +}; +#define MFI_LD_ACCESS_MASK 3 + +enum mfi_ld_state { + MFI_LD_STATE_OFFLINE = 0, + MFI_LD_STATE_PARTIALLY_DEGRADED = 1, + MFI_LD_STATE_DEGRADED = 2, + MFI_LD_STATE_OPTIMAL = 3 +}; + +enum mfi_syspd_state { + MFI_PD_STATE_UNCONFIGURED_GOOD = 0x00, + MFI_PD_STATE_UNCONFIGURED_BAD = 0x01, + MFI_PD_STATE_HOT_SPARE = 0x02, + MFI_PD_STATE_OFFLINE = 0x10, + MFI_PD_STATE_FAILED = 0x11, + MFI_PD_STATE_REBUILD = 0x14, + MFI_PD_STATE_ONLINE = 0x18, + MFI_PD_STATE_COPYBACK = 0x20, + MFI_PD_STATE_SYSTEM = 0x40 +}; + +struct mfi_ld_props { + union mfi_ld_ref ld; + char name[16]; + uint8_t default_cache_policy; + uint8_t access_policy; + uint8_t disk_cache_policy; + uint8_t current_cache_policy; + uint8_t no_bgi; + uint8_t reserved[7]; +} QEMU_PACKED; + +struct mfi_ld_params { + uint8_t primary_raid_level; + uint8_t raid_level_qualifier; + uint8_t secondary_raid_level; + uint8_t stripe_size; + uint8_t num_drives; + uint8_t span_depth; + uint8_t state; + uint8_t init_state; + uint8_t is_consistent; + uint8_t reserved[23]; +} QEMU_PACKED; + +struct mfi_ld_progress { + uint32_t active; +#define MFI_LD_PROGRESS_CC (1<<0) +#define MFI_LD_PROGRESS_BGI (1<<1) +#define MFI_LD_PROGRESS_FGI (1<<2) +#define MFI_LD_PORGRESS_RECON (1<<3) + struct mfi_progress cc; + struct mfi_progress bgi; + struct mfi_progress fgi; + struct mfi_progress recon; + struct mfi_progress reserved[4]; +} QEMU_PACKED; + +struct mfi_span { + uint64_t start_block; + uint64_t num_blocks; + uint16_t array_ref; + uint8_t reserved[6]; +} QEMU_PACKED; + +#define MFI_MAX_SPAN_DEPTH 8 +struct mfi_ld_config { + struct mfi_ld_props properties; + struct mfi_ld_params params; + struct mfi_span span[MFI_MAX_SPAN_DEPTH]; +} QEMU_PACKED; + +struct mfi_ld_info { + struct mfi_ld_config ld_config; + uint64_t size; + struct mfi_ld_progress progress; + uint16_t cluster_owner; + uint8_t reconstruct_active; + uint8_t reserved1[1]; + uint8_t vpd_page83[64]; + uint8_t reserved2[16]; +} QEMU_PACKED; + +union mfi_spare_type { + uint8_t flags; +#define MFI_SPARE_IS_DEDICATED (1 << 0) +#define MFI_SPARE_IS_REVERTABLE (1 << 1) +#define MFI_SPARE_IS_ENCL_AFFINITY (1 << 2) + uint8_t type; +} QEMU_PACKED; + +#define MFI_MAX_ARRAYS 16 +struct mfi_spare { + union mfi_pd_ref ref; + union mfi_spare_type spare_type; + uint8_t reserved[2]; + uint8_t array_count; + uint16_t array_refd[MFI_MAX_ARRAYS]; +} QEMU_PACKED; + +#define MFI_MAX_ROW_SIZE 32 +struct mfi_array { + uint64_t size; + uint8_t num_drives; + uint8_t reserved; + uint16_t array_ref; + uint8_t pad[20]; + struct { + union mfi_pd_ref ref; + uint16_t fw_state; /* enum mfi_syspd_state */ + struct { + uint8_t pd; + uint8_t slot; + } encl; + } pd[MFI_MAX_ROW_SIZE]; +} QEMU_PACKED; + +struct mfi_config_data { + uint32_t size; + uint16_t array_count; + uint16_t array_size; + uint16_t log_drv_count; + uint16_t log_drv_size; + uint16_t spares_count; + uint16_t spares_size; + uint8_t reserved[16]; + /* + struct mfi_array array[]; + struct mfi_ld_config ld[]; + struct mfi_spare spare[]; + */ +} QEMU_PACKED; + +#define MFI_SCSI_MAX_TARGETS 128 +#define MFI_SCSI_MAX_LUNS 8 +#define MFI_SCSI_INITIATOR_ID 255 +#define MFI_SCSI_MAX_CMDS 8 +#define MFI_SCSI_MAX_CDB_LEN 16 + +#endif /* MFI_REG_H */ diff --git a/qemu/hw/scsi/scsi-bus.c b/qemu/hw/scsi/scsi-bus.c new file mode 100644 index 000000000..f0ae4625f --- /dev/null +++ b/qemu/hw/scsi/scsi-bus.c @@ -0,0 +1,2049 @@ +#include "hw/hw.h" +#include "qemu/error-report.h" +#include "hw/scsi/scsi.h" +#include "block/scsi.h" +#include "hw/qdev.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "trace.h" +#include "sysemu/dma.h" + +static char *scsibus_get_dev_path(DeviceState *dev); +static char *scsibus_get_fw_dev_path(DeviceState *dev); +static void scsi_req_dequeue(SCSIRequest *req); +static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); +static void scsi_target_free_buf(SCSIRequest *req); + +static Property scsi_props[] = { + DEFINE_PROP_UINT32("channel", SCSIDevice, channel, 0), + DEFINE_PROP_UINT32("scsi-id", SCSIDevice, id, -1), + DEFINE_PROP_UINT32("lun", SCSIDevice, lun, -1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void scsi_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + + k->get_dev_path = scsibus_get_dev_path; + k->get_fw_dev_path = scsibus_get_fw_dev_path; + hc->unplug = qdev_simple_device_unplug_cb; +} + +static const TypeInfo scsi_bus_info = { + .name = TYPE_SCSI_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(SCSIBus), + .class_init = scsi_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; +static int next_scsi_bus; + +static void scsi_device_realize(SCSIDevice *s, Error **errp) +{ + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); + if (sc->realize) { + sc->realize(s, errp); + } +} + +int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf, + void *hba_private) +{ + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus); + int rc; + + assert(cmd->len == 0); + rc = scsi_req_parse_cdb(dev, cmd, buf); + if (bus->info->parse_cdb) { + rc = bus->info->parse_cdb(dev, cmd, buf, hba_private); + } + return rc; +} + +static SCSIRequest *scsi_device_alloc_req(SCSIDevice *s, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private) +{ + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); + if (sc->alloc_req) { + return sc->alloc_req(s, tag, lun, buf, hba_private); + } + + return NULL; +} + +void scsi_device_unit_attention_reported(SCSIDevice *s) +{ + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); + if (sc->unit_attention_reported) { + sc->unit_attention_reported(s); + } +} + +/* Create a scsi bus, and attach devices to it. */ +void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name) +{ + qbus_create_inplace(bus, bus_size, TYPE_SCSI_BUS, host, bus_name); + bus->busnr = next_scsi_bus++; + bus->info = info; + qbus_set_bus_hotplug_handler(BUS(bus), &error_abort); +} + +static void scsi_dma_restart_bh(void *opaque) +{ + SCSIDevice *s = opaque; + SCSIRequest *req, *next; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + scsi_req_ref(req); + if (req->retry) { + req->retry = false; + switch (req->cmd.mode) { + case SCSI_XFER_FROM_DEV: + case SCSI_XFER_TO_DEV: + scsi_req_continue(req); + break; + case SCSI_XFER_NONE: + scsi_req_dequeue(req); + scsi_req_enqueue(req); + break; + } + } + scsi_req_unref(req); + } +} + +void scsi_req_retry(SCSIRequest *req) +{ + /* No need to save a reference, because scsi_dma_restart_bh just + * looks at the request list. */ + req->retry = true; +} + +static void scsi_dma_restart_cb(void *opaque, int running, RunState state) +{ + SCSIDevice *s = opaque; + + if (!running) { + return; + } + if (!s->bh) { + s->bh = qemu_bh_new(scsi_dma_restart_bh, s); + qemu_bh_schedule(s->bh); + } +} + +static void scsi_qdev_realize(DeviceState *qdev, Error **errp) +{ + SCSIDevice *dev = SCSI_DEVICE(qdev); + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus); + SCSIDevice *d; + Error *local_err = NULL; + + if (dev->channel > bus->info->max_channel) { + error_setg(errp, "bad scsi channel id: %d", dev->channel); + return; + } + if (dev->id != -1 && dev->id > bus->info->max_target) { + error_setg(errp, "bad scsi device id: %d", dev->id); + return; + } + if (dev->lun != -1 && dev->lun > bus->info->max_lun) { + error_setg(errp, "bad scsi device lun: %d", dev->lun); + return; + } + + if (dev->id == -1) { + int id = -1; + if (dev->lun == -1) { + dev->lun = 0; + } + do { + d = scsi_device_find(bus, dev->channel, ++id, dev->lun); + } while (d && d->lun == dev->lun && id < bus->info->max_target); + if (d && d->lun == dev->lun) { + error_setg(errp, "no free target"); + return; + } + dev->id = id; + } else if (dev->lun == -1) { + int lun = -1; + do { + d = scsi_device_find(bus, dev->channel, dev->id, ++lun); + } while (d && d->lun == lun && lun < bus->info->max_lun); + if (d && d->lun == lun) { + error_setg(errp, "no free lun"); + return; + } + dev->lun = lun; + } else { + d = scsi_device_find(bus, dev->channel, dev->id, dev->lun); + assert(d); + if (d->lun == dev->lun && dev != d) { + error_setg(errp, "lun already used by '%s'", d->qdev.id); + return; + } + } + + QTAILQ_INIT(&dev->requests); + scsi_device_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + dev->vmsentry = qemu_add_vm_change_state_handler(scsi_dma_restart_cb, + dev); +} + +static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) +{ + SCSIDevice *dev = SCSI_DEVICE(qdev); + + if (dev->vmsentry) { + qemu_del_vm_change_state_handler(dev->vmsentry); + } + + scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE)); + blockdev_mark_auto_del(dev->conf.blk); +} + +/* handle legacy '-drive if=scsi,...' cmd line args */ +SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, + int unit, bool removable, int bootindex, + const char *serial, Error **errp) +{ + const char *driver; + char *name; + DeviceState *dev; + Error *err = NULL; + + driver = blk_is_sg(blk) ? "scsi-generic" : "scsi-disk"; + dev = qdev_create(&bus->qbus, driver); + name = g_strdup_printf("legacy[%d]", unit); + object_property_add_child(OBJECT(bus), name, OBJECT(dev), NULL); + g_free(name); + + qdev_prop_set_uint32(dev, "scsi-id", unit); + if (bootindex >= 0) { + object_property_set_int(OBJECT(dev), bootindex, "bootindex", + &error_abort); + } + if (object_property_find(OBJECT(dev), "removable", NULL)) { + qdev_prop_set_bit(dev, "removable", removable); + } + if (serial && object_property_find(OBJECT(dev), "serial", NULL)) { + qdev_prop_set_string(dev, "serial", serial); + } + qdev_prop_set_drive(dev, "drive", blk, &err); + if (err) { + error_propagate(errp, err); + object_unparent(OBJECT(dev)); + return NULL; + } + object_property_set_bool(OBJECT(dev), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + object_unparent(OBJECT(dev)); + return NULL; + } + return SCSI_DEVICE(dev); +} + +void scsi_bus_legacy_handle_cmdline(SCSIBus *bus, Error **errp) +{ + Location loc; + DriveInfo *dinfo; + int unit; + Error *err = NULL; + + loc_push_none(&loc); + for (unit = 0; unit <= bus->info->max_target; unit++) { + dinfo = drive_get(IF_SCSI, bus->busnr, unit); + if (dinfo == NULL) { + continue; + } + qemu_opts_loc_restore(dinfo->opts); + scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), + unit, false, -1, NULL, &err); + if (err != NULL) { + error_propagate(errp, err); + break; + } + } + loc_pop(&loc); +} + +static int32_t scsi_invalid_field(SCSIRequest *req, uint8_t *buf) +{ + scsi_req_build_sense(req, SENSE_CODE(INVALID_FIELD)); + scsi_req_complete(req, CHECK_CONDITION); + return 0; +} + +static const struct SCSIReqOps reqops_invalid_field = { + .size = sizeof(SCSIRequest), + .send_command = scsi_invalid_field +}; + +/* SCSIReqOps implementation for invalid commands. */ + +static int32_t scsi_invalid_command(SCSIRequest *req, uint8_t *buf) +{ + scsi_req_build_sense(req, SENSE_CODE(INVALID_OPCODE)); + scsi_req_complete(req, CHECK_CONDITION); + return 0; +} + +static const struct SCSIReqOps reqops_invalid_opcode = { + .size = sizeof(SCSIRequest), + .send_command = scsi_invalid_command +}; + +/* SCSIReqOps implementation for unit attention conditions. */ + +static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) +{ + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + scsi_req_build_sense(req, req->dev->unit_attention); + } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { + scsi_req_build_sense(req, req->bus->unit_attention); + } + scsi_req_complete(req, CHECK_CONDITION); + return 0; +} + +static const struct SCSIReqOps reqops_unit_attention = { + .size = sizeof(SCSIRequest), + .send_command = scsi_unit_attention +}; + +/* SCSIReqOps implementation for REPORT LUNS and for commands sent to + an invalid LUN. */ + +typedef struct SCSITargetReq SCSITargetReq; + +struct SCSITargetReq { + SCSIRequest req; + int len; + uint8_t *buf; + int buf_len; +}; + +static void store_lun(uint8_t *outbuf, int lun) +{ + if (lun < 256) { + outbuf[1] = lun; + return; + } + outbuf[1] = (lun & 255); + outbuf[0] = (lun >> 8) | 0x40; +} + +static bool scsi_target_emulate_report_luns(SCSITargetReq *r) +{ + BusChild *kid; + int i, len, n; + int channel, id; + bool found_lun0; + + if (r->req.cmd.xfer < 16) { + return false; + } + if (r->req.cmd.buf[2] > 2) { + return false; + } + channel = r->req.dev->channel; + id = r->req.dev->id; + found_lun0 = false; + n = 0; + QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) { + DeviceState *qdev = kid->child; + SCSIDevice *dev = SCSI_DEVICE(qdev); + + if (dev->channel == channel && dev->id == id) { + if (dev->lun == 0) { + found_lun0 = true; + } + n += 8; + } + } + if (!found_lun0) { + n += 8; + } + + scsi_target_alloc_buf(&r->req, n + 8); + + len = MIN(n + 8, r->req.cmd.xfer & ~7); + memset(r->buf, 0, len); + stl_be_p(&r->buf[0], n); + i = found_lun0 ? 8 : 16; + QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) { + DeviceState *qdev = kid->child; + SCSIDevice *dev = SCSI_DEVICE(qdev); + + if (dev->channel == channel && dev->id == id) { + store_lun(&r->buf[i], dev->lun); + i += 8; + } + } + assert(i == n + 8); + r->len = len; + return true; +} + +static bool scsi_target_emulate_inquiry(SCSITargetReq *r) +{ + assert(r->req.dev->lun != r->req.lun); + + scsi_target_alloc_buf(&r->req, SCSI_INQUIRY_LEN); + + if (r->req.cmd.buf[1] & 0x2) { + /* Command support data - optional, not implemented */ + return false; + } + + if (r->req.cmd.buf[1] & 0x1) { + /* Vital product data */ + uint8_t page_code = r->req.cmd.buf[2]; + r->buf[r->len++] = page_code ; /* this page */ + r->buf[r->len++] = 0x00; + + switch (page_code) { + case 0x00: /* Supported page codes, mandatory */ + { + int pages; + pages = r->len++; + r->buf[r->len++] = 0x00; /* list of supported pages (this page) */ + r->buf[pages] = r->len - pages - 1; /* number of pages */ + break; + } + default: + return false; + } + /* done with EVPD */ + assert(r->len < r->buf_len); + r->len = MIN(r->req.cmd.xfer, r->len); + return true; + } + + /* Standard INQUIRY data */ + if (r->req.cmd.buf[2] != 0) { + return false; + } + + /* PAGE CODE == 0 */ + r->len = MIN(r->req.cmd.xfer, SCSI_INQUIRY_LEN); + memset(r->buf, 0, r->len); + if (r->req.lun != 0) { + r->buf[0] = TYPE_NO_LUN; + } else { + r->buf[0] = TYPE_NOT_PRESENT | TYPE_INACTIVE; + r->buf[2] = 5; /* Version */ + r->buf[3] = 2 | 0x10; /* HiSup, response data format */ + r->buf[4] = r->len - 5; /* Additional Length = (Len - 1) - 4 */ + r->buf[7] = 0x10 | (r->req.bus->info->tcq ? 0x02 : 0); /* Sync, TCQ. */ + memcpy(&r->buf[8], "QEMU ", 8); + memcpy(&r->buf[16], "QEMU TARGET ", 16); + pstrcpy((char *) &r->buf[32], 4, qemu_get_version()); + } + return true; +} + +static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf) +{ + SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req); + + switch (buf[0]) { + case REPORT_LUNS: + if (!scsi_target_emulate_report_luns(r)) { + goto illegal_request; + } + break; + case INQUIRY: + if (!scsi_target_emulate_inquiry(r)) { + goto illegal_request; + } + break; + case REQUEST_SENSE: + scsi_target_alloc_buf(&r->req, SCSI_SENSE_LEN); + r->len = scsi_device_get_sense(r->req.dev, r->buf, + MIN(req->cmd.xfer, r->buf_len), + (req->cmd.buf[1] & 1) == 0); + if (r->req.dev->sense_is_ua) { + scsi_device_unit_attention_reported(req->dev); + r->req.dev->sense_len = 0; + r->req.dev->sense_is_ua = false; + } + break; + case TEST_UNIT_READY: + break; + default: + scsi_req_build_sense(req, SENSE_CODE(LUN_NOT_SUPPORTED)); + scsi_req_complete(req, CHECK_CONDITION); + return 0; + illegal_request: + scsi_req_build_sense(req, SENSE_CODE(INVALID_FIELD)); + scsi_req_complete(req, CHECK_CONDITION); + return 0; + } + + if (!r->len) { + scsi_req_complete(req, GOOD); + } + return r->len; +} + +static void scsi_target_read_data(SCSIRequest *req) +{ + SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req); + uint32_t n; + + n = r->len; + if (n > 0) { + r->len = 0; + scsi_req_data(&r->req, n); + } else { + scsi_req_complete(&r->req, GOOD); + } +} + +static uint8_t *scsi_target_get_buf(SCSIRequest *req) +{ + SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req); + + return r->buf; +} + +static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len) +{ + SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req); + + r->buf = g_malloc(len); + r->buf_len = len; + + return r->buf; +} + +static void scsi_target_free_buf(SCSIRequest *req) +{ + SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req); + + g_free(r->buf); +} + +static const struct SCSIReqOps reqops_target_command = { + .size = sizeof(SCSITargetReq), + .send_command = scsi_target_send_command, + .read_data = scsi_target_read_data, + .get_buf = scsi_target_get_buf, + .free_req = scsi_target_free_buf, +}; + + +SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, + uint32_t tag, uint32_t lun, void *hba_private) +{ + SCSIRequest *req; + SCSIBus *bus = scsi_bus_from_device(d); + BusState *qbus = BUS(bus); + const int memset_off = offsetof(SCSIRequest, sense) + + sizeof(req->sense); + + req = g_slice_alloc(reqops->size); + memset((uint8_t *)req + memset_off, 0, reqops->size - memset_off); + req->refcount = 1; + req->bus = bus; + req->dev = d; + req->tag = tag; + req->lun = lun; + req->hba_private = hba_private; + req->status = -1; + req->ops = reqops; + object_ref(OBJECT(d)); + object_ref(OBJECT(qbus->parent)); + notifier_list_init(&req->cancel_notifiers); + trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); + return req; +} + +SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private) +{ + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, d->qdev.parent_bus); + const SCSIReqOps *ops; + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(d); + SCSIRequest *req; + SCSICommand cmd = { .len = 0 }; + int ret; + + if ((d->unit_attention.key == UNIT_ATTENTION || + bus->unit_attention.key == UNIT_ATTENTION) && + (buf[0] != INQUIRY && + buf[0] != REPORT_LUNS && + buf[0] != GET_CONFIGURATION && + buf[0] != GET_EVENT_STATUS_NOTIFICATION && + + /* + * If we already have a pending unit attention condition, + * report this one before triggering another one. + */ + !(buf[0] == REQUEST_SENSE && d->sense_is_ua))) { + ops = &reqops_unit_attention; + } else if (lun != d->lun || + buf[0] == REPORT_LUNS || + (buf[0] == REQUEST_SENSE && d->sense_len)) { + ops = &reqops_target_command; + } else { + ops = NULL; + } + + if (ops != NULL || !sc->parse_cdb) { + ret = scsi_req_parse_cdb(d, &cmd, buf); + } else { + ret = sc->parse_cdb(d, &cmd, buf, hba_private); + } + + if (ret != 0) { + trace_scsi_req_parse_bad(d->id, lun, tag, buf[0]); + req = scsi_req_alloc(&reqops_invalid_opcode, d, tag, lun, hba_private); + } else { + assert(cmd.len != 0); + trace_scsi_req_parsed(d->id, lun, tag, buf[0], + cmd.mode, cmd.xfer); + if (cmd.lba != -1) { + trace_scsi_req_parsed_lba(d->id, lun, tag, buf[0], + cmd.lba); + } + + if (cmd.xfer > INT32_MAX) { + req = scsi_req_alloc(&reqops_invalid_field, d, tag, lun, hba_private); + } else if (ops) { + req = scsi_req_alloc(ops, d, tag, lun, hba_private); + } else { + req = scsi_device_alloc_req(d, tag, lun, buf, hba_private); + } + } + + req->cmd = cmd; + req->resid = req->cmd.xfer; + + switch (buf[0]) { + case INQUIRY: + trace_scsi_inquiry(d->id, lun, tag, cmd.buf[1], cmd.buf[2]); + break; + case TEST_UNIT_READY: + trace_scsi_test_unit_ready(d->id, lun, tag); + break; + case REPORT_LUNS: + trace_scsi_report_luns(d->id, lun, tag); + break; + case REQUEST_SENSE: + trace_scsi_request_sense(d->id, lun, tag); + break; + default: + break; + } + + return req; +} + +uint8_t *scsi_req_get_buf(SCSIRequest *req) +{ + return req->ops->get_buf(req); +} + +static void scsi_clear_unit_attention(SCSIRequest *req) +{ + SCSISense *ua; + if (req->dev->unit_attention.key != UNIT_ATTENTION && + req->bus->unit_attention.key != UNIT_ATTENTION) { + return; + } + + /* + * If an INQUIRY command enters the enabled command state, + * the device server shall [not] clear any unit attention condition; + * See also MMC-6, paragraphs 6.5 and 6.6.2. + */ + if (req->cmd.buf[0] == INQUIRY || + req->cmd.buf[0] == GET_CONFIGURATION || + req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { + return; + } + + if (req->dev->unit_attention.key == UNIT_ATTENTION) { + ua = &req->dev->unit_attention; + } else { + ua = &req->bus->unit_attention; + } + + /* + * If a REPORT LUNS command enters the enabled command state, [...] + * the device server shall clear any pending unit attention condition + * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. + */ + if (req->cmd.buf[0] == REPORT_LUNS && + !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && + ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { + return; + } + + *ua = SENSE_CODE(NO_SENSE); +} + +int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) +{ + int ret; + + assert(len >= 14); + if (!req->sense_len) { + return 0; + } + + ret = scsi_build_sense(req->sense, req->sense_len, buf, len, true); + + /* + * FIXME: clearing unit attention conditions upon autosense should be done + * only if the UA_INTLCK_CTRL field in the Control mode page is set to 00b + * (SAM-5, 5.14). + * + * We assume UA_INTLCK_CTRL to be 00b for HBAs that support autosense, and + * 10b for HBAs that do not support it (do not call scsi_req_get_sense). + * Here we handle unit attention clearing for UA_INTLCK_CTRL == 00b. + */ + if (req->dev->sense_is_ua) { + scsi_device_unit_attention_reported(req->dev); + req->dev->sense_len = 0; + req->dev->sense_is_ua = false; + } + return ret; +} + +int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed) +{ + return scsi_build_sense(dev->sense, dev->sense_len, buf, len, fixed); +} + +void scsi_req_build_sense(SCSIRequest *req, SCSISense sense) +{ + trace_scsi_req_build_sense(req->dev->id, req->lun, req->tag, + sense.key, sense.asc, sense.ascq); + memset(req->sense, 0, 18); + req->sense[0] = 0x70; + req->sense[2] = sense.key; + req->sense[7] = 10; + req->sense[12] = sense.asc; + req->sense[13] = sense.ascq; + req->sense_len = 18; +} + +static void scsi_req_enqueue_internal(SCSIRequest *req) +{ + assert(!req->enqueued); + scsi_req_ref(req); + if (req->bus->info->get_sg_list) { + req->sg = req->bus->info->get_sg_list(req); + } else { + req->sg = NULL; + } + req->enqueued = true; + QTAILQ_INSERT_TAIL(&req->dev->requests, req, next); +} + +int32_t scsi_req_enqueue(SCSIRequest *req) +{ + int32_t rc; + + assert(!req->retry); + scsi_req_enqueue_internal(req); + scsi_req_ref(req); + rc = req->ops->send_command(req, req->cmd.buf); + scsi_req_unref(req); + return rc; +} + +static void scsi_req_dequeue(SCSIRequest *req) +{ + trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag); + req->retry = false; + if (req->enqueued) { + QTAILQ_REMOVE(&req->dev->requests, req, next); + req->enqueued = false; + scsi_req_unref(req); + } +} + +static int scsi_get_performance_length(int num_desc, int type, int data_type) +{ + /* MMC-6, paragraph 6.7. */ + switch (type) { + case 0: + if ((data_type & 3) == 0) { + /* Each descriptor is as in Table 295 - Nominal performance. */ + return 16 * num_desc + 8; + } else { + /* Each descriptor is as in Table 296 - Exceptions. */ + return 6 * num_desc + 8; + } + case 1: + case 4: + case 5: + return 8 * num_desc + 8; + case 2: + return 2048 * num_desc + 8; + case 3: + return 16 * num_desc + 8; + default: + return 8; + } +} + +static int ata_passthrough_xfer_unit(SCSIDevice *dev, uint8_t *buf) +{ + int byte_block = (buf[2] >> 2) & 0x1; + int type = (buf[2] >> 4) & 0x1; + int xfer_unit; + + if (byte_block) { + if (type) { + xfer_unit = dev->blocksize; + } else { + xfer_unit = 512; + } + } else { + xfer_unit = 1; + } + + return xfer_unit; +} + +static int ata_passthrough_12_xfer(SCSIDevice *dev, uint8_t *buf) +{ + int length = buf[2] & 0x3; + int xfer; + int unit = ata_passthrough_xfer_unit(dev, buf); + + switch (length) { + case 0: + case 3: /* USB-specific. */ + default: + xfer = 0; + break; + case 1: + xfer = buf[3]; + break; + case 2: + xfer = buf[4]; + break; + } + + return xfer * unit; +} + +static int ata_passthrough_16_xfer(SCSIDevice *dev, uint8_t *buf) +{ + int extend = buf[1] & 0x1; + int length = buf[2] & 0x3; + int xfer; + int unit = ata_passthrough_xfer_unit(dev, buf); + + switch (length) { + case 0: + case 3: /* USB-specific. */ + default: + xfer = 0; + break; + case 1: + xfer = buf[4]; + xfer |= (extend ? buf[3] << 8 : 0); + break; + case 2: + xfer = buf[6]; + xfer |= (extend ? buf[5] << 8 : 0); + break; + } + + return xfer * unit; +} + +uint32_t scsi_data_cdb_xfer(uint8_t *buf) +{ + if ((buf[0] >> 5) == 0 && buf[4] == 0) { + return 256; + } else { + return scsi_cdb_xfer(buf); + } +} + +uint32_t scsi_cdb_xfer(uint8_t *buf) +{ + switch (buf[0] >> 5) { + case 0: + return buf[4]; + break; + case 1: + case 2: + return lduw_be_p(&buf[7]); + break; + case 4: + return ldl_be_p(&buf[10]) & 0xffffffffULL; + break; + case 5: + return ldl_be_p(&buf[6]) & 0xffffffffULL; + break; + default: + return -1; + } +} + +static int scsi_req_xfer(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf) +{ + cmd->xfer = scsi_cdb_xfer(buf); + switch (buf[0]) { + case TEST_UNIT_READY: + case REWIND: + case START_STOP: + case SET_CAPACITY: + case WRITE_FILEMARKS: + case WRITE_FILEMARKS_16: + case SPACE: + case RESERVE: + case RELEASE: + case ERASE: + case ALLOW_MEDIUM_REMOVAL: + case SEEK_10: + case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: + case LOCATE_16: + case LOCK_UNLOCK_CACHE: + case SET_CD_SPEED: + case SET_LIMITS: + case WRITE_LONG_10: + case UPDATE_BLOCK: + case RESERVE_TRACK: + case SET_READ_AHEAD: + case PRE_FETCH: + case PRE_FETCH_16: + case ALLOW_OVERWRITE: + cmd->xfer = 0; + break; + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + if ((buf[1] & 2) == 0) { + cmd->xfer = 0; + } else if ((buf[1] & 4) != 0) { + cmd->xfer = 1; + } + cmd->xfer *= dev->blocksize; + break; + case MODE_SENSE: + break; + case WRITE_SAME_10: + case WRITE_SAME_16: + cmd->xfer = dev->blocksize; + break; + case READ_CAPACITY_10: + cmd->xfer = 8; + break; + case READ_BLOCK_LIMITS: + cmd->xfer = 6; + break; + case SEND_VOLUME_TAG: + /* GPCMD_SET_STREAMING from multimedia commands. */ + if (dev->type == TYPE_ROM) { + cmd->xfer = buf[10] | (buf[9] << 8); + } else { + cmd->xfer = buf[9] | (buf[8] << 8); + } + break; + case WRITE_6: + /* length 0 means 256 blocks */ + if (cmd->xfer == 0) { + cmd->xfer = 256; + } + /* fall through */ + case WRITE_10: + case WRITE_VERIFY_10: + case WRITE_12: + case WRITE_VERIFY_12: + case WRITE_16: + case WRITE_VERIFY_16: + cmd->xfer *= dev->blocksize; + break; + case READ_6: + case READ_REVERSE: + /* length 0 means 256 blocks */ + if (cmd->xfer == 0) { + cmd->xfer = 256; + } + /* fall through */ + case READ_10: + case RECOVER_BUFFERED_DATA: + case READ_12: + case READ_16: + cmd->xfer *= dev->blocksize; + break; + case FORMAT_UNIT: + /* MMC mandates the parameter list to be 12-bytes long. Parameters + * for block devices are restricted to the header right now. */ + if (dev->type == TYPE_ROM && (buf[1] & 16)) { + cmd->xfer = 12; + } else { + cmd->xfer = (buf[1] & 16) == 0 ? 0 : (buf[1] & 32 ? 8 : 4); + } + break; + case INQUIRY: + case RECEIVE_DIAGNOSTIC: + case SEND_DIAGNOSTIC: + cmd->xfer = buf[4] | (buf[3] << 8); + break; + case READ_CD: + case READ_BUFFER: + case WRITE_BUFFER: + case SEND_CUE_SHEET: + cmd->xfer = buf[8] | (buf[7] << 8) | (buf[6] << 16); + break; + case PERSISTENT_RESERVE_OUT: + cmd->xfer = ldl_be_p(&buf[5]) & 0xffffffffULL; + break; + case ERASE_12: + if (dev->type == TYPE_ROM) { + /* MMC command GET PERFORMANCE. */ + cmd->xfer = scsi_get_performance_length(buf[9] | (buf[8] << 8), + buf[10], buf[1] & 0x1f); + } + break; + case MECHANISM_STATUS: + case READ_DVD_STRUCTURE: + case SEND_DVD_STRUCTURE: + case MAINTENANCE_OUT: + case MAINTENANCE_IN: + if (dev->type == TYPE_ROM) { + /* GPCMD_REPORT_KEY and GPCMD_SEND_KEY from multi media commands */ + cmd->xfer = buf[9] | (buf[8] << 8); + } + break; + case ATA_PASSTHROUGH_12: + if (dev->type == TYPE_ROM) { + /* BLANK command of MMC */ + cmd->xfer = 0; + } else { + cmd->xfer = ata_passthrough_12_xfer(dev, buf); + } + break; + case ATA_PASSTHROUGH_16: + cmd->xfer = ata_passthrough_16_xfer(dev, buf); + break; + } + return 0; +} + +static int scsi_req_stream_xfer(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf) +{ + switch (buf[0]) { + /* stream commands */ + case ERASE_12: + case ERASE_16: + cmd->xfer = 0; + break; + case READ_6: + case READ_REVERSE: + case RECOVER_BUFFERED_DATA: + case WRITE_6: + cmd->xfer = buf[4] | (buf[3] << 8) | (buf[2] << 16); + if (buf[1] & 0x01) { /* fixed */ + cmd->xfer *= dev->blocksize; + } + break; + case READ_16: + case READ_REVERSE_16: + case VERIFY_16: + case WRITE_16: + cmd->xfer = buf[14] | (buf[13] << 8) | (buf[12] << 16); + if (buf[1] & 0x01) { /* fixed */ + cmd->xfer *= dev->blocksize; + } + break; + case REWIND: + case LOAD_UNLOAD: + cmd->xfer = 0; + break; + case SPACE_16: + cmd->xfer = buf[13] | (buf[12] << 8); + break; + case READ_POSITION: + switch (buf[1] & 0x1f) /* operation code */ { + case SHORT_FORM_BLOCK_ID: + case SHORT_FORM_VENDOR_SPECIFIC: + cmd->xfer = 20; + break; + case LONG_FORM: + cmd->xfer = 32; + break; + case EXTENDED_FORM: + cmd->xfer = buf[8] | (buf[7] << 8); + break; + default: + return -1; + } + + break; + case FORMAT_UNIT: + cmd->xfer = buf[4] | (buf[3] << 8); + break; + /* generic commands */ + default: + return scsi_req_xfer(cmd, dev, buf); + } + return 0; +} + +static int scsi_req_medium_changer_xfer(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf) +{ + switch (buf[0]) { + /* medium changer commands */ + case EXCHANGE_MEDIUM: + case INITIALIZE_ELEMENT_STATUS: + case INITIALIZE_ELEMENT_STATUS_WITH_RANGE: + case MOVE_MEDIUM: + case POSITION_TO_ELEMENT: + cmd->xfer = 0; + break; + case READ_ELEMENT_STATUS: + cmd->xfer = buf[9] | (buf[8] << 8) | (buf[7] << 16); + break; + + /* generic commands */ + default: + return scsi_req_xfer(cmd, dev, buf); + } + return 0; +} + + +static void scsi_cmd_xfer_mode(SCSICommand *cmd) +{ + if (!cmd->xfer) { + cmd->mode = SCSI_XFER_NONE; + return; + } + switch (cmd->buf[0]) { + case WRITE_6: + case WRITE_10: + case WRITE_VERIFY_10: + case WRITE_12: + case WRITE_VERIFY_12: + case WRITE_16: + case WRITE_VERIFY_16: + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + case COPY: + case COPY_VERIFY: + case COMPARE: + case CHANGE_DEFINITION: + case LOG_SELECT: + case MODE_SELECT: + case MODE_SELECT_10: + case SEND_DIAGNOSTIC: + case WRITE_BUFFER: + case FORMAT_UNIT: + case REASSIGN_BLOCKS: + case SEARCH_EQUAL: + case SEARCH_HIGH: + case SEARCH_LOW: + case UPDATE_BLOCK: + case WRITE_LONG_10: + case WRITE_SAME_10: + case WRITE_SAME_16: + case UNMAP: + case SEARCH_HIGH_12: + case SEARCH_EQUAL_12: + case SEARCH_LOW_12: + case MEDIUM_SCAN: + case SEND_VOLUME_TAG: + case SEND_CUE_SHEET: + case SEND_DVD_STRUCTURE: + case PERSISTENT_RESERVE_OUT: + case MAINTENANCE_OUT: + cmd->mode = SCSI_XFER_TO_DEV; + break; + case ATA_PASSTHROUGH_12: + case ATA_PASSTHROUGH_16: + /* T_DIR */ + cmd->mode = (cmd->buf[2] & 0x8) ? + SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV; + break; + default: + cmd->mode = SCSI_XFER_FROM_DEV; + break; + } +} + +static uint64_t scsi_cmd_lba(SCSICommand *cmd) +{ + uint8_t *buf = cmd->buf; + uint64_t lba; + + switch (buf[0] >> 5) { + case 0: + lba = ldl_be_p(&buf[0]) & 0x1fffff; + break; + case 1: + case 2: + case 5: + lba = ldl_be_p(&buf[2]) & 0xffffffffULL; + break; + case 4: + lba = ldq_be_p(&buf[2]); + break; + default: + lba = -1; + + } + return lba; +} + +int scsi_cdb_length(uint8_t *buf) { + int cdb_len; + + switch (buf[0] >> 5) { + case 0: + cdb_len = 6; + break; + case 1: + case 2: + cdb_len = 10; + break; + case 4: + cdb_len = 16; + break; + case 5: + cdb_len = 12; + break; + default: + cdb_len = -1; + } + return cdb_len; +} + +int scsi_req_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf) +{ + int rc; + int len; + + cmd->lba = -1; + len = scsi_cdb_length(buf); + if (len < 0) { + return -1; + } + + cmd->len = len; + switch (dev->type) { + case TYPE_TAPE: + rc = scsi_req_stream_xfer(cmd, dev, buf); + break; + case TYPE_MEDIUM_CHANGER: + rc = scsi_req_medium_changer_xfer(cmd, dev, buf); + break; + default: + rc = scsi_req_xfer(cmd, dev, buf); + break; + } + + if (rc != 0) + return rc; + + memcpy(cmd->buf, buf, cmd->len); + scsi_cmd_xfer_mode(cmd); + cmd->lba = scsi_cmd_lba(cmd); + return 0; +} + +void scsi_device_report_change(SCSIDevice *dev, SCSISense sense) +{ + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, dev->qdev.parent_bus); + + scsi_device_set_ua(dev, sense); + if (bus->info->change) { + bus->info->change(bus, dev, sense); + } +} + +/* + * Predefined sense codes + */ + +/* No sense data available */ +const struct SCSISense sense_code_NO_SENSE = { + .key = NO_SENSE , .asc = 0x00 , .ascq = 0x00 +}; + +/* LUN not ready, Manual intervention required */ +const struct SCSISense sense_code_LUN_NOT_READY = { + .key = NOT_READY, .asc = 0x04, .ascq = 0x03 +}; + +/* LUN not ready, Medium not present */ +const struct SCSISense sense_code_NO_MEDIUM = { + .key = NOT_READY, .asc = 0x3a, .ascq = 0x00 +}; + +/* LUN not ready, medium removal prevented */ +const struct SCSISense sense_code_NOT_READY_REMOVAL_PREVENTED = { + .key = NOT_READY, .asc = 0x53, .ascq = 0x02 +}; + +/* Hardware error, internal target failure */ +const struct SCSISense sense_code_TARGET_FAILURE = { + .key = HARDWARE_ERROR, .asc = 0x44, .ascq = 0x00 +}; + +/* Illegal request, invalid command operation code */ +const struct SCSISense sense_code_INVALID_OPCODE = { + .key = ILLEGAL_REQUEST, .asc = 0x20, .ascq = 0x00 +}; + +/* Illegal request, LBA out of range */ +const struct SCSISense sense_code_LBA_OUT_OF_RANGE = { + .key = ILLEGAL_REQUEST, .asc = 0x21, .ascq = 0x00 +}; + +/* Illegal request, Invalid field in CDB */ +const struct SCSISense sense_code_INVALID_FIELD = { + .key = ILLEGAL_REQUEST, .asc = 0x24, .ascq = 0x00 +}; + +/* Illegal request, Invalid field in parameter list */ +const struct SCSISense sense_code_INVALID_PARAM = { + .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x00 +}; + +/* Illegal request, Parameter list length error */ +const struct SCSISense sense_code_INVALID_PARAM_LEN = { + .key = ILLEGAL_REQUEST, .asc = 0x1a, .ascq = 0x00 +}; + +/* Illegal request, LUN not supported */ +const struct SCSISense sense_code_LUN_NOT_SUPPORTED = { + .key = ILLEGAL_REQUEST, .asc = 0x25, .ascq = 0x00 +}; + +/* Illegal request, Saving parameters not supported */ +const struct SCSISense sense_code_SAVING_PARAMS_NOT_SUPPORTED = { + .key = ILLEGAL_REQUEST, .asc = 0x39, .ascq = 0x00 +}; + +/* Illegal request, Incompatible medium installed */ +const struct SCSISense sense_code_INCOMPATIBLE_FORMAT = { + .key = ILLEGAL_REQUEST, .asc = 0x30, .ascq = 0x00 +}; + +/* Illegal request, medium removal prevented */ +const struct SCSISense sense_code_ILLEGAL_REQ_REMOVAL_PREVENTED = { + .key = ILLEGAL_REQUEST, .asc = 0x53, .ascq = 0x02 +}; + +/* Illegal request, Invalid Transfer Tag */ +const struct SCSISense sense_code_INVALID_TAG = { + .key = ILLEGAL_REQUEST, .asc = 0x4b, .ascq = 0x01 +}; + +/* Command aborted, I/O process terminated */ +const struct SCSISense sense_code_IO_ERROR = { + .key = ABORTED_COMMAND, .asc = 0x00, .ascq = 0x06 +}; + +/* Command aborted, I_T Nexus loss occurred */ +const struct SCSISense sense_code_I_T_NEXUS_LOSS = { + .key = ABORTED_COMMAND, .asc = 0x29, .ascq = 0x07 +}; + +/* Command aborted, Logical Unit failure */ +const struct SCSISense sense_code_LUN_FAILURE = { + .key = ABORTED_COMMAND, .asc = 0x3e, .ascq = 0x01 +}; + +/* Command aborted, Overlapped Commands Attempted */ +const struct SCSISense sense_code_OVERLAPPED_COMMANDS = { + .key = ABORTED_COMMAND, .asc = 0x4e, .ascq = 0x00 +}; + +/* Unit attention, Capacity data has changed */ +const struct SCSISense sense_code_CAPACITY_CHANGED = { + .key = UNIT_ATTENTION, .asc = 0x2a, .ascq = 0x09 +}; + +/* Unit attention, Power on, reset or bus device reset occurred */ +const struct SCSISense sense_code_RESET = { + .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x00 +}; + +/* Unit attention, No medium */ +const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM = { + .key = UNIT_ATTENTION, .asc = 0x3a, .ascq = 0x00 +}; + +/* Unit attention, Medium may have changed */ +const struct SCSISense sense_code_MEDIUM_CHANGED = { + .key = UNIT_ATTENTION, .asc = 0x28, .ascq = 0x00 +}; + +/* Unit attention, Reported LUNs data has changed */ +const struct SCSISense sense_code_REPORTED_LUNS_CHANGED = { + .key = UNIT_ATTENTION, .asc = 0x3f, .ascq = 0x0e +}; + +/* Unit attention, Device internal reset */ +const struct SCSISense sense_code_DEVICE_INTERNAL_RESET = { + .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x04 +}; + +/* Data Protection, Write Protected */ +const struct SCSISense sense_code_WRITE_PROTECTED = { + .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x00 +}; + +/* Data Protection, Space Allocation Failed Write Protect */ +const struct SCSISense sense_code_SPACE_ALLOC_FAILED = { + .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x07 +}; + +/* + * scsi_build_sense + * + * Convert between fixed and descriptor sense buffers + */ +int scsi_build_sense(uint8_t *in_buf, int in_len, + uint8_t *buf, int len, bool fixed) +{ + bool fixed_in; + SCSISense sense; + if (!fixed && len < 8) { + return 0; + } + + if (in_len == 0) { + sense.key = NO_SENSE; + sense.asc = 0; + sense.ascq = 0; + } else { + fixed_in = (in_buf[0] & 2) == 0; + + if (fixed == fixed_in) { + memcpy(buf, in_buf, MIN(len, in_len)); + return MIN(len, in_len); + } + + if (fixed_in) { + sense.key = in_buf[2]; + sense.asc = in_buf[12]; + sense.ascq = in_buf[13]; + } else { + sense.key = in_buf[1]; + sense.asc = in_buf[2]; + sense.ascq = in_buf[3]; + } + } + + memset(buf, 0, len); + if (fixed) { + /* Return fixed format sense buffer */ + buf[0] = 0x70; + buf[2] = sense.key; + buf[7] = 10; + buf[12] = sense.asc; + buf[13] = sense.ascq; + return MIN(len, SCSI_SENSE_LEN); + } else { + /* Return descriptor format sense buffer */ + buf[0] = 0x72; + buf[1] = sense.key; + buf[2] = sense.asc; + buf[3] = sense.ascq; + return 8; + } +} + +const char *scsi_command_name(uint8_t cmd) +{ + static const char *names[] = { + [ TEST_UNIT_READY ] = "TEST_UNIT_READY", + [ REWIND ] = "REWIND", + [ REQUEST_SENSE ] = "REQUEST_SENSE", + [ FORMAT_UNIT ] = "FORMAT_UNIT", + [ READ_BLOCK_LIMITS ] = "READ_BLOCK_LIMITS", + [ REASSIGN_BLOCKS ] = "REASSIGN_BLOCKS/INITIALIZE ELEMENT STATUS", + /* LOAD_UNLOAD and INITIALIZE_ELEMENT_STATUS use the same operation code */ + [ READ_6 ] = "READ_6", + [ WRITE_6 ] = "WRITE_6", + [ SET_CAPACITY ] = "SET_CAPACITY", + [ READ_REVERSE ] = "READ_REVERSE", + [ WRITE_FILEMARKS ] = "WRITE_FILEMARKS", + [ SPACE ] = "SPACE", + [ INQUIRY ] = "INQUIRY", + [ RECOVER_BUFFERED_DATA ] = "RECOVER_BUFFERED_DATA", + [ MAINTENANCE_IN ] = "MAINTENANCE_IN", + [ MAINTENANCE_OUT ] = "MAINTENANCE_OUT", + [ MODE_SELECT ] = "MODE_SELECT", + [ RESERVE ] = "RESERVE", + [ RELEASE ] = "RELEASE", + [ COPY ] = "COPY", + [ ERASE ] = "ERASE", + [ MODE_SENSE ] = "MODE_SENSE", + [ START_STOP ] = "START_STOP/LOAD_UNLOAD", + /* LOAD_UNLOAD and START_STOP use the same operation code */ + [ RECEIVE_DIAGNOSTIC ] = "RECEIVE_DIAGNOSTIC", + [ SEND_DIAGNOSTIC ] = "SEND_DIAGNOSTIC", + [ ALLOW_MEDIUM_REMOVAL ] = "ALLOW_MEDIUM_REMOVAL", + [ READ_CAPACITY_10 ] = "READ_CAPACITY_10", + [ READ_10 ] = "READ_10", + [ WRITE_10 ] = "WRITE_10", + [ SEEK_10 ] = "SEEK_10/POSITION_TO_ELEMENT", + /* SEEK_10 and POSITION_TO_ELEMENT use the same operation code */ + [ WRITE_VERIFY_10 ] = "WRITE_VERIFY_10", + [ VERIFY_10 ] = "VERIFY_10", + [ SEARCH_HIGH ] = "SEARCH_HIGH", + [ SEARCH_EQUAL ] = "SEARCH_EQUAL", + [ SEARCH_LOW ] = "SEARCH_LOW", + [ SET_LIMITS ] = "SET_LIMITS", + [ PRE_FETCH ] = "PRE_FETCH/READ_POSITION", + /* READ_POSITION and PRE_FETCH use the same operation code */ + [ SYNCHRONIZE_CACHE ] = "SYNCHRONIZE_CACHE", + [ LOCK_UNLOCK_CACHE ] = "LOCK_UNLOCK_CACHE", + [ READ_DEFECT_DATA ] = "READ_DEFECT_DATA/INITIALIZE_ELEMENT_STATUS_WITH_RANGE", + /* READ_DEFECT_DATA and INITIALIZE_ELEMENT_STATUS_WITH_RANGE use the same operation code */ + [ MEDIUM_SCAN ] = "MEDIUM_SCAN", + [ COMPARE ] = "COMPARE", + [ COPY_VERIFY ] = "COPY_VERIFY", + [ WRITE_BUFFER ] = "WRITE_BUFFER", + [ READ_BUFFER ] = "READ_BUFFER", + [ UPDATE_BLOCK ] = "UPDATE_BLOCK", + [ READ_LONG_10 ] = "READ_LONG_10", + [ WRITE_LONG_10 ] = "WRITE_LONG_10", + [ CHANGE_DEFINITION ] = "CHANGE_DEFINITION", + [ WRITE_SAME_10 ] = "WRITE_SAME_10", + [ UNMAP ] = "UNMAP", + [ READ_TOC ] = "READ_TOC", + [ REPORT_DENSITY_SUPPORT ] = "REPORT_DENSITY_SUPPORT", + [ SANITIZE ] = "SANITIZE", + [ GET_CONFIGURATION ] = "GET_CONFIGURATION", + [ LOG_SELECT ] = "LOG_SELECT", + [ LOG_SENSE ] = "LOG_SENSE", + [ MODE_SELECT_10 ] = "MODE_SELECT_10", + [ RESERVE_10 ] = "RESERVE_10", + [ RELEASE_10 ] = "RELEASE_10", + [ MODE_SENSE_10 ] = "MODE_SENSE_10", + [ PERSISTENT_RESERVE_IN ] = "PERSISTENT_RESERVE_IN", + [ PERSISTENT_RESERVE_OUT ] = "PERSISTENT_RESERVE_OUT", + [ WRITE_FILEMARKS_16 ] = "WRITE_FILEMARKS_16", + [ EXTENDED_COPY ] = "EXTENDED_COPY", + [ ATA_PASSTHROUGH_16 ] = "ATA_PASSTHROUGH_16", + [ ACCESS_CONTROL_IN ] = "ACCESS_CONTROL_IN", + [ ACCESS_CONTROL_OUT ] = "ACCESS_CONTROL_OUT", + [ READ_16 ] = "READ_16", + [ COMPARE_AND_WRITE ] = "COMPARE_AND_WRITE", + [ WRITE_16 ] = "WRITE_16", + [ WRITE_VERIFY_16 ] = "WRITE_VERIFY_16", + [ VERIFY_16 ] = "VERIFY_16", + [ PRE_FETCH_16 ] = "PRE_FETCH_16", + [ SYNCHRONIZE_CACHE_16 ] = "SPACE_16/SYNCHRONIZE_CACHE_16", + /* SPACE_16 and SYNCHRONIZE_CACHE_16 use the same operation code */ + [ LOCATE_16 ] = "LOCATE_16", + [ WRITE_SAME_16 ] = "ERASE_16/WRITE_SAME_16", + /* ERASE_16 and WRITE_SAME_16 use the same operation code */ + [ SERVICE_ACTION_IN_16 ] = "SERVICE_ACTION_IN_16", + [ WRITE_LONG_16 ] = "WRITE_LONG_16", + [ REPORT_LUNS ] = "REPORT_LUNS", + [ ATA_PASSTHROUGH_12 ] = "BLANK/ATA_PASSTHROUGH_12", + [ MOVE_MEDIUM ] = "MOVE_MEDIUM", + [ EXCHANGE_MEDIUM ] = "EXCHANGE MEDIUM", + [ READ_12 ] = "READ_12", + [ WRITE_12 ] = "WRITE_12", + [ ERASE_12 ] = "ERASE_12/GET_PERFORMANCE", + /* ERASE_12 and GET_PERFORMANCE use the same operation code */ + [ SERVICE_ACTION_IN_12 ] = "SERVICE_ACTION_IN_12", + [ WRITE_VERIFY_12 ] = "WRITE_VERIFY_12", + [ VERIFY_12 ] = "VERIFY_12", + [ SEARCH_HIGH_12 ] = "SEARCH_HIGH_12", + [ SEARCH_EQUAL_12 ] = "SEARCH_EQUAL_12", + [ SEARCH_LOW_12 ] = "SEARCH_LOW_12", + [ READ_ELEMENT_STATUS ] = "READ_ELEMENT_STATUS", + [ SEND_VOLUME_TAG ] = "SEND_VOLUME_TAG/SET_STREAMING", + /* SEND_VOLUME_TAG and SET_STREAMING use the same operation code */ + [ READ_CD ] = "READ_CD", + [ READ_DEFECT_DATA_12 ] = "READ_DEFECT_DATA_12", + [ READ_DVD_STRUCTURE ] = "READ_DVD_STRUCTURE", + [ RESERVE_TRACK ] = "RESERVE_TRACK", + [ SEND_CUE_SHEET ] = "SEND_CUE_SHEET", + [ SEND_DVD_STRUCTURE ] = "SEND_DVD_STRUCTURE", + [ SET_CD_SPEED ] = "SET_CD_SPEED", + [ SET_READ_AHEAD ] = "SET_READ_AHEAD", + [ ALLOW_OVERWRITE ] = "ALLOW_OVERWRITE", + [ MECHANISM_STATUS ] = "MECHANISM_STATUS", + [ GET_EVENT_STATUS_NOTIFICATION ] = "GET_EVENT_STATUS_NOTIFICATION", + [ READ_DISC_INFORMATION ] = "READ_DISC_INFORMATION", + }; + + if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL) + return "*UNKNOWN*"; + return names[cmd]; +} + +SCSIRequest *scsi_req_ref(SCSIRequest *req) +{ + assert(req->refcount > 0); + req->refcount++; + return req; +} + +void scsi_req_unref(SCSIRequest *req) +{ + assert(req->refcount > 0); + if (--req->refcount == 0) { + BusState *qbus = req->dev->qdev.parent_bus; + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, qbus); + + if (bus->info->free_request && req->hba_private) { + bus->info->free_request(bus, req->hba_private); + } + if (req->ops->free_req) { + req->ops->free_req(req); + } + object_unref(OBJECT(req->dev)); + object_unref(OBJECT(qbus->parent)); + g_slice_free1(req->ops->size, req); + } +} + +/* Tell the device that we finished processing this chunk of I/O. It + will start the next chunk or complete the command. */ +void scsi_req_continue(SCSIRequest *req) +{ + if (req->io_canceled) { + trace_scsi_req_continue_canceled(req->dev->id, req->lun, req->tag); + return; + } + trace_scsi_req_continue(req->dev->id, req->lun, req->tag); + if (req->cmd.mode == SCSI_XFER_TO_DEV) { + req->ops->write_data(req); + } else { + req->ops->read_data(req); + } +} + +/* Called by the devices when data is ready for the HBA. The HBA should + start a DMA operation to read or fill the device's data buffer. + Once it completes, calling scsi_req_continue will restart I/O. */ +void scsi_req_data(SCSIRequest *req, int len) +{ + uint8_t *buf; + if (req->io_canceled) { + trace_scsi_req_data_canceled(req->dev->id, req->lun, req->tag, len); + return; + } + trace_scsi_req_data(req->dev->id, req->lun, req->tag, len); + assert(req->cmd.mode != SCSI_XFER_NONE); + if (!req->sg) { + req->resid -= len; + req->bus->info->transfer_data(req, len); + return; + } + + /* If the device calls scsi_req_data and the HBA specified a + * scatter/gather list, the transfer has to happen in a single + * step. */ + assert(!req->dma_started); + req->dma_started = true; + + buf = scsi_req_get_buf(req); + if (req->cmd.mode == SCSI_XFER_FROM_DEV) { + req->resid = dma_buf_read(buf, len, req->sg); + } else { + req->resid = dma_buf_write(buf, len, req->sg); + } + scsi_req_continue(req); +} + +void scsi_req_print(SCSIRequest *req) +{ + FILE *fp = stderr; + int i; + + fprintf(fp, "[%s id=%d] %s", + req->dev->qdev.parent_bus->name, + req->dev->id, + scsi_command_name(req->cmd.buf[0])); + for (i = 1; i < req->cmd.len; i++) { + fprintf(fp, " 0x%02x", req->cmd.buf[i]); + } + switch (req->cmd.mode) { + case SCSI_XFER_NONE: + fprintf(fp, " - none\n"); + break; + case SCSI_XFER_FROM_DEV: + fprintf(fp, " - from-dev len=%zd\n", req->cmd.xfer); + break; + case SCSI_XFER_TO_DEV: + fprintf(fp, " - to-dev len=%zd\n", req->cmd.xfer); + break; + default: + fprintf(fp, " - Oops\n"); + break; + } +} + +void scsi_req_complete(SCSIRequest *req, int status) +{ + assert(req->status == -1); + req->status = status; + + assert(req->sense_len <= sizeof(req->sense)); + if (status == GOOD) { + req->sense_len = 0; + } + + if (req->sense_len) { + memcpy(req->dev->sense, req->sense, req->sense_len); + req->dev->sense_len = req->sense_len; + req->dev->sense_is_ua = (req->ops == &reqops_unit_attention); + } else { + req->dev->sense_len = 0; + req->dev->sense_is_ua = false; + } + + /* + * Unit attention state is now stored in the device's sense buffer + * if the HBA didn't do autosense. Clear the pending unit attention + * flags. + */ + scsi_clear_unit_attention(req); + + scsi_req_ref(req); + scsi_req_dequeue(req); + req->bus->info->complete(req, req->status, req->resid); + + /* Cancelled requests might end up being completed instead of cancelled */ + notifier_list_notify(&req->cancel_notifiers, req); + scsi_req_unref(req); +} + +/* Called by the devices when the request is canceled. */ +void scsi_req_cancel_complete(SCSIRequest *req) +{ + assert(req->io_canceled); + if (req->bus->info->cancel) { + req->bus->info->cancel(req); + } + notifier_list_notify(&req->cancel_notifiers, req); + scsi_req_unref(req); +} + +/* Cancel @req asynchronously. @notifier is added to @req's cancellation + * notifier list, the bus will be notified the requests cancellation is + * completed. + * */ +void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier) +{ + trace_scsi_req_cancel(req->dev->id, req->lun, req->tag); + if (notifier) { + notifier_list_add(&req->cancel_notifiers, notifier); + } + if (req->io_canceled) { + return; + } + scsi_req_ref(req); + scsi_req_dequeue(req); + req->io_canceled = true; + if (req->aiocb) { + blk_aio_cancel_async(req->aiocb); + } else { + scsi_req_cancel_complete(req); + } +} + +void scsi_req_cancel(SCSIRequest *req) +{ + trace_scsi_req_cancel(req->dev->id, req->lun, req->tag); + if (!req->enqueued) { + return; + } + scsi_req_ref(req); + scsi_req_dequeue(req); + req->io_canceled = true; + if (req->aiocb) { + blk_aio_cancel(req->aiocb); + } else { + scsi_req_cancel_complete(req); + } +} + +static int scsi_ua_precedence(SCSISense sense) +{ + if (sense.key != UNIT_ATTENTION) { + return INT_MAX; + } + if (sense.asc == 0x29 && sense.ascq == 0x04) { + /* DEVICE INTERNAL RESET goes with POWER ON OCCURRED */ + return 1; + } else if (sense.asc == 0x3F && sense.ascq == 0x01) { + /* MICROCODE HAS BEEN CHANGED goes with SCSI BUS RESET OCCURRED */ + return 2; + } else if (sense.asc == 0x29 && (sense.ascq == 0x05 || sense.ascq == 0x06)) { + /* These two go with "all others". */ + ; + } else if (sense.asc == 0x29 && sense.ascq <= 0x07) { + /* POWER ON, RESET OR BUS DEVICE RESET OCCURRED = 0 + * POWER ON OCCURRED = 1 + * SCSI BUS RESET OCCURRED = 2 + * BUS DEVICE RESET FUNCTION OCCURRED = 3 + * I_T NEXUS LOSS OCCURRED = 7 + */ + return sense.ascq; + } else if (sense.asc == 0x2F && sense.ascq == 0x01) { + /* COMMANDS CLEARED BY POWER LOSS NOTIFICATION */ + return 8; + } + return (sense.asc << 8) | sense.ascq; +} + +void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) +{ + int prec1, prec2; + if (sense.key != UNIT_ATTENTION) { + return; + } + trace_scsi_device_set_ua(sdev->id, sdev->lun, sense.key, + sense.asc, sense.ascq); + + /* + * Override a pre-existing unit attention condition, except for a more + * important reset condition. + */ + prec1 = scsi_ua_precedence(sdev->unit_attention); + prec2 = scsi_ua_precedence(sense); + if (prec2 < prec1) { + sdev->unit_attention = sense; + } +} + +void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) +{ + SCSIRequest *req; + + while (!QTAILQ_EMPTY(&sdev->requests)) { + req = QTAILQ_FIRST(&sdev->requests); + scsi_req_cancel(req); + } + + scsi_device_set_ua(sdev, sense); +} + +static char *scsibus_get_dev_path(DeviceState *dev) +{ + SCSIDevice *d = DO_UPCAST(SCSIDevice, qdev, dev); + DeviceState *hba = dev->parent_bus->parent; + char *id; + char *path; + + id = qdev_get_dev_path(hba); + if (id) { + path = g_strdup_printf("%s/%d:%d:%d", id, d->channel, d->id, d->lun); + } else { + path = g_strdup_printf("%d:%d:%d", d->channel, d->id, d->lun); + } + g_free(id); + return path; +} + +static char *scsibus_get_fw_dev_path(DeviceState *dev) +{ + SCSIDevice *d = SCSI_DEVICE(dev); + return g_strdup_printf("channel@%x/%s@%x,%x", d->channel, + qdev_fw_name(dev), d->id, d->lun); +} + +SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun) +{ + BusChild *kid; + SCSIDevice *target_dev = NULL; + + QTAILQ_FOREACH_REVERSE(kid, &bus->qbus.children, ChildrenHead, sibling) { + DeviceState *qdev = kid->child; + SCSIDevice *dev = SCSI_DEVICE(qdev); + + if (dev->channel == channel && dev->id == id) { + if (dev->lun == lun) { + return dev; + } + target_dev = dev; + } + } + return target_dev; +} + +/* SCSI request list. For simplicity, pv points to the whole device */ + +static void put_scsi_requests(QEMUFile *f, void *pv, size_t size) +{ + SCSIDevice *s = pv; + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); + SCSIRequest *req; + + QTAILQ_FOREACH(req, &s->requests, next) { + assert(!req->io_canceled); + assert(req->status == -1); + assert(req->enqueued); + + qemu_put_sbyte(f, req->retry ? 1 : 2); + qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); + qemu_put_be32s(f, &req->tag); + qemu_put_be32s(f, &req->lun); + if (bus->info->save_request) { + bus->info->save_request(f, req); + } + if (req->ops->save_request) { + req->ops->save_request(f, req); + } + } + qemu_put_sbyte(f, 0); +} + +static int get_scsi_requests(QEMUFile *f, void *pv, size_t size) +{ + SCSIDevice *s = pv; + SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); + int8_t sbyte; + + while ((sbyte = qemu_get_sbyte(f)) > 0) { + uint8_t buf[SCSI_CMD_BUF_SIZE]; + uint32_t tag; + uint32_t lun; + SCSIRequest *req; + + qemu_get_buffer(f, buf, sizeof(buf)); + qemu_get_be32s(f, &tag); + qemu_get_be32s(f, &lun); + req = scsi_req_new(s, tag, lun, buf, NULL); + req->retry = (sbyte == 1); + if (bus->info->load_request) { + req->hba_private = bus->info->load_request(f, req); + } + if (req->ops->load_request) { + req->ops->load_request(f, req); + } + + /* Just restart it later. */ + scsi_req_enqueue_internal(req); + + /* At this point, the request will be kept alive by the reference + * added by scsi_req_enqueue_internal, so we can release our reference. + * The HBA of course will add its own reference in the load_request + * callback if it needs to hold on the SCSIRequest. + */ + scsi_req_unref(req); + } + + return 0; +} + +static const VMStateInfo vmstate_info_scsi_requests = { + .name = "scsi-requests", + .get = get_scsi_requests, + .put = put_scsi_requests, +}; + +static bool scsi_sense_state_needed(void *opaque) +{ + SCSIDevice *s = opaque; + + return s->sense_len > SCSI_SENSE_BUF_SIZE_OLD; +} + +static const VMStateDescription vmstate_scsi_sense_state = { + .name = "SCSIDevice/sense", + .version_id = 1, + .minimum_version_id = 1, + .needed = scsi_sense_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice, + SCSI_SENSE_BUF_SIZE_OLD, + SCSI_SENSE_BUF_SIZE - SCSI_SENSE_BUF_SIZE_OLD), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_scsi_device = { + .name = "SCSIDevice", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(unit_attention.key, SCSIDevice), + VMSTATE_UINT8(unit_attention.asc, SCSIDevice), + VMSTATE_UINT8(unit_attention.ascq, SCSIDevice), + VMSTATE_BOOL(sense_is_ua, SCSIDevice), + VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice, 0, SCSI_SENSE_BUF_SIZE_OLD), + VMSTATE_UINT32(sense_len, SCSIDevice), + { + .name = "requests", + .version_id = 0, + .field_exists = NULL, + .size = 0, /* ouch */ + .info = &vmstate_info_scsi_requests, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_scsi_sense_state, + NULL + } +}; + +static void scsi_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + set_bit(DEVICE_CATEGORY_STORAGE, k->categories); + k->bus_type = TYPE_SCSI_BUS; + k->realize = scsi_qdev_realize; + k->unrealize = scsi_qdev_unrealize; + k->props = scsi_props; +} + +static void scsi_dev_instance_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + SCSIDevice *s = DO_UPCAST(SCSIDevice, qdev, dev); + + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", NULL, + &s->qdev, NULL); +} + +static const TypeInfo scsi_device_type_info = { + .name = TYPE_SCSI_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SCSIDevice), + .abstract = true, + .class_size = sizeof(SCSIDeviceClass), + .class_init = scsi_device_class_init, + .instance_init = scsi_dev_instance_init, +}; + +static void scsi_register_types(void) +{ + type_register_static(&scsi_bus_info); + type_register_static(&scsi_device_type_info); +} + +type_init(scsi_register_types) diff --git a/qemu/hw/scsi/scsi-disk.c b/qemu/hw/scsi/scsi-disk.c new file mode 100644 index 000000000..64f069473 --- /dev/null +++ b/qemu/hw/scsi/scsi-disk.c @@ -0,0 +1,2776 @@ +/* + * SCSI Device emulation + * + * Copyright (c) 2006 CodeSourcery. + * Based on code by Fabrice Bellard + * + * Written by Paul Brook + * Modifications: + * 2009-Dec-12 Artyom Tarasenko : implemented stamdard inquiry for the case + * when the allocation length of CDB is smaller + * than 36. + * 2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the + * MODE SENSE response. + * + * This code is licensed under the LGPL. + * + * Note that this file only handles the SCSI architecture model and device + * commands. Emulation of interface/link layer protocols is handled by + * the host adapter emulator. + */ + +//#define DEBUG_SCSI + +#ifdef DEBUG_SCSI +#define DPRINTF(fmt, ...) \ +do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#endif + +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "hw/scsi/scsi.h" +#include "block/scsi.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/block/block.h" +#include "sysemu/dma.h" + +#ifdef __linux +#include <scsi/sg.h> +#endif + +#define SCSI_WRITE_SAME_MAX 524288 +#define SCSI_DMA_BUF_SIZE 131072 +#define SCSI_MAX_INQUIRY_LEN 256 +#define SCSI_MAX_MODE_LEN 256 + +#define DEFAULT_DISCARD_GRANULARITY 4096 +#define DEFAULT_MAX_UNMAP_SIZE (1 << 30) /* 1 GB */ +#define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */ + +typedef struct SCSIDiskState SCSIDiskState; + +typedef struct SCSIDiskReq { + SCSIRequest req; + /* Both sector and sector_count are in terms of qemu 512 byte blocks. */ + uint64_t sector; + uint32_t sector_count; + uint32_t buflen; + bool started; + struct iovec iov; + QEMUIOVector qiov; + BlockAcctCookie acct; +} SCSIDiskReq; + +#define SCSI_DISK_F_REMOVABLE 0 +#define SCSI_DISK_F_DPOFUA 1 +#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS 2 + +struct SCSIDiskState +{ + SCSIDevice qdev; + uint32_t features; + bool media_changed; + bool media_event; + bool eject_request; + uint64_t wwn; + uint64_t port_wwn; + uint16_t port_index; + uint64_t max_unmap_size; + uint64_t max_io_size; + QEMUBH *bh; + char *version; + char *serial; + char *vendor; + char *product; + bool tray_open; + bool tray_locked; +}; + +static int scsi_handle_rw_error(SCSIDiskReq *r, int error); + +static void scsi_free_request(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + + qemu_vfree(r->iov.iov_base); +} + +/* Helper function for command completion with sense. */ +static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense) +{ + DPRINTF("Command complete tag=0x%x sense=%d/%d/%d\n", + r->req.tag, sense.key, sense.asc, sense.ascq); + scsi_req_build_sense(&r->req, sense); + scsi_req_complete(&r->req, CHECK_CONDITION); +} + +static uint32_t scsi_init_iovec(SCSIDiskReq *r, size_t size) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + if (!r->iov.iov_base) { + r->buflen = size; + r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen); + } + r->iov.iov_len = MIN(r->sector_count * 512, r->buflen); + qemu_iovec_init_external(&r->qiov, &r->iov, 1); + return r->qiov.size / 512; +} + +static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + + qemu_put_be64s(f, &r->sector); + qemu_put_be32s(f, &r->sector_count); + qemu_put_be32s(f, &r->buflen); + if (r->buflen) { + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len); + } else if (!req->retry) { + uint32_t len = r->iov.iov_len; + qemu_put_be32s(f, &len); + qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len); + } + } +} + +static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + + qemu_get_be64s(f, &r->sector); + qemu_get_be32s(f, &r->sector_count); + qemu_get_be32s(f, &r->buflen); + if (r->buflen) { + scsi_init_iovec(r, r->buflen); + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len); + } else if (!r->req.retry) { + uint32_t len; + qemu_get_be32s(f, &len); + r->iov.iov_len = len; + assert(r->iov.iov_len <= r->buflen); + qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len); + } + } + + qemu_iovec_init_external(&r->qiov, &r->iov, 1); +} + +static void scsi_aio_complete(void *opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); +} + +static bool scsi_is_cmd_fua(SCSICommand *cmd) +{ + switch (cmd->buf[0]) { + case READ_10: + case READ_12: + case READ_16: + case WRITE_10: + case WRITE_12: + case WRITE_16: + return (cmd->buf[1] & 8) != 0; + + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + case WRITE_VERIFY_10: + case WRITE_VERIFY_12: + case WRITE_VERIFY_16: + return true; + + case READ_6: + case WRITE_6: + default: + return false; + } +} + +static void scsi_write_do_fua(SCSIDiskReq *r) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (scsi_is_cmd_fua(&r->req.cmd)) { + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); + return; + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); +} + +static void scsi_dma_complete_noio(void *opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + if (r->req.aiocb != NULL) { + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + r->sector += r->sector_count; + r->sector_count = 0; + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + scsi_write_do_fua(r); + return; + } else { + scsi_req_complete(&r->req, GOOD); + } + +done: + scsi_req_unref(&r->req); +} + +static void scsi_dma_complete(void *opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + + assert(r->req.aiocb != NULL); + scsi_dma_complete_noio(opaque, ret); +} + +static void scsi_read_complete(void * opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + int n; + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size); + + n = r->qiov.size / 512; + r->sector += n; + r->sector_count -= n; + scsi_req_data(&r->req, r->qiov.size); + +done: + scsi_req_unref(&r->req); +} + +/* Actually issue a read to the block device. */ +static void scsi_do_read(void *opaque, int ret) +{ + SCSIDiskReq *r = opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + + if (r->req.aiocb != NULL) { + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + + if (r->req.sg) { + dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ); + r->req.resid -= r->req.sg->size; + r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector, + scsi_dma_complete, r); + } else { + n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + r->req.aiocb = blk_aio_readv(s->qdev.conf.blk, r->sector, &r->qiov, n, + scsi_read_complete, r); + } + +done: + scsi_req_unref(&r->req); +} + +/* Read more data from scsi device into buffer. */ +static void scsi_read_data(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + bool first; + + DPRINTF("Read sector_count=%d\n", r->sector_count); + if (r->sector_count == 0) { + /* This also clears the sense buffer for REQUEST SENSE. */ + scsi_req_complete(&r->req, GOOD); + return; + } + + /* No data transfer may already be in progress */ + assert(r->req.aiocb == NULL); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + DPRINTF("Data transfer direction invalid\n"); + scsi_read_complete(r, -EINVAL); + return; + } + + if (s->tray_open) { + scsi_read_complete(r, -ENOMEDIUM); + return; + } + + first = !r->started; + r->started = true; + if (first && scsi_is_cmd_fua(&r->req.cmd)) { + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read, r); + } else { + scsi_do_read(r, 0); + } +} + +/* + * scsi_handle_rw_error has two return values. 0 means that the error + * must be ignored, 1 means that the error has been processed and the + * caller should not do anything else for this request. Note that + * scsi_handle_rw_error always manages its reference counts, independent + * of the return value. + */ +static int scsi_handle_rw_error(SCSIDiskReq *r, int error) +{ + bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, + is_read, error); + + if (action == BLOCK_ERROR_ACTION_REPORT) { + switch (error) { + case ENOMEDIUM: + scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); + break; + case ENOMEM: + scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); + break; + case EINVAL: + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + break; + case ENOSPC: + scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); + break; + default: + scsi_check_condition(r, SENSE_CODE(IO_ERROR)); + break; + } + } + blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_STOP) { + scsi_req_retry(&r->req); + } + return action != BLOCK_ERROR_ACTION_IGNORE; +} + +static void scsi_write_complete(void * opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + + if (r->req.aiocb != NULL) { + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + n = r->qiov.size / 512; + r->sector += n; + r->sector_count -= n; + if (r->sector_count == 0) { + scsi_write_do_fua(r); + return; + } else { + scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); + DPRINTF("Write complete tag=0x%x more=%zd\n", r->req.tag, r->qiov.size); + scsi_req_data(&r->req, r->qiov.size); + } + +done: + scsi_req_unref(&r->req); +} + +static void scsi_write_data(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + + /* No data transfer may already be in progress */ + assert(r->req.aiocb == NULL); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + if (r->req.cmd.mode != SCSI_XFER_TO_DEV) { + DPRINTF("Data transfer direction invalid\n"); + scsi_write_complete(r, -EINVAL); + return; + } + + if (!r->req.sg && !r->qiov.size) { + /* Called for the first time. Ask the driver to send us more data. */ + r->started = true; + scsi_write_complete(r, 0); + return; + } + if (s->tray_open) { + scsi_write_complete(r, -ENOMEDIUM); + return; + } + + if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 || + r->req.cmd.buf[0] == VERIFY_16) { + if (r->req.sg) { + scsi_dma_complete_noio(r, 0); + } else { + scsi_write_complete(r, 0); + } + return; + } + + if (r->req.sg) { + dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE); + r->req.resid -= r->req.sg->size; + r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector, + scsi_dma_complete, r); + } else { + n = r->qiov.size / 512; + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE); + r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, r->sector, &r->qiov, n, + scsi_write_complete, r); + } +} + +/* Return a pointer to the data buffer. */ +static uint8_t *scsi_get_buf(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + + return (uint8_t *)r->iov.iov_base; +} + +static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + int buflen = 0; + int start; + + if (req->cmd.buf[1] & 0x1) { + /* Vital product data */ + uint8_t page_code = req->cmd.buf[2]; + + outbuf[buflen++] = s->qdev.type & 0x1f; + outbuf[buflen++] = page_code ; // this page + outbuf[buflen++] = 0x00; + outbuf[buflen++] = 0x00; + start = buflen; + + switch (page_code) { + case 0x00: /* Supported page codes, mandatory */ + { + DPRINTF("Inquiry EVPD[Supported pages] " + "buffer size %zd\n", req->cmd.xfer); + outbuf[buflen++] = 0x00; // list of supported pages (this page) + if (s->serial) { + outbuf[buflen++] = 0x80; // unit serial number + } + outbuf[buflen++] = 0x83; // device identification + if (s->qdev.type == TYPE_DISK) { + outbuf[buflen++] = 0xb0; // block limits + outbuf[buflen++] = 0xb2; // thin provisioning + } + break; + } + case 0x80: /* Device serial number, optional */ + { + int l; + + if (!s->serial) { + DPRINTF("Inquiry (EVPD[Serial number] not supported\n"); + return -1; + } + + l = strlen(s->serial); + if (l > 20) { + l = 20; + } + + DPRINTF("Inquiry EVPD[Serial number] " + "buffer size %zd\n", req->cmd.xfer); + memcpy(outbuf+buflen, s->serial, l); + buflen += l; + break; + } + + case 0x83: /* Device identification page, mandatory */ + { + const char *str = s->serial ?: blk_name(s->qdev.conf.blk); + int max_len = s->serial ? 20 : 255 - 8; + int id_len = strlen(str); + + if (id_len > max_len) { + id_len = max_len; + } + DPRINTF("Inquiry EVPD[Device identification] " + "buffer size %zd\n", req->cmd.xfer); + + outbuf[buflen++] = 0x2; // ASCII + outbuf[buflen++] = 0; // not officially assigned + outbuf[buflen++] = 0; // reserved + outbuf[buflen++] = id_len; // length of data following + memcpy(outbuf+buflen, str, id_len); + buflen += id_len; + + if (s->wwn) { + outbuf[buflen++] = 0x1; // Binary + outbuf[buflen++] = 0x3; // NAA + outbuf[buflen++] = 0; // reserved + outbuf[buflen++] = 8; + stq_be_p(&outbuf[buflen], s->wwn); + buflen += 8; + } + + if (s->port_wwn) { + outbuf[buflen++] = 0x61; // SAS / Binary + outbuf[buflen++] = 0x93; // PIV / Target port / NAA + outbuf[buflen++] = 0; // reserved + outbuf[buflen++] = 8; + stq_be_p(&outbuf[buflen], s->port_wwn); + buflen += 8; + } + + if (s->port_index) { + outbuf[buflen++] = 0x61; // SAS / Binary + outbuf[buflen++] = 0x94; // PIV / Target port / relative target port + outbuf[buflen++] = 0; // reserved + outbuf[buflen++] = 4; + stw_be_p(&outbuf[buflen + 2], s->port_index); + buflen += 4; + } + break; + } + case 0xb0: /* block limits */ + { + unsigned int unmap_sectors = + s->qdev.conf.discard_granularity / s->qdev.blocksize; + unsigned int min_io_size = + s->qdev.conf.min_io_size / s->qdev.blocksize; + unsigned int opt_io_size = + s->qdev.conf.opt_io_size / s->qdev.blocksize; + unsigned int max_unmap_sectors = + s->max_unmap_size / s->qdev.blocksize; + unsigned int max_io_sectors = + s->max_io_size / s->qdev.blocksize; + + if (s->qdev.type == TYPE_ROM) { + DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n", + page_code); + return -1; + } + /* required VPD size with unmap support */ + buflen = 0x40; + memset(outbuf + 4, 0, buflen - 4); + + outbuf[4] = 0x1; /* wsnz */ + + /* optimal transfer length granularity */ + outbuf[6] = (min_io_size >> 8) & 0xff; + outbuf[7] = min_io_size & 0xff; + + /* maximum transfer length */ + outbuf[8] = (max_io_sectors >> 24) & 0xff; + outbuf[9] = (max_io_sectors >> 16) & 0xff; + outbuf[10] = (max_io_sectors >> 8) & 0xff; + outbuf[11] = max_io_sectors & 0xff; + + /* optimal transfer length */ + outbuf[12] = (opt_io_size >> 24) & 0xff; + outbuf[13] = (opt_io_size >> 16) & 0xff; + outbuf[14] = (opt_io_size >> 8) & 0xff; + outbuf[15] = opt_io_size & 0xff; + + /* max unmap LBA count, default is 1GB */ + outbuf[20] = (max_unmap_sectors >> 24) & 0xff; + outbuf[21] = (max_unmap_sectors >> 16) & 0xff; + outbuf[22] = (max_unmap_sectors >> 8) & 0xff; + outbuf[23] = max_unmap_sectors & 0xff; + + /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header. */ + outbuf[24] = 0; + outbuf[25] = 0; + outbuf[26] = 0; + outbuf[27] = 255; + + /* optimal unmap granularity */ + outbuf[28] = (unmap_sectors >> 24) & 0xff; + outbuf[29] = (unmap_sectors >> 16) & 0xff; + outbuf[30] = (unmap_sectors >> 8) & 0xff; + outbuf[31] = unmap_sectors & 0xff; + + /* max write same size */ + outbuf[36] = 0; + outbuf[37] = 0; + outbuf[38] = 0; + outbuf[39] = 0; + + outbuf[40] = (max_io_sectors >> 24) & 0xff; + outbuf[41] = (max_io_sectors >> 16) & 0xff; + outbuf[42] = (max_io_sectors >> 8) & 0xff; + outbuf[43] = max_io_sectors & 0xff; + break; + } + case 0xb2: /* thin provisioning */ + { + buflen = 8; + outbuf[4] = 0; + outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */ + outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1; + outbuf[7] = 0; + break; + } + default: + return -1; + } + /* done with EVPD */ + assert(buflen - start <= 255); + outbuf[start - 1] = buflen - start; + return buflen; + } + + /* Standard INQUIRY data */ + if (req->cmd.buf[2] != 0) { + return -1; + } + + /* PAGE CODE == 0 */ + buflen = req->cmd.xfer; + if (buflen > SCSI_MAX_INQUIRY_LEN) { + buflen = SCSI_MAX_INQUIRY_LEN; + } + + outbuf[0] = s->qdev.type & 0x1f; + outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0; + + strpadcpy((char *) &outbuf[16], 16, s->product, ' '); + strpadcpy((char *) &outbuf[8], 8, s->vendor, ' '); + + memset(&outbuf[32], 0, 4); + memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version))); + /* + * We claim conformance to SPC-3, which is required for guests + * to ask for modern features like READ CAPACITY(16) or the + * block characteristics VPD page by default. Not all of SPC-3 + * is actually implemented, but we're good enough. + */ + outbuf[2] = 5; + outbuf[3] = 2 | 0x10; /* Format 2, HiSup */ + + if (buflen > 36) { + outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */ + } else { + /* If the allocation length of CDB is too small, + the additional length is not adjusted */ + outbuf[4] = 36 - 5; + } + + /* Sync data transfer and TCQ. */ + outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0); + return buflen; +} + +static inline bool media_is_dvd(SCSIDiskState *s) +{ + uint64_t nb_sectors; + if (s->qdev.type != TYPE_ROM) { + return false; + } + if (!blk_is_inserted(s->qdev.conf.blk)) { + return false; + } + if (s->tray_open) { + return false; + } + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + return nb_sectors > CD_MAX_SECTORS; +} + +static inline bool media_is_cd(SCSIDiskState *s) +{ + uint64_t nb_sectors; + if (s->qdev.type != TYPE_ROM) { + return false; + } + if (!blk_is_inserted(s->qdev.conf.blk)) { + return false; + } + if (s->tray_open) { + return false; + } + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + return nb_sectors <= CD_MAX_SECTORS; +} + +static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r, + uint8_t *outbuf) +{ + uint8_t type = r->req.cmd.buf[1] & 7; + + if (s->qdev.type != TYPE_ROM) { + return -1; + } + + /* Types 1/2 are only defined for Blu-Ray. */ + if (type != 0) { + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + return -1; + } + + memset(outbuf, 0, 34); + outbuf[1] = 32; + outbuf[2] = 0xe; /* last session complete, disc finalized */ + outbuf[3] = 1; /* first track on disc */ + outbuf[4] = 1; /* # of sessions */ + outbuf[5] = 1; /* first track of last session */ + outbuf[6] = 1; /* last track of last session */ + outbuf[7] = 0x20; /* unrestricted use */ + outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */ + /* 9-10-11: most significant byte corresponding bytes 4-5-6 */ + /* 12-23: not meaningful for CD-ROM or DVD-ROM */ + /* 24-31: disc bar code */ + /* 32: disc application code */ + /* 33: number of OPC tables */ + + return 34; +} + +static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r, + uint8_t *outbuf) +{ + static const int rds_caps_size[5] = { + [0] = 2048 + 4, + [1] = 4 + 4, + [3] = 188 + 4, + [4] = 2048 + 4, + }; + + uint8_t media = r->req.cmd.buf[1]; + uint8_t layer = r->req.cmd.buf[6]; + uint8_t format = r->req.cmd.buf[7]; + int size = -1; + + if (s->qdev.type != TYPE_ROM) { + return -1; + } + if (media != 0) { + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + return -1; + } + + if (format != 0xff) { + if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); + return -1; + } + if (media_is_cd(s)) { + scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT)); + return -1; + } + if (format >= ARRAY_SIZE(rds_caps_size)) { + return -1; + } + size = rds_caps_size[format]; + memset(outbuf, 0, size); + } + + switch (format) { + case 0x00: { + /* Physical format information */ + uint64_t nb_sectors; + if (layer != 0) { + goto fail; + } + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + + outbuf[4] = 1; /* DVD-ROM, part version 1 */ + outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */ + outbuf[6] = 1; /* one layer, read-only (per MMC-2 spec) */ + outbuf[7] = 0; /* default densities */ + + stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */ + stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */ + break; + } + + case 0x01: /* DVD copyright information, all zeros */ + break; + + case 0x03: /* BCA information - invalid field for no BCA info */ + return -1; + + case 0x04: /* DVD disc manufacturing information, all zeros */ + break; + + case 0xff: { /* List capabilities */ + int i; + size = 4; + for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) { + if (!rds_caps_size[i]) { + continue; + } + outbuf[size] = i; + outbuf[size + 1] = 0x40; /* Not writable, readable */ + stw_be_p(&outbuf[size + 2], rds_caps_size[i]); + size += 4; + } + break; + } + + default: + return -1; + } + + /* Size of buffer, not including 2 byte size field */ + stw_be_p(outbuf, size - 2); + return size; + +fail: + return -1; +} + +static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf) +{ + uint8_t event_code, media_status; + + media_status = 0; + if (s->tray_open) { + media_status = MS_TRAY_OPEN; + } else if (blk_is_inserted(s->qdev.conf.blk)) { + media_status = MS_MEDIA_PRESENT; + } + + /* Event notification descriptor */ + event_code = MEC_NO_CHANGE; + if (media_status != MS_TRAY_OPEN) { + if (s->media_event) { + event_code = MEC_NEW_MEDIA; + s->media_event = false; + } else if (s->eject_request) { + event_code = MEC_EJECT_REQUESTED; + s->eject_request = false; + } + } + + outbuf[0] = event_code; + outbuf[1] = media_status; + + /* These fields are reserved, just clear them. */ + outbuf[2] = 0; + outbuf[3] = 0; + return 4; +} + +static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r, + uint8_t *outbuf) +{ + int size; + uint8_t *buf = r->req.cmd.buf; + uint8_t notification_class_request = buf[4]; + if (s->qdev.type != TYPE_ROM) { + return -1; + } + if ((buf[1] & 1) == 0) { + /* asynchronous */ + return -1; + } + + size = 4; + outbuf[0] = outbuf[1] = 0; + outbuf[3] = 1 << GESN_MEDIA; /* supported events */ + if (notification_class_request & (1 << GESN_MEDIA)) { + outbuf[2] = GESN_MEDIA; + size += scsi_event_status_media(s, &outbuf[size]); + } else { + outbuf[2] = 0x80; + } + stw_be_p(outbuf, size - 4); + return size; +} + +static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf) +{ + int current; + + if (s->qdev.type != TYPE_ROM) { + return -1; + } + + if (media_is_dvd(s)) { + current = MMC_PROFILE_DVD_ROM; + } else if (media_is_cd(s)) { + current = MMC_PROFILE_CD_ROM; + } else { + current = MMC_PROFILE_NONE; + } + + memset(outbuf, 0, 40); + stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */ + stw_be_p(&outbuf[6], current); + /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */ + outbuf[10] = 0x03; /* persistent, current */ + outbuf[11] = 8; /* two profiles */ + stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM); + outbuf[14] = (current == MMC_PROFILE_DVD_ROM); + stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM); + outbuf[18] = (current == MMC_PROFILE_CD_ROM); + /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */ + stw_be_p(&outbuf[20], 1); + outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */ + outbuf[23] = 8; + stl_be_p(&outbuf[24], 1); /* SCSI */ + outbuf[28] = 1; /* DBE = 1, mandatory */ + /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */ + stw_be_p(&outbuf[32], 3); + outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */ + outbuf[35] = 4; + outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */ + /* TODO: Random readable, CD read, DVD read, drive serial number, + power management */ + return 40; +} + +static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf) +{ + if (s->qdev.type != TYPE_ROM) { + return -1; + } + memset(outbuf, 0, 8); + outbuf[5] = 1; /* CD-ROM */ + return 8; +} + +static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf, + int page_control) +{ + static const int mode_sense_valid[0x3f] = { + [MODE_PAGE_HD_GEOMETRY] = (1 << TYPE_DISK), + [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK), + [MODE_PAGE_CACHING] = (1 << TYPE_DISK) | (1 << TYPE_ROM), + [MODE_PAGE_R_W_ERROR] = (1 << TYPE_DISK) | (1 << TYPE_ROM), + [MODE_PAGE_AUDIO_CTL] = (1 << TYPE_ROM), + [MODE_PAGE_CAPABILITIES] = (1 << TYPE_ROM), + }; + + uint8_t *p = *p_outbuf + 2; + int length; + + if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) { + return -1; + } + + /* + * If Changeable Values are requested, a mask denoting those mode parameters + * that are changeable shall be returned. As we currently don't support + * parameter changes via MODE_SELECT all bits are returned set to zero. + * The buffer was already menset to zero by the caller of this function. + * + * The offsets here are off by two compared to the descriptions in the + * SCSI specs, because those include a 2-byte header. This is unfortunate, + * but it is done so that offsets are consistent within our implementation + * of MODE SENSE and MODE SELECT. MODE SELECT has to deal with both + * 2-byte and 4-byte headers. + */ + switch (page) { + case MODE_PAGE_HD_GEOMETRY: + length = 0x16; + if (page_control == 1) { /* Changeable Values */ + break; + } + /* if a geometry hint is available, use it */ + p[0] = (s->qdev.conf.cyls >> 16) & 0xff; + p[1] = (s->qdev.conf.cyls >> 8) & 0xff; + p[2] = s->qdev.conf.cyls & 0xff; + p[3] = s->qdev.conf.heads & 0xff; + /* Write precomp start cylinder, disabled */ + p[4] = (s->qdev.conf.cyls >> 16) & 0xff; + p[5] = (s->qdev.conf.cyls >> 8) & 0xff; + p[6] = s->qdev.conf.cyls & 0xff; + /* Reduced current start cylinder, disabled */ + p[7] = (s->qdev.conf.cyls >> 16) & 0xff; + p[8] = (s->qdev.conf.cyls >> 8) & 0xff; + p[9] = s->qdev.conf.cyls & 0xff; + /* Device step rate [ns], 200ns */ + p[10] = 0; + p[11] = 200; + /* Landing zone cylinder */ + p[12] = 0xff; + p[13] = 0xff; + p[14] = 0xff; + /* Medium rotation rate [rpm], 5400 rpm */ + p[18] = (5400 >> 8) & 0xff; + p[19] = 5400 & 0xff; + break; + + case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY: + length = 0x1e; + if (page_control == 1) { /* Changeable Values */ + break; + } + /* Transfer rate [kbit/s], 5Mbit/s */ + p[0] = 5000 >> 8; + p[1] = 5000 & 0xff; + /* if a geometry hint is available, use it */ + p[2] = s->qdev.conf.heads & 0xff; + p[3] = s->qdev.conf.secs & 0xff; + p[4] = s->qdev.blocksize >> 8; + p[6] = (s->qdev.conf.cyls >> 8) & 0xff; + p[7] = s->qdev.conf.cyls & 0xff; + /* Write precomp start cylinder, disabled */ + p[8] = (s->qdev.conf.cyls >> 8) & 0xff; + p[9] = s->qdev.conf.cyls & 0xff; + /* Reduced current start cylinder, disabled */ + p[10] = (s->qdev.conf.cyls >> 8) & 0xff; + p[11] = s->qdev.conf.cyls & 0xff; + /* Device step rate [100us], 100us */ + p[12] = 0; + p[13] = 1; + /* Device step pulse width [us], 1us */ + p[14] = 1; + /* Device head settle delay [100us], 100us */ + p[15] = 0; + p[16] = 1; + /* Motor on delay [0.1s], 0.1s */ + p[17] = 1; + /* Motor off delay [0.1s], 0.1s */ + p[18] = 1; + /* Medium rotation rate [rpm], 5400 rpm */ + p[26] = (5400 >> 8) & 0xff; + p[27] = 5400 & 0xff; + break; + + case MODE_PAGE_CACHING: + length = 0x12; + if (page_control == 1 || /* Changeable Values */ + blk_enable_write_cache(s->qdev.conf.blk)) { + p[0] = 4; /* WCE */ + } + break; + + case MODE_PAGE_R_W_ERROR: + length = 10; + if (page_control == 1) { /* Changeable Values */ + break; + } + p[0] = 0x80; /* Automatic Write Reallocation Enabled */ + if (s->qdev.type == TYPE_ROM) { + p[1] = 0x20; /* Read Retry Count */ + } + break; + + case MODE_PAGE_AUDIO_CTL: + length = 14; + break; + + case MODE_PAGE_CAPABILITIES: + length = 0x14; + if (page_control == 1) { /* Changeable Values */ + break; + } + + p[0] = 0x3b; /* CD-R & CD-RW read */ + p[1] = 0; /* Writing not supported */ + p[2] = 0x7f; /* Audio, composite, digital out, + mode 2 form 1&2, multi session */ + p[3] = 0xff; /* CD DA, DA accurate, RW supported, + RW corrected, C2 errors, ISRC, + UPC, Bar code */ + p[4] = 0x2d | (s->tray_locked ? 2 : 0); + /* Locking supported, jumper present, eject, tray */ + p[5] = 0; /* no volume & mute control, no + changer */ + p[6] = (50 * 176) >> 8; /* 50x read speed */ + p[7] = (50 * 176) & 0xff; + p[8] = 2 >> 8; /* Two volume levels */ + p[9] = 2 & 0xff; + p[10] = 2048 >> 8; /* 2M buffer */ + p[11] = 2048 & 0xff; + p[12] = (16 * 176) >> 8; /* 16x read speed current */ + p[13] = (16 * 176) & 0xff; + p[16] = (16 * 176) >> 8; /* 16x write speed */ + p[17] = (16 * 176) & 0xff; + p[18] = (16 * 176) >> 8; /* 16x write speed current */ + p[19] = (16 * 176) & 0xff; + break; + + default: + return -1; + } + + assert(length < 256); + (*p_outbuf)[0] = page; + (*p_outbuf)[1] = length; + *p_outbuf += length + 2; + return length + 2; +} + +static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint64_t nb_sectors; + bool dbd; + int page, buflen, ret, page_control; + uint8_t *p; + uint8_t dev_specific_param; + + dbd = (r->req.cmd.buf[1] & 0x8) != 0; + page = r->req.cmd.buf[2] & 0x3f; + page_control = (r->req.cmd.buf[2] & 0xc0) >> 6; + DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n", + (r->req.cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, r->req.cmd.xfer, page_control); + memset(outbuf, 0, r->req.cmd.xfer); + p = outbuf; + + if (s->qdev.type == TYPE_DISK) { + dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0; + if (blk_is_read_only(s->qdev.conf.blk)) { + dev_specific_param |= 0x80; /* Readonly. */ + } + } else { + /* MMC prescribes that CD/DVD drives have no block descriptors, + * and defines no device-specific parameter. */ + dev_specific_param = 0x00; + dbd = true; + } + + if (r->req.cmd.buf[0] == MODE_SENSE) { + p[1] = 0; /* Default media type. */ + p[2] = dev_specific_param; + p[3] = 0; /* Block descriptor length. */ + p += 4; + } else { /* MODE_SENSE_10 */ + p[2] = 0; /* Default media type. */ + p[3] = dev_specific_param; + p[6] = p[7] = 0; /* Block descriptor length. */ + p += 8; + } + + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + if (!dbd && nb_sectors) { + if (r->req.cmd.buf[0] == MODE_SENSE) { + outbuf[3] = 8; /* Block descriptor length */ + } else { /* MODE_SENSE_10 */ + outbuf[7] = 8; /* Block descriptor length */ + } + nb_sectors /= (s->qdev.blocksize / 512); + if (nb_sectors > 0xffffff) { + nb_sectors = 0; + } + p[0] = 0; /* media density code */ + p[1] = (nb_sectors >> 16) & 0xff; + p[2] = (nb_sectors >> 8) & 0xff; + p[3] = nb_sectors & 0xff; + p[4] = 0; /* reserved */ + p[5] = 0; /* bytes 5-7 are the sector size in bytes */ + p[6] = s->qdev.blocksize >> 8; + p[7] = 0; + p += 8; + } + + if (page_control == 3) { + /* Saved Values */ + scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED)); + return -1; + } + + if (page == 0x3f) { + for (page = 0; page <= 0x3e; page++) { + mode_sense_page(s, page, &p, page_control); + } + } else { + ret = mode_sense_page(s, page, &p, page_control); + if (ret == -1) { + return -1; + } + } + + buflen = p - outbuf; + /* + * The mode data length field specifies the length in bytes of the + * following data that is available to be transferred. The mode data + * length does not include itself. + */ + if (r->req.cmd.buf[0] == MODE_SENSE) { + outbuf[0] = buflen - 1; + } else { /* MODE_SENSE_10 */ + outbuf[0] = ((buflen - 2) >> 8) & 0xff; + outbuf[1] = (buflen - 2) & 0xff; + } + return buflen; +} + +static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + int start_track, format, msf, toclen; + uint64_t nb_sectors; + + msf = req->cmd.buf[1] & 2; + format = req->cmd.buf[2] & 0xf; + start_track = req->cmd.buf[6]; + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1); + nb_sectors /= s->qdev.blocksize / 512; + switch (format) { + case 0: + toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track); + break; + case 1: + /* multi session : only a single session defined */ + toclen = 12; + memset(outbuf, 0, 12); + outbuf[1] = 0x0a; + outbuf[2] = 0x01; + outbuf[3] = 0x01; + break; + case 2: + toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track); + break; + default: + return -1; + } + return toclen; +} + +static int scsi_disk_emulate_start_stop(SCSIDiskReq *r) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + bool start = req->cmd.buf[4] & 1; + bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */ + int pwrcnd = req->cmd.buf[4] & 0xf0; + + if (pwrcnd) { + /* eject/load only happens for power condition == 0 */ + return 0; + } + + if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) { + if (!start && !s->tray_open && s->tray_locked) { + scsi_check_condition(r, + blk_is_inserted(s->qdev.conf.blk) + ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED) + : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED)); + return -1; + } + + if (s->tray_open != !start) { + blk_eject(s->qdev.conf.blk, !start); + s->tray_open = !start; + } + } + return 0; +} + +static void scsi_disk_emulate_read_data(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + int buflen = r->iov.iov_len; + + if (buflen) { + DPRINTF("Read buf_len=%d\n", buflen); + r->iov.iov_len = 0; + r->started = true; + scsi_req_data(&r->req, buflen); + return; + } + + /* This also clears the sense buffer for REQUEST SENSE. */ + scsi_req_complete(&r->req, GOOD); +} + +static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, + uint8_t *inbuf, int inlen) +{ + uint8_t mode_current[SCSI_MAX_MODE_LEN]; + uint8_t mode_changeable[SCSI_MAX_MODE_LEN]; + uint8_t *p; + int len, expected_len, changeable_len, i; + + /* The input buffer does not include the page header, so it is + * off by 2 bytes. + */ + expected_len = inlen + 2; + if (expected_len > SCSI_MAX_MODE_LEN) { + return -1; + } + + p = mode_current; + memset(mode_current, 0, inlen + 2); + len = mode_sense_page(s, page, &p, 0); + if (len < 0 || len != expected_len) { + return -1; + } + + p = mode_changeable; + memset(mode_changeable, 0, inlen + 2); + changeable_len = mode_sense_page(s, page, &p, 1); + assert(changeable_len == len); + + /* Check that unchangeable bits are the same as what MODE SENSE + * would return. + */ + for (i = 2; i < len; i++) { + if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) { + return -1; + } + } + return 0; +} + +static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p) +{ + switch (page) { + case MODE_PAGE_CACHING: + blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0); + break; + + default: + break; + } +} + +static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + while (len > 0) { + int page, subpage, page_len; + + /* Parse both possible formats for the mode page headers. */ + page = p[0] & 0x3f; + if (p[0] & 0x40) { + if (len < 4) { + goto invalid_param_len; + } + subpage = p[1]; + page_len = lduw_be_p(&p[2]); + p += 4; + len -= 4; + } else { + if (len < 2) { + goto invalid_param_len; + } + subpage = 0; + page_len = p[1]; + p += 2; + len -= 2; + } + + if (subpage) { + goto invalid_param; + } + if (page_len > len) { + goto invalid_param_len; + } + + if (!change) { + if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) { + goto invalid_param; + } + } else { + scsi_disk_apply_mode_select(s, page, p); + } + + p += page_len; + len -= page_len; + } + return 0; + +invalid_param: + scsi_check_condition(r, SENSE_CODE(INVALID_PARAM)); + return -1; + +invalid_param_len: + scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN)); + return -1; +} + +static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint8_t *p = inbuf; + int cmd = r->req.cmd.buf[0]; + int len = r->req.cmd.xfer; + int hdr_len = (cmd == MODE_SELECT ? 4 : 8); + int bd_len; + int pass; + + /* We only support PF=1, SP=0. */ + if ((r->req.cmd.buf[1] & 0x11) != 0x10) { + goto invalid_field; + } + + if (len < hdr_len) { + goto invalid_param_len; + } + + bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6])); + len -= hdr_len; + p += hdr_len; + if (len < bd_len) { + goto invalid_param_len; + } + if (bd_len != 0 && bd_len != 8) { + goto invalid_param; + } + + len -= bd_len; + p += bd_len; + + /* Ensure no change is made if there is an error! */ + for (pass = 0; pass < 2; pass++) { + if (mode_select_pages(r, p, len, pass == 1) < 0) { + assert(pass == 0); + return; + } + } + if (!blk_enable_write_cache(s->qdev.conf.blk)) { + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); + return; + } + + scsi_req_complete(&r->req, GOOD); + return; + +invalid_param: + scsi_check_condition(r, SENSE_CODE(INVALID_PARAM)); + return; + +invalid_param_len: + scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN)); + return; + +invalid_field: + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); +} + +static inline bool check_lba_range(SCSIDiskState *s, + uint64_t sector_num, uint32_t nb_sectors) +{ + /* + * The first line tests that no overflow happens when computing the last + * sector. The second line tests that the last accessed sector is in + * range. + * + * Careful, the computations should not underflow for nb_sectors == 0, + * and a 0-block read to the first LBA beyond the end of device is + * valid. + */ + return (sector_num <= sector_num + nb_sectors && + sector_num + nb_sectors <= s->qdev.max_lba + 1); +} + +typedef struct UnmapCBData { + SCSIDiskReq *r; + uint8_t *inbuf; + int count; +} UnmapCBData; + +static void scsi_unmap_complete(void *opaque, int ret) +{ + UnmapCBData *data = opaque; + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint64_t sector_num; + uint32_t nb_sectors; + + r->req.aiocb = NULL; + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + if (data->count > 0) { + sector_num = ldq_be_p(&data->inbuf[0]); + nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL; + if (!check_lba_range(s, sector_num, nb_sectors)) { + scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); + goto done; + } + + r->req.aiocb = blk_aio_discard(s->qdev.conf.blk, + sector_num * (s->qdev.blocksize / 512), + nb_sectors * (s->qdev.blocksize / 512), + scsi_unmap_complete, data); + data->count--; + data->inbuf += 16; + return; + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); + g_free(data); +} + +static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint8_t *p = inbuf; + int len = r->req.cmd.xfer; + UnmapCBData *data; + + /* Reject ANCHOR=1. */ + if (r->req.cmd.buf[1] & 0x1) { + goto invalid_field; + } + + if (len < 8) { + goto invalid_param_len; + } + if (len < lduw_be_p(&p[0]) + 2) { + goto invalid_param_len; + } + if (len < lduw_be_p(&p[2]) + 8) { + goto invalid_param_len; + } + if (lduw_be_p(&p[2]) & 15) { + goto invalid_param_len; + } + + if (blk_is_read_only(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED)); + return; + } + + data = g_new0(UnmapCBData, 1); + data->r = r; + data->inbuf = &p[8]; + data->count = lduw_be_p(&p[2]) >> 4; + + /* The matching unref is in scsi_unmap_complete, before data is freed. */ + scsi_req_ref(&r->req); + scsi_unmap_complete(data, 0); + return; + +invalid_param_len: + scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN)); + return; + +invalid_field: + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); +} + +typedef struct WriteSameCBData { + SCSIDiskReq *r; + int64_t sector; + int nb_sectors; + QEMUIOVector qiov; + struct iovec iov; +} WriteSameCBData; + +static void scsi_write_same_complete(void *opaque, int ret) +{ + WriteSameCBData *data = opaque; + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + data->nb_sectors -= data->iov.iov_len / 512; + data->sector += data->iov.iov_len / 512; + data->iov.iov_len = MIN(data->nb_sectors * 512, data->iov.iov_len); + if (data->iov.iov_len) { + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + data->iov.iov_len, BLOCK_ACCT_WRITE); + r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector, + &data->qiov, data->iov.iov_len / 512, + scsi_write_same_complete, data); + return; + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); + qemu_vfree(data->iov.iov_base); + g_free(data); +} + +static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf); + WriteSameCBData *data; + uint8_t *buf; + int i; + + /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1. */ + if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) { + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + return; + } + + if (blk_is_read_only(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED)); + return; + } + if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) { + scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); + return; + } + + if (buffer_is_zero(inbuf, s->qdev.blocksize)) { + int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0; + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + nb_sectors * s->qdev.blocksize, + BLOCK_ACCT_WRITE); + r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk, + r->req.cmd.lba * (s->qdev.blocksize / 512), + nb_sectors * (s->qdev.blocksize / 512), + flags, scsi_aio_complete, r); + return; + } + + data = g_new0(WriteSameCBData, 1); + data->r = r; + data->sector = r->req.cmd.lba * (s->qdev.blocksize / 512); + data->nb_sectors = nb_sectors * (s->qdev.blocksize / 512); + data->iov.iov_len = MIN(data->nb_sectors * 512, SCSI_WRITE_SAME_MAX); + data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk, + data->iov.iov_len); + qemu_iovec_init_external(&data->qiov, &data->iov, 1); + + for (i = 0; i < data->iov.iov_len; i += s->qdev.blocksize) { + memcpy(&buf[i], inbuf, s->qdev.blocksize); + } + + scsi_req_ref(&r->req); + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + data->iov.iov_len, BLOCK_ACCT_WRITE); + r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector, + &data->qiov, data->iov.iov_len / 512, + scsi_write_same_complete, data); +} + +static void scsi_disk_emulate_write_data(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + + if (r->iov.iov_len) { + int buflen = r->iov.iov_len; + DPRINTF("Write buf_len=%d\n", buflen); + r->iov.iov_len = 0; + scsi_req_data(&r->req, buflen); + return; + } + + switch (req->cmd.buf[0]) { + case MODE_SELECT: + case MODE_SELECT_10: + /* This also clears the sense buffer for REQUEST SENSE. */ + scsi_disk_emulate_mode_select(r, r->iov.iov_base); + break; + + case UNMAP: + scsi_disk_emulate_unmap(r, r->iov.iov_base); + break; + + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + if (r->req.status == -1) { + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + } + break; + + case WRITE_SAME_10: + case WRITE_SAME_16: + scsi_disk_emulate_write_same(r, r->iov.iov_base); + break; + + default: + abort(); + } +} + +static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + uint64_t nb_sectors; + uint8_t *outbuf; + int buflen; + + switch (req->cmd.buf[0]) { + case INQUIRY: + case MODE_SENSE: + case MODE_SENSE_10: + case RESERVE: + case RESERVE_10: + case RELEASE: + case RELEASE_10: + case START_STOP: + case ALLOW_MEDIUM_REMOVAL: + case GET_CONFIGURATION: + case GET_EVENT_STATUS_NOTIFICATION: + case MECHANISM_STATUS: + case REQUEST_SENSE: + break; + + default: + if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); + return 0; + } + break; + } + + /* + * FIXME: we shouldn't return anything bigger than 4k, but the code + * requires the buffer to be as big as req->cmd.xfer in several + * places. So, do not allow CDBs with a very large ALLOCATION + * LENGTH. The real fix would be to modify scsi_read_data and + * dma_buf_read, so that they return data beyond the buflen + * as all zeros. + */ + if (req->cmd.xfer > 65536) { + goto illegal_request; + } + r->buflen = MAX(4096, req->cmd.xfer); + + if (!r->iov.iov_base) { + r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen); + } + + buflen = req->cmd.xfer; + outbuf = r->iov.iov_base; + memset(outbuf, 0, r->buflen); + switch (req->cmd.buf[0]) { + case TEST_UNIT_READY: + assert(!s->tray_open && blk_is_inserted(s->qdev.conf.blk)); + break; + case INQUIRY: + buflen = scsi_disk_emulate_inquiry(req, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case MODE_SENSE: + case MODE_SENSE_10: + buflen = scsi_disk_emulate_mode_sense(r, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case READ_TOC: + buflen = scsi_disk_emulate_read_toc(req, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case RESERVE: + if (req->cmd.buf[1] & 1) { + goto illegal_request; + } + break; + case RESERVE_10: + if (req->cmd.buf[1] & 3) { + goto illegal_request; + } + break; + case RELEASE: + if (req->cmd.buf[1] & 1) { + goto illegal_request; + } + break; + case RELEASE_10: + if (req->cmd.buf[1] & 3) { + goto illegal_request; + } + break; + case START_STOP: + if (scsi_disk_emulate_start_stop(r) < 0) { + return 0; + } + break; + case ALLOW_MEDIUM_REMOVAL: + s->tray_locked = req->cmd.buf[4] & 1; + blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1); + break; + case READ_CAPACITY_10: + /* The normal LEN field for this command is zero. */ + memset(outbuf, 0, 8); + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + if (!nb_sectors) { + scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY)); + return 0; + } + if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) { + goto illegal_request; + } + nb_sectors /= s->qdev.blocksize / 512; + /* Returned value is the address of the last sector. */ + nb_sectors--; + /* Remember the new size for read/write sanity checking. */ + s->qdev.max_lba = nb_sectors; + /* Clip to 2TB, instead of returning capacity modulo 2TB. */ + if (nb_sectors > UINT32_MAX) { + nb_sectors = UINT32_MAX; + } + outbuf[0] = (nb_sectors >> 24) & 0xff; + outbuf[1] = (nb_sectors >> 16) & 0xff; + outbuf[2] = (nb_sectors >> 8) & 0xff; + outbuf[3] = nb_sectors & 0xff; + outbuf[4] = 0; + outbuf[5] = 0; + outbuf[6] = s->qdev.blocksize >> 8; + outbuf[7] = 0; + break; + case REQUEST_SENSE: + /* Just return "NO SENSE". */ + buflen = scsi_build_sense(NULL, 0, outbuf, r->buflen, + (req->cmd.buf[1] & 1) == 0); + if (buflen < 0) { + goto illegal_request; + } + break; + case MECHANISM_STATUS: + buflen = scsi_emulate_mechanism_status(s, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case GET_CONFIGURATION: + buflen = scsi_get_configuration(s, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case GET_EVENT_STATUS_NOTIFICATION: + buflen = scsi_get_event_status_notification(s, r, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case READ_DISC_INFORMATION: + buflen = scsi_read_disc_information(s, r, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case READ_DVD_STRUCTURE: + buflen = scsi_read_dvd_structure(s, r, outbuf); + if (buflen < 0) { + goto illegal_request; + } + break; + case SERVICE_ACTION_IN_16: + /* Service Action In subcommands. */ + if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) { + DPRINTF("SAI READ CAPACITY(16)\n"); + memset(outbuf, 0, req->cmd.xfer); + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + if (!nb_sectors) { + scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY)); + return 0; + } + if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) { + goto illegal_request; + } + nb_sectors /= s->qdev.blocksize / 512; + /* Returned value is the address of the last sector. */ + nb_sectors--; + /* Remember the new size for read/write sanity checking. */ + s->qdev.max_lba = nb_sectors; + outbuf[0] = (nb_sectors >> 56) & 0xff; + outbuf[1] = (nb_sectors >> 48) & 0xff; + outbuf[2] = (nb_sectors >> 40) & 0xff; + outbuf[3] = (nb_sectors >> 32) & 0xff; + outbuf[4] = (nb_sectors >> 24) & 0xff; + outbuf[5] = (nb_sectors >> 16) & 0xff; + outbuf[6] = (nb_sectors >> 8) & 0xff; + outbuf[7] = nb_sectors & 0xff; + outbuf[8] = 0; + outbuf[9] = 0; + outbuf[10] = s->qdev.blocksize >> 8; + outbuf[11] = 0; + outbuf[12] = 0; + outbuf[13] = get_physical_block_exp(&s->qdev.conf); + + /* set TPE bit if the format supports discard */ + if (s->qdev.conf.discard_granularity) { + outbuf[14] = 0x80; + } + + /* Protection, exponent and lowest lba field left blank. */ + break; + } + DPRINTF("Unsupported Service Action In\n"); + goto illegal_request; + case SYNCHRONIZE_CACHE: + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); + return 0; + case SEEK_10: + DPRINTF("Seek(10) (sector %" PRId64 ")\n", r->req.cmd.lba); + if (r->req.cmd.lba > s->qdev.max_lba) { + goto illegal_lba; + } + break; + case MODE_SELECT: + DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer); + break; + case MODE_SELECT_10: + DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer); + break; + case UNMAP: + DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer); + break; + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + DPRINTF("Verify (bytchk %d)\n", (req->cmd.buf[1] >> 1) & 3); + if (req->cmd.buf[1] & 6) { + goto illegal_request; + } + break; + case WRITE_SAME_10: + case WRITE_SAME_16: + DPRINTF("WRITE SAME %d (len %lu)\n", + req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, + (long)r->req.cmd.xfer); + break; + default: + DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0], + scsi_command_name(buf[0])); + scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE)); + return 0; + } + assert(!r->req.aiocb); + r->iov.iov_len = MIN(r->buflen, req->cmd.xfer); + if (r->iov.iov_len == 0) { + scsi_req_complete(&r->req, GOOD); + } + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + assert(r->iov.iov_len == req->cmd.xfer); + return -r->iov.iov_len; + } else { + return r->iov.iov_len; + } + +illegal_request: + if (r->req.status == -1) { + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + } + return 0; + +illegal_lba: + scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); + return 0; +} + +/* Execute a scsi command. Returns the length of the data expected by the + command. This will be Positive for data transfers from the device + (eg. disk reads), negative for transfers to the device (eg. disk writes), + and zero if the command does not transfer any data. */ + +static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + uint32_t len; + uint8_t command; + + command = buf[0]; + + if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); + return 0; + } + + len = scsi_data_cdb_xfer(r->req.cmd.buf); + switch (command) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + DPRINTF("Read (sector %" PRId64 ", count %u)\n", r->req.cmd.lba, len); + if (r->req.cmd.buf[1] & 0xe0) { + goto illegal_request; + } + if (!check_lba_range(s, r->req.cmd.lba, len)) { + goto illegal_lba; + } + r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512); + r->sector_count = len * (s->qdev.blocksize / 512); + break; + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY_10: + case WRITE_VERIFY_12: + case WRITE_VERIFY_16: + if (blk_is_read_only(s->qdev.conf.blk)) { + scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED)); + return 0; + } + DPRINTF("Write %s(sector %" PRId64 ", count %u)\n", + (command & 0xe) == 0xe ? "And Verify " : "", + r->req.cmd.lba, len); + if (r->req.cmd.buf[1] & 0xe0) { + goto illegal_request; + } + if (!check_lba_range(s, r->req.cmd.lba, len)) { + goto illegal_lba; + } + r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512); + r->sector_count = len * (s->qdev.blocksize / 512); + break; + default: + abort(); + illegal_request: + scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); + return 0; + illegal_lba: + scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); + return 0; + } + if (r->sector_count == 0) { + scsi_req_complete(&r->req, GOOD); + } + assert(r->iov.iov_len == 0); + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + return -r->sector_count * 512; + } else { + return r->sector_count * 512; + } +} + +static void scsi_disk_reset(DeviceState *dev) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); + uint64_t nb_sectors; + + scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); + + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); + nb_sectors /= s->qdev.blocksize / 512; + if (nb_sectors) { + nb_sectors--; + } + s->qdev.max_lba = nb_sectors; + /* reset tray statuses */ + s->tray_locked = 0; + s->tray_open = 0; +} + +static void scsi_disk_resize_cb(void *opaque) +{ + SCSIDiskState *s = opaque; + + /* SPC lists this sense code as available only for + * direct-access devices. + */ + if (s->qdev.type == TYPE_DISK) { + scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED)); + } +} + +static void scsi_cd_change_media_cb(void *opaque, bool load) +{ + SCSIDiskState *s = opaque; + + /* + * When a CD gets changed, we have to report an ejected state and + * then a loaded state to guests so that they detect tray + * open/close and media change events. Guests that do not use + * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close + * states rely on this behavior. + * + * media_changed governs the state machine used for unit attention + * report. media_event is used by GET EVENT STATUS NOTIFICATION. + */ + s->media_changed = load; + s->tray_open = !load; + scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM)); + s->media_event = true; + s->eject_request = false; +} + +static void scsi_cd_eject_request_cb(void *opaque, bool force) +{ + SCSIDiskState *s = opaque; + + s->eject_request = true; + if (force) { + s->tray_locked = false; + } +} + +static bool scsi_cd_is_tray_open(void *opaque) +{ + return ((SCSIDiskState *)opaque)->tray_open; +} + +static bool scsi_cd_is_medium_locked(void *opaque) +{ + return ((SCSIDiskState *)opaque)->tray_locked; +} + +static const BlockDevOps scsi_disk_removable_block_ops = { + .change_media_cb = scsi_cd_change_media_cb, + .eject_request_cb = scsi_cd_eject_request_cb, + .is_tray_open = scsi_cd_is_tray_open, + .is_medium_locked = scsi_cd_is_medium_locked, + + .resize_cb = scsi_disk_resize_cb, +}; + +static const BlockDevOps scsi_disk_block_ops = { + .resize_cb = scsi_disk_resize_cb, +}; + +static void scsi_disk_unit_attention_reported(SCSIDevice *dev) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + if (s->media_changed) { + s->media_changed = false; + scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED)); + } +} + +static void scsi_realize(SCSIDevice *dev, Error **errp) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + Error *err = NULL; + + if (!s->qdev.conf.blk) { + error_setg(errp, "drive property not set"); + return; + } + + if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) && + !blk_is_inserted(s->qdev.conf.blk)) { + error_setg(errp, "Device needs media, but drive is empty"); + return; + } + + blkconf_serial(&s->qdev.conf, &s->serial); + blkconf_blocksizes(&s->qdev.conf); + if (dev->type == TYPE_DISK) { + blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err); + if (err) { + error_propagate(errp, err); + return; + } + } + + if (s->qdev.conf.discard_granularity == -1) { + s->qdev.conf.discard_granularity = + MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY); + } + + if (!s->version) { + s->version = g_strdup(qemu_get_version()); + } + if (!s->vendor) { + s->vendor = g_strdup("QEMU"); + } + + if (blk_is_sg(s->qdev.conf.blk)) { + error_setg(errp, "unwanted /dev/sg*"); + return; + } + + if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && + !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) { + blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s); + } else { + blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s); + } + blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize); + + blk_iostatus_enable(s->qdev.conf.blk); +} + +static void scsi_hd_realize(SCSIDevice *dev, Error **errp) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + /* can happen for devices without drive. The error message for missing + * backend will be issued in scsi_realize + */ + if (s->qdev.conf.blk) { + blkconf_blocksizes(&s->qdev.conf); + } + s->qdev.blocksize = s->qdev.conf.logical_block_size; + s->qdev.type = TYPE_DISK; + if (!s->product) { + s->product = g_strdup("QEMU HARDDISK"); + } + scsi_realize(&s->qdev, errp); +} + +static void scsi_cd_realize(SCSIDevice *dev, Error **errp) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + s->qdev.blocksize = 2048; + s->qdev.type = TYPE_ROM; + s->features |= 1 << SCSI_DISK_F_REMOVABLE; + if (!s->product) { + s->product = g_strdup("QEMU CD-ROM"); + } + scsi_realize(&s->qdev, errp); +} + +static void scsi_disk_realize(SCSIDevice *dev, Error **errp) +{ + DriveInfo *dinfo; + Error *local_err = NULL; + + if (!dev->conf.blk) { + scsi_realize(dev, &local_err); + assert(local_err); + error_propagate(errp, local_err); + return; + } + + dinfo = blk_legacy_dinfo(dev->conf.blk); + if (dinfo && dinfo->media_cd) { + scsi_cd_realize(dev, errp); + } else { + scsi_hd_realize(dev, errp); + } +} + +static const SCSIReqOps scsi_disk_emulate_reqops = { + .size = sizeof(SCSIDiskReq), + .free_req = scsi_free_request, + .send_command = scsi_disk_emulate_command, + .read_data = scsi_disk_emulate_read_data, + .write_data = scsi_disk_emulate_write_data, + .get_buf = scsi_get_buf, +}; + +static const SCSIReqOps scsi_disk_dma_reqops = { + .size = sizeof(SCSIDiskReq), + .free_req = scsi_free_request, + .send_command = scsi_disk_dma_command, + .read_data = scsi_read_data, + .write_data = scsi_write_data, + .get_buf = scsi_get_buf, + .load_request = scsi_disk_load_request, + .save_request = scsi_disk_save_request, +}; + +static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = { + [TEST_UNIT_READY] = &scsi_disk_emulate_reqops, + [INQUIRY] = &scsi_disk_emulate_reqops, + [MODE_SENSE] = &scsi_disk_emulate_reqops, + [MODE_SENSE_10] = &scsi_disk_emulate_reqops, + [START_STOP] = &scsi_disk_emulate_reqops, + [ALLOW_MEDIUM_REMOVAL] = &scsi_disk_emulate_reqops, + [READ_CAPACITY_10] = &scsi_disk_emulate_reqops, + [READ_TOC] = &scsi_disk_emulate_reqops, + [READ_DVD_STRUCTURE] = &scsi_disk_emulate_reqops, + [READ_DISC_INFORMATION] = &scsi_disk_emulate_reqops, + [GET_CONFIGURATION] = &scsi_disk_emulate_reqops, + [GET_EVENT_STATUS_NOTIFICATION] = &scsi_disk_emulate_reqops, + [MECHANISM_STATUS] = &scsi_disk_emulate_reqops, + [SERVICE_ACTION_IN_16] = &scsi_disk_emulate_reqops, + [REQUEST_SENSE] = &scsi_disk_emulate_reqops, + [SYNCHRONIZE_CACHE] = &scsi_disk_emulate_reqops, + [SEEK_10] = &scsi_disk_emulate_reqops, + [MODE_SELECT] = &scsi_disk_emulate_reqops, + [MODE_SELECT_10] = &scsi_disk_emulate_reqops, + [UNMAP] = &scsi_disk_emulate_reqops, + [WRITE_SAME_10] = &scsi_disk_emulate_reqops, + [WRITE_SAME_16] = &scsi_disk_emulate_reqops, + [VERIFY_10] = &scsi_disk_emulate_reqops, + [VERIFY_12] = &scsi_disk_emulate_reqops, + [VERIFY_16] = &scsi_disk_emulate_reqops, + + [READ_6] = &scsi_disk_dma_reqops, + [READ_10] = &scsi_disk_dma_reqops, + [READ_12] = &scsi_disk_dma_reqops, + [READ_16] = &scsi_disk_dma_reqops, + [WRITE_6] = &scsi_disk_dma_reqops, + [WRITE_10] = &scsi_disk_dma_reqops, + [WRITE_12] = &scsi_disk_dma_reqops, + [WRITE_16] = &scsi_disk_dma_reqops, + [WRITE_VERIFY_10] = &scsi_disk_dma_reqops, + [WRITE_VERIFY_12] = &scsi_disk_dma_reqops, + [WRITE_VERIFY_16] = &scsi_disk_dma_reqops, +}; + +static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d); + SCSIRequest *req; + const SCSIReqOps *ops; + uint8_t command; + + command = buf[0]; + ops = scsi_disk_reqops_dispatch[command]; + if (!ops) { + ops = &scsi_disk_emulate_reqops; + } + req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private); + +#ifdef DEBUG_SCSI + DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]); + { + int i; + for (i = 1; i < scsi_cdb_length(buf); i++) { + printf(" 0x%02x", buf[i]); + } + printf("\n"); + } +#endif + + return req; +} + +#ifdef __linux__ +static int get_device_type(SCSIDiskState *s) +{ + uint8_t cmd[16]; + uint8_t buf[36]; + uint8_t sensebuf[8]; + sg_io_hdr_t io_header; + int ret; + + memset(cmd, 0, sizeof(cmd)); + memset(buf, 0, sizeof(buf)); + cmd[0] = INQUIRY; + cmd[4] = sizeof(buf); + + memset(&io_header, 0, sizeof(io_header)); + io_header.interface_id = 'S'; + io_header.dxfer_direction = SG_DXFER_FROM_DEV; + io_header.dxfer_len = sizeof(buf); + io_header.dxferp = buf; + io_header.cmdp = cmd; + io_header.cmd_len = sizeof(cmd); + io_header.mx_sb_len = sizeof(sensebuf); + io_header.sbp = sensebuf; + io_header.timeout = 6000; /* XXX */ + + ret = blk_ioctl(s->qdev.conf.blk, SG_IO, &io_header); + if (ret < 0 || io_header.driver_status || io_header.host_status) { + return -1; + } + s->qdev.type = buf[0]; + if (buf[1] & 0x80) { + s->features |= 1 << SCSI_DISK_F_REMOVABLE; + } + return 0; +} + +static void scsi_block_realize(SCSIDevice *dev, Error **errp) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + int sg_version; + int rc; + + if (!s->qdev.conf.blk) { + error_setg(errp, "drive property not set"); + return; + } + + /* check we are using a driver managing SG_IO (version 3 and after) */ + rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); + if (rc < 0) { + error_setg(errp, "cannot get SG_IO version number: %s. " + "Is this a SCSI device?", + strerror(-rc)); + return; + } + if (sg_version < 30000) { + error_setg(errp, "scsi generic interface too old"); + return; + } + + /* get device type from INQUIRY data */ + rc = get_device_type(s); + if (rc < 0) { + error_setg(errp, "INQUIRY failed"); + return; + } + + /* Make a guess for the block size, we'll fix it when the guest sends. + * READ CAPACITY. If they don't, they likely would assume these sizes + * anyway. (TODO: check in /sys). + */ + if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) { + s->qdev.blocksize = 2048; + } else { + s->qdev.blocksize = 512; + } + + /* Makes the scsi-block device not removable by using HMP and QMP eject + * command. + */ + s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS); + + scsi_realize(&s->qdev, errp); +} + +static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf) +{ + switch (buf[0]) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY_10: + case WRITE_VERIFY_12: + case WRITE_VERIFY_16: + /* If we are not using O_DIRECT, we might read stale data from the + * host cache if writes were made using other commands than these + * ones (such as WRITE SAME or EXTENDED COPY, etc.). So, without + * O_DIRECT everything must go through SG_IO. + */ + if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) { + break; + } + + /* MMC writing cannot be done via pread/pwrite, because it sometimes + * involves writing beyond the maximum LBA or to negative LBA (lead-in). + * And once you do these writes, reading from the block device is + * unreliable, too. It is even possible that reads deliver random data + * from the host page cache (this is probably a Linux bug). + * + * We might use scsi_disk_dma_reqops as long as no writing commands are + * seen, but performance usually isn't paramount on optical media. So, + * just make scsi-block operate the same as scsi-generic for them. + */ + if (s->qdev.type != TYPE_ROM) { + return false; + } + break; + + default: + break; + } + + return true; +} + + +static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag, + uint32_t lun, uint8_t *buf, + void *hba_private) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d); + + if (scsi_block_is_passthrough(s, buf)) { + return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun, + hba_private); + } else { + return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun, + hba_private); + } +} + +static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd, + uint8_t *buf, void *hba_private) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d); + + if (scsi_block_is_passthrough(s, buf)) { + return scsi_bus_parse_cdb(&s->qdev, cmd, buf, hba_private); + } else { + return scsi_req_parse_cdb(&s->qdev, cmd, buf); + } +} + +#endif + +#define DEFINE_SCSI_DISK_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \ + DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ + DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ + DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ + DEFINE_PROP_STRING("product", SCSIDiskState, product) + +static Property scsi_hd_properties[] = { + DEFINE_SCSI_DISK_PROPERTIES(), + DEFINE_PROP_BIT("removable", SCSIDiskState, features, + SCSI_DISK_F_REMOVABLE, false), + DEFINE_PROP_BIT("dpofua", SCSIDiskState, features, + SCSI_DISK_F_DPOFUA, false), + DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0), + DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0), + DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), + DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, + DEFAULT_MAX_UNMAP_SIZE), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), + DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_scsi_disk_state = { + .name = "scsi-disk", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState), + VMSTATE_BOOL(media_changed, SCSIDiskState), + VMSTATE_BOOL(media_event, SCSIDiskState), + VMSTATE_BOOL(eject_request, SCSIDiskState), + VMSTATE_BOOL(tray_open, SCSIDiskState), + VMSTATE_BOOL(tray_locked, SCSIDiskState), + VMSTATE_END_OF_LIST() + } +}; + +static void scsi_hd_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + + sc->realize = scsi_hd_realize; + sc->alloc_req = scsi_new_request; + sc->unit_attention_reported = scsi_disk_unit_attention_reported; + dc->fw_name = "disk"; + dc->desc = "virtual SCSI disk"; + dc->reset = scsi_disk_reset; + dc->props = scsi_hd_properties; + dc->vmsd = &vmstate_scsi_disk_state; +} + +static const TypeInfo scsi_hd_info = { + .name = "scsi-hd", + .parent = TYPE_SCSI_DEVICE, + .instance_size = sizeof(SCSIDiskState), + .class_init = scsi_hd_class_initfn, +}; + +static Property scsi_cd_properties[] = { + DEFINE_SCSI_DISK_PROPERTIES(), + DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0), + DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0), + DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void scsi_cd_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + + sc->realize = scsi_cd_realize; + sc->alloc_req = scsi_new_request; + sc->unit_attention_reported = scsi_disk_unit_attention_reported; + dc->fw_name = "disk"; + dc->desc = "virtual SCSI CD-ROM"; + dc->reset = scsi_disk_reset; + dc->props = scsi_cd_properties; + dc->vmsd = &vmstate_scsi_disk_state; +} + +static const TypeInfo scsi_cd_info = { + .name = "scsi-cd", + .parent = TYPE_SCSI_DEVICE, + .instance_size = sizeof(SCSIDiskState), + .class_init = scsi_cd_class_initfn, +}; + +#ifdef __linux__ +static Property scsi_block_properties[] = { + DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void scsi_block_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + + sc->realize = scsi_block_realize; + sc->alloc_req = scsi_block_new_request; + sc->parse_cdb = scsi_block_parse_cdb; + dc->fw_name = "disk"; + dc->desc = "SCSI block device passthrough"; + dc->reset = scsi_disk_reset; + dc->props = scsi_block_properties; + dc->vmsd = &vmstate_scsi_disk_state; +} + +static const TypeInfo scsi_block_info = { + .name = "scsi-block", + .parent = TYPE_SCSI_DEVICE, + .instance_size = sizeof(SCSIDiskState), + .class_init = scsi_block_class_initfn, +}; +#endif + +static Property scsi_disk_properties[] = { + DEFINE_SCSI_DISK_PROPERTIES(), + DEFINE_PROP_BIT("removable", SCSIDiskState, features, + SCSI_DISK_F_REMOVABLE, false), + DEFINE_PROP_BIT("dpofua", SCSIDiskState, features, + SCSI_DISK_F_DPOFUA, false), + DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0), + DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0), + DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), + DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, + DEFAULT_MAX_UNMAP_SIZE), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void scsi_disk_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + + sc->realize = scsi_disk_realize; + sc->alloc_req = scsi_new_request; + sc->unit_attention_reported = scsi_disk_unit_attention_reported; + dc->fw_name = "disk"; + dc->desc = "virtual SCSI disk or CD-ROM (legacy)"; + dc->reset = scsi_disk_reset; + dc->props = scsi_disk_properties; + dc->vmsd = &vmstate_scsi_disk_state; +} + +static const TypeInfo scsi_disk_info = { + .name = "scsi-disk", + .parent = TYPE_SCSI_DEVICE, + .instance_size = sizeof(SCSIDiskState), + .class_init = scsi_disk_class_initfn, +}; + +static void scsi_disk_register_types(void) +{ + type_register_static(&scsi_hd_info); + type_register_static(&scsi_cd_info); +#ifdef __linux__ + type_register_static(&scsi_block_info); +#endif + type_register_static(&scsi_disk_info); +} + +type_init(scsi_disk_register_types) diff --git a/qemu/hw/scsi/scsi-generic.c b/qemu/hw/scsi/scsi-generic.c new file mode 100644 index 000000000..e53470f85 --- /dev/null +++ b/qemu/hw/scsi/scsi-generic.c @@ -0,0 +1,496 @@ +/* + * Generic SCSI Device support + * + * Copyright (c) 2007 Bull S.A.S. + * Based on code by Paul Brook + * Based on code by Fabrice Bellard + * + * Written by Laurent Vivier <Laurent.Vivier@bull.net> + * + * This code is licensed under the LGPL. + * + */ + +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "hw/scsi/scsi.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" + +#ifdef __linux__ + +//#define DEBUG_SCSI + +#ifdef DEBUG_SCSI +#define DPRINTF(fmt, ...) \ +do { printf("scsi-generic: " fmt , ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) do {} while(0) +#endif + +#define BADF(fmt, ...) \ +do { fprintf(stderr, "scsi-generic: " fmt , ## __VA_ARGS__); } while (0) + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <scsi/sg.h> +#include "block/scsi.h" + +#define SG_ERR_DRIVER_TIMEOUT 0x06 +#define SG_ERR_DRIVER_SENSE 0x08 + +#define SG_ERR_DID_OK 0x00 +#define SG_ERR_DID_NO_CONNECT 0x01 +#define SG_ERR_DID_BUS_BUSY 0x02 +#define SG_ERR_DID_TIME_OUT 0x03 + +#ifndef MAX_UINT +#define MAX_UINT ((unsigned int)-1) +#endif + +typedef struct SCSIGenericReq { + SCSIRequest req; + uint8_t *buf; + int buflen; + int len; + sg_io_hdr_t io_header; +} SCSIGenericReq; + +static void scsi_generic_save_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + + qemu_put_sbe32s(f, &r->buflen); + if (r->buflen && r->req.cmd.mode == SCSI_XFER_TO_DEV) { + assert(!r->req.sg); + qemu_put_buffer(f, r->buf, r->req.cmd.xfer); + } +} + +static void scsi_generic_load_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + + qemu_get_sbe32s(f, &r->buflen); + if (r->buflen && r->req.cmd.mode == SCSI_XFER_TO_DEV) { + assert(!r->req.sg); + qemu_get_buffer(f, r->buf, r->req.cmd.xfer); + } +} + +static void scsi_free_request(SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + + g_free(r->buf); +} + +/* Helper function for command completion. */ +static void scsi_command_complete(void *opaque, int ret) +{ + int status; + SCSIGenericReq *r = (SCSIGenericReq *)opaque; + + r->req.aiocb = NULL; + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) { + r->req.sense_len = r->io_header.sb_len_wr; + } + + if (ret != 0) { + switch (ret) { + case -EDOM: + status = TASK_SET_FULL; + break; + case -ENOMEM: + status = CHECK_CONDITION; + scsi_req_build_sense(&r->req, SENSE_CODE(TARGET_FAILURE)); + break; + default: + status = CHECK_CONDITION; + scsi_req_build_sense(&r->req, SENSE_CODE(IO_ERROR)); + break; + } + } else { + if (r->io_header.host_status == SG_ERR_DID_NO_CONNECT || + r->io_header.host_status == SG_ERR_DID_BUS_BUSY || + r->io_header.host_status == SG_ERR_DID_TIME_OUT || + (r->io_header.driver_status & SG_ERR_DRIVER_TIMEOUT)) { + status = BUSY; + BADF("Driver Timeout\n"); + } else if (r->io_header.host_status) { + status = CHECK_CONDITION; + scsi_req_build_sense(&r->req, SENSE_CODE(I_T_NEXUS_LOSS)); + } else if (r->io_header.status) { + status = r->io_header.status; + } else if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) { + status = CHECK_CONDITION; + } else { + status = GOOD; + } + } + DPRINTF("Command complete 0x%p tag=0x%x status=%d\n", + r, r->req.tag, status); + + scsi_req_complete(&r->req, status); +done: + scsi_req_unref(&r->req); +} + +static int execute_command(BlockBackend *blk, + SCSIGenericReq *r, int direction, + BlockCompletionFunc *complete) +{ + r->io_header.interface_id = 'S'; + r->io_header.dxfer_direction = direction; + r->io_header.dxferp = r->buf; + r->io_header.dxfer_len = r->buflen; + r->io_header.cmdp = r->req.cmd.buf; + r->io_header.cmd_len = r->req.cmd.len; + r->io_header.mx_sb_len = sizeof(r->req.sense); + r->io_header.sbp = r->req.sense; + r->io_header.timeout = MAX_UINT; + r->io_header.usr_ptr = r; + r->io_header.flags |= SG_FLAG_DIRECT_IO; + + r->req.aiocb = blk_aio_ioctl(blk, SG_IO, &r->io_header, complete, r); + if (r->req.aiocb == NULL) { + return -EIO; + } + + return 0; +} + +static void scsi_read_complete(void * opaque, int ret) +{ + SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + int len; + + r->req.aiocb = NULL; + if (ret || r->req.io_canceled) { + scsi_command_complete(r, ret); + return; + } + len = r->io_header.dxfer_len - r->io_header.resid; + DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, len); + + r->len = -1; + if (len == 0) { + scsi_command_complete(r, 0); + } else { + /* Snoop READ CAPACITY output to set the blocksize. */ + if (r->req.cmd.buf[0] == READ_CAPACITY_10 && + (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) { + s->blocksize = ldl_be_p(&r->buf[4]); + s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL; + } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 && + (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) { + s->blocksize = ldl_be_p(&r->buf[8]); + s->max_lba = ldq_be_p(&r->buf[0]); + } + blk_set_guest_block_size(s->conf.blk, s->blocksize); + + scsi_req_data(&r->req, len); + scsi_req_unref(&r->req); + } +} + +/* Read more data from scsi device into buffer. */ +static void scsi_read_data(SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + SCSIDevice *s = r->req.dev; + int ret; + + DPRINTF("scsi_read_data 0x%x\n", req->tag); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + if (r->len == -1) { + scsi_command_complete(r, 0); + return; + } + + ret = execute_command(s->conf.blk, r, SG_DXFER_FROM_DEV, + scsi_read_complete); + if (ret < 0) { + scsi_command_complete(r, ret); + } +} + +static void scsi_write_complete(void * opaque, int ret) +{ + SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + + DPRINTF("scsi_write_complete() ret = %d\n", ret); + r->req.aiocb = NULL; + if (ret || r->req.io_canceled) { + scsi_command_complete(r, ret); + return; + } + + if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && + s->type == TYPE_TAPE) { + s->blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11]; + DPRINTF("block size %d\n", s->blocksize); + } + + scsi_command_complete(r, ret); +} + +/* Write data to a scsi device. Returns nonzero on failure. + The transfer may complete asynchronously. */ +static void scsi_write_data(SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + SCSIDevice *s = r->req.dev; + int ret; + + DPRINTF("scsi_write_data 0x%x\n", req->tag); + if (r->len == 0) { + r->len = r->buflen; + scsi_req_data(&r->req, r->len); + return; + } + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + ret = execute_command(s->conf.blk, r, SG_DXFER_TO_DEV, scsi_write_complete); + if (ret < 0) { + scsi_command_complete(r, ret); + } +} + +/* Return a pointer to the data buffer. */ +static uint8_t *scsi_get_buf(SCSIRequest *req) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + + return r->buf; +} + +/* Execute a scsi command. Returns the length of the data expected by the + command. This will be Positive for data transfers from the device + (eg. disk reads), negative for transfers to the device (eg. disk writes), + and zero if the command does not transfer any data. */ + +static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd) +{ + SCSIGenericReq *r = DO_UPCAST(SCSIGenericReq, req, req); + SCSIDevice *s = r->req.dev; + int ret; + +#ifdef DEBUG_SCSI + { + int i; + for (i = 1; i < r->req.cmd.len; i++) { + printf(" 0x%02x", cmd[i]); + } + printf("\n"); + } +#endif + + if (r->req.cmd.xfer == 0) { + g_free(r->buf); + r->buflen = 0; + r->buf = NULL; + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + ret = execute_command(s->conf.blk, r, SG_DXFER_NONE, + scsi_command_complete); + if (ret < 0) { + scsi_command_complete(r, ret); + return 0; + } + return 0; + } + + if (r->buflen != r->req.cmd.xfer) { + g_free(r->buf); + r->buf = g_malloc(r->req.cmd.xfer); + r->buflen = r->req.cmd.xfer; + } + + memset(r->buf, 0, r->buflen); + r->len = r->req.cmd.xfer; + if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { + r->len = 0; + return -r->req.cmd.xfer; + } else { + return r->req.cmd.xfer; + } +} + +static int get_stream_blocksize(BlockBackend *blk) +{ + uint8_t cmd[6]; + uint8_t buf[12]; + uint8_t sensebuf[8]; + sg_io_hdr_t io_header; + int ret; + + memset(cmd, 0, sizeof(cmd)); + memset(buf, 0, sizeof(buf)); + cmd[0] = MODE_SENSE; + cmd[4] = sizeof(buf); + + memset(&io_header, 0, sizeof(io_header)); + io_header.interface_id = 'S'; + io_header.dxfer_direction = SG_DXFER_FROM_DEV; + io_header.dxfer_len = sizeof(buf); + io_header.dxferp = buf; + io_header.cmdp = cmd; + io_header.cmd_len = sizeof(cmd); + io_header.mx_sb_len = sizeof(sensebuf); + io_header.sbp = sensebuf; + io_header.timeout = 6000; /* XXX */ + + ret = blk_ioctl(blk, SG_IO, &io_header); + if (ret < 0 || io_header.driver_status || io_header.host_status) { + return -1; + } + return (buf[9] << 16) | (buf[10] << 8) | buf[11]; +} + +static void scsi_generic_reset(DeviceState *dev) +{ + SCSIDevice *s = SCSI_DEVICE(dev); + + scsi_device_purge_requests(s, SENSE_CODE(RESET)); +} + +static void scsi_generic_realize(SCSIDevice *s, Error **errp) +{ + int rc; + int sg_version; + struct sg_scsi_id scsiid; + + if (!s->conf.blk) { + error_setg(errp, "drive property not set"); + return; + } + + if (blk_get_on_error(s->conf.blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { + error_setg(errp, "Device doesn't support drive option werror"); + return; + } + if (blk_get_on_error(s->conf.blk, 1) != BLOCKDEV_ON_ERROR_REPORT) { + error_setg(errp, "Device doesn't support drive option rerror"); + return; + } + + /* check we are using a driver managing SG_IO (version 3 and after */ + rc = blk_ioctl(s->conf.blk, SG_GET_VERSION_NUM, &sg_version); + if (rc < 0) { + error_setg(errp, "cannot get SG_IO version number: %s. " + "Is this a SCSI device?", + strerror(-rc)); + return; + } + if (sg_version < 30000) { + error_setg(errp, "scsi generic interface too old"); + return; + } + + /* get LUN of the /dev/sg? */ + if (blk_ioctl(s->conf.blk, SG_GET_SCSI_ID, &scsiid)) { + error_setg(errp, "SG_GET_SCSI_ID ioctl failed"); + return; + } + + /* define device state */ + s->type = scsiid.scsi_type; + DPRINTF("device type %d\n", s->type); + + switch (s->type) { + case TYPE_TAPE: + s->blocksize = get_stream_blocksize(s->conf.blk); + if (s->blocksize == -1) { + s->blocksize = 0; + } + break; + + /* Make a guess for block devices, we'll fix it when the guest sends. + * READ CAPACITY. If they don't, they likely would assume these sizes + * anyway. (TODO: they could also send MODE SENSE). + */ + case TYPE_ROM: + case TYPE_WORM: + s->blocksize = 2048; + break; + default: + s->blocksize = 512; + break; + } + + DPRINTF("block size %d\n", s->blocksize); +} + +const SCSIReqOps scsi_generic_req_ops = { + .size = sizeof(SCSIGenericReq), + .free_req = scsi_free_request, + .send_command = scsi_send_command, + .read_data = scsi_read_data, + .write_data = scsi_write_data, + .get_buf = scsi_get_buf, + .load_request = scsi_generic_load_request, + .save_request = scsi_generic_save_request, +}; + +static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private) +{ + SCSIRequest *req; + + req = scsi_req_alloc(&scsi_generic_req_ops, d, tag, lun, hba_private); + return req; +} + +static Property scsi_generic_properties[] = { + DEFINE_PROP_DRIVE("drive", SCSIDevice, conf.blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static int scsi_generic_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, + uint8_t *buf, void *hba_private) +{ + return scsi_bus_parse_cdb(dev, cmd, buf, hba_private); +} + +static void scsi_generic_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); + + sc->realize = scsi_generic_realize; + sc->alloc_req = scsi_new_request; + sc->parse_cdb = scsi_generic_parse_cdb; + dc->fw_name = "disk"; + dc->desc = "pass through generic scsi device (/dev/sg*)"; + dc->reset = scsi_generic_reset; + dc->props = scsi_generic_properties; + dc->vmsd = &vmstate_scsi_device; +} + +static const TypeInfo scsi_generic_info = { + .name = "scsi-generic", + .parent = TYPE_SCSI_DEVICE, + .instance_size = sizeof(SCSIDevice), + .class_init = scsi_generic_class_initfn, +}; + +static void scsi_generic_register_types(void) +{ + type_register_static(&scsi_generic_info); +} + +type_init(scsi_generic_register_types) + +#endif /* __linux__ */ diff --git a/qemu/hw/scsi/spapr_vscsi.c b/qemu/hw/scsi/spapr_vscsi.c new file mode 100644 index 000000000..891424fae --- /dev/null +++ b/qemu/hw/scsi/spapr_vscsi.c @@ -0,0 +1,1302 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Virtual SCSI, aka ibmvscsi + * + * Copyright (c) 2010,2011 Benjamin Herrenschmidt, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * TODO: + * + * - Cleanups :-) + * - Sort out better how to assign devices to VSCSI instances + * - Fix residual counts + * - Add indirect descriptors support + * - Maybe do autosense (PAPR seems to mandate it, linux doesn't care) + */ +#include "hw/hw.h" +#include "hw/scsi/scsi.h" +#include "block/scsi.h" +#include "srp.h" +#include "hw/qdev.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "viosrp.h" + +#include <libfdt.h> + +/*#define DEBUG_VSCSI*/ + +#ifdef DEBUG_VSCSI +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +/* + * Virtual SCSI device + */ + +/* Random numbers */ +#define VSCSI_MAX_SECTORS 4096 +#define VSCSI_REQ_LIMIT 24 + +#define SRP_RSP_SENSE_DATA_LEN 18 + +#define SRP_REPORT_LUNS_WLUN 0xc10100000000000ULL + +typedef union vscsi_crq { + struct viosrp_crq s; + uint8_t raw[16]; +} vscsi_crq; + +typedef struct vscsi_req { + vscsi_crq crq; + union viosrp_iu iu; + + /* SCSI request tracking */ + SCSIRequest *sreq; + uint32_t qtag; /* qemu tag != srp tag */ + bool active; + bool writing; + bool dma_error; + uint32_t data_len; + uint32_t senselen; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + + /* RDMA related bits */ + uint8_t dma_fmt; + uint16_t local_desc; + uint16_t total_desc; + uint16_t cdb_offset; + uint16_t cur_desc_num; + uint16_t cur_desc_offset; +} vscsi_req; + +#define TYPE_VIO_SPAPR_VSCSI_DEVICE "spapr-vscsi" +#define VIO_SPAPR_VSCSI_DEVICE(obj) \ + OBJECT_CHECK(VSCSIState, (obj), TYPE_VIO_SPAPR_VSCSI_DEVICE) + +typedef struct { + VIOsPAPRDevice vdev; + SCSIBus bus; + vscsi_req reqs[VSCSI_REQ_LIMIT]; +} VSCSIState; + +static struct vscsi_req *vscsi_get_req(VSCSIState *s) +{ + vscsi_req *req; + int i; + + for (i = 0; i < VSCSI_REQ_LIMIT; i++) { + req = &s->reqs[i]; + if (!req->active) { + memset(req, 0, sizeof(*req)); + req->qtag = i; + req->active = 1; + return req; + } + } + return NULL; +} + +static struct vscsi_req *vscsi_find_req(VSCSIState *s, uint64_t srp_tag) +{ + vscsi_req *req; + int i; + + for (i = 0; i < VSCSI_REQ_LIMIT; i++) { + req = &s->reqs[i]; + if (req->iu.srp.cmd.tag == srp_tag) { + return req; + } + } + return NULL; +} + +static void vscsi_put_req(vscsi_req *req) +{ + if (req->sreq != NULL) { + scsi_req_unref(req->sreq); + } + req->sreq = NULL; + req->active = 0; +} + +static SCSIDevice *vscsi_device_find(SCSIBus *bus, uint64_t srp_lun, int *lun) +{ + int channel = 0, id = 0; + +retry: + switch (srp_lun >> 62) { + case 0: + if ((srp_lun >> 56) != 0) { + channel = (srp_lun >> 56) & 0x3f; + id = (srp_lun >> 48) & 0xff; + srp_lun <<= 16; + goto retry; + } + *lun = (srp_lun >> 48) & 0xff; + break; + + case 1: + *lun = (srp_lun >> 48) & 0x3fff; + break; + case 2: + channel = (srp_lun >> 53) & 0x7; + id = (srp_lun >> 56) & 0x3f; + *lun = (srp_lun >> 48) & 0x1f; + break; + case 3: + *lun = -1; + return NULL; + default: + abort(); + } + + return scsi_device_find(bus, channel, id, *lun); +} + +static int vscsi_send_iu(VSCSIState *s, vscsi_req *req, + uint64_t length, uint8_t format) +{ + long rc, rc1; + + /* First copy the SRP */ + rc = spapr_vio_dma_write(&s->vdev, req->crq.s.IU_data_ptr, + &req->iu, length); + if (rc) { + fprintf(stderr, "vscsi_send_iu: DMA write failure !\n"); + } + + req->crq.s.valid = 0x80; + req->crq.s.format = format; + req->crq.s.reserved = 0x00; + req->crq.s.timeout = cpu_to_be16(0x0000); + req->crq.s.IU_length = cpu_to_be16(length); + req->crq.s.IU_data_ptr = req->iu.srp.rsp.tag; /* right byte order */ + + if (rc == 0) { + req->crq.s.status = VIOSRP_OK; + } else { + req->crq.s.status = VIOSRP_ADAPTER_FAIL; + } + + rc1 = spapr_vio_send_crq(&s->vdev, req->crq.raw); + if (rc1) { + fprintf(stderr, "vscsi_send_iu: Error sending response\n"); + return rc1; + } + + return rc; +} + +static void vscsi_makeup_sense(VSCSIState *s, vscsi_req *req, + uint8_t key, uint8_t asc, uint8_t ascq) +{ + req->senselen = SRP_RSP_SENSE_DATA_LEN; + + /* Valid bit and 'current errors' */ + req->sense[0] = (0x1 << 7 | 0x70); + /* Sense key */ + req->sense[2] = key; + /* Additional sense length */ + req->sense[7] = 0xa; /* 10 bytes */ + /* Additional sense code */ + req->sense[12] = asc; + req->sense[13] = ascq; +} + +static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req, + uint8_t status, int32_t res_in, int32_t res_out) +{ + union viosrp_iu *iu = &req->iu; + uint64_t tag = iu->srp.rsp.tag; + int total_len = sizeof(iu->srp.rsp); + uint8_t sol_not = iu->srp.cmd.sol_not; + + DPRINTF("VSCSI: Sending resp status: 0x%x, " + "res_in: %d, res_out: %d\n", status, res_in, res_out); + + memset(iu, 0, sizeof(struct srp_rsp)); + iu->srp.rsp.opcode = SRP_RSP; + iu->srp.rsp.req_lim_delta = cpu_to_be32(1); + iu->srp.rsp.tag = tag; + + /* Handle residuals */ + if (res_in < 0) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIUNDER; + res_in = -res_in; + } else if (res_in) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER; + } + if (res_out < 0) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOUNDER; + res_out = -res_out; + } else if (res_out) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOOVER; + } + iu->srp.rsp.data_in_res_cnt = cpu_to_be32(res_in); + iu->srp.rsp.data_out_res_cnt = cpu_to_be32(res_out); + + /* We don't do response data */ + /* iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID; */ + iu->srp.rsp.resp_data_len = cpu_to_be32(0); + + /* Handle success vs. failure */ + iu->srp.rsp.status = status; + if (status) { + iu->srp.rsp.sol_not = (sol_not & 0x04) >> 2; + if (req->senselen) { + req->iu.srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID; + req->iu.srp.rsp.sense_data_len = cpu_to_be32(req->senselen); + memcpy(req->iu.srp.rsp.data, req->sense, req->senselen); + total_len += req->senselen; + } + } else { + iu->srp.rsp.sol_not = (sol_not & 0x02) >> 1; + } + + vscsi_send_iu(s, req, total_len, VIOSRP_SRP_FORMAT); + return 0; +} + +static inline struct srp_direct_buf vscsi_swap_desc(struct srp_direct_buf desc) +{ + desc.va = be64_to_cpu(desc.va); + desc.len = be32_to_cpu(desc.len); + return desc; +} + +static int vscsi_fetch_desc(VSCSIState *s, struct vscsi_req *req, + unsigned n, unsigned buf_offset, + struct srp_direct_buf *ret) +{ + struct srp_cmd *cmd = &req->iu.srp.cmd; + + switch (req->dma_fmt) { + case SRP_NO_DATA_DESC: { + DPRINTF("VSCSI: no data descriptor\n"); + return 0; + } + case SRP_DATA_DESC_DIRECT: { + memcpy(ret, cmd->add_data + req->cdb_offset, sizeof(*ret)); + assert(req->cur_desc_num == 0); + DPRINTF("VSCSI: direct segment\n"); + break; + } + case SRP_DATA_DESC_INDIRECT: { + struct srp_indirect_buf *tmp = (struct srp_indirect_buf *) + (cmd->add_data + req->cdb_offset); + if (n < req->local_desc) { + *ret = tmp->desc_list[n]; + DPRINTF("VSCSI: indirect segment local tag=0x%x desc#%d/%d\n", + req->qtag, n, req->local_desc); + + } else if (n < req->total_desc) { + int rc; + struct srp_direct_buf tbl_desc = vscsi_swap_desc(tmp->table_desc); + unsigned desc_offset = n * sizeof(struct srp_direct_buf); + + if (desc_offset >= tbl_desc.len) { + DPRINTF("VSCSI: #%d is ouf of range (%d bytes)\n", + n, desc_offset); + return -1; + } + rc = spapr_vio_dma_read(&s->vdev, tbl_desc.va + desc_offset, + ret, sizeof(struct srp_direct_buf)); + if (rc) { + DPRINTF("VSCSI: spapr_vio_dma_read -> %d reading ext_desc\n", + rc); + return -1; + } + DPRINTF("VSCSI: indirect segment ext. tag=0x%x desc#%d/%d { va=%"PRIx64" len=%x }\n", + req->qtag, n, req->total_desc, tbl_desc.va, tbl_desc.len); + } else { + DPRINTF("VSCSI: Out of descriptors !\n"); + return 0; + } + break; + } + default: + fprintf(stderr, "VSCSI: Unknown format %x\n", req->dma_fmt); + return -1; + } + + *ret = vscsi_swap_desc(*ret); + if (buf_offset > ret->len) { + DPRINTF(" offset=%x is out of a descriptor #%d boundary=%x\n", + buf_offset, req->cur_desc_num, ret->len); + return -1; + } + ret->va += buf_offset; + ret->len -= buf_offset; + + DPRINTF(" cur=%d offs=%x ret { va=%"PRIx64" len=%x }\n", + req->cur_desc_num, req->cur_desc_offset, ret->va, ret->len); + + return ret->len ? 1 : 0; +} + +static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req, + uint8_t *buf, uint32_t len) +{ + struct srp_direct_buf md; + uint32_t llen; + int rc = 0; + + rc = vscsi_fetch_desc(s, req, req->cur_desc_num, req->cur_desc_offset, &md); + if (rc < 0) { + return -1; + } else if (rc == 0) { + return 0; + } + + llen = MIN(len, md.len); + if (llen) { + if (req->writing) { /* writing = to device = reading from memory */ + rc = spapr_vio_dma_read(&s->vdev, md.va, buf, llen); + } else { + rc = spapr_vio_dma_write(&s->vdev, md.va, buf, llen); + } + } + + if (rc) { + return -1; + } + req->cur_desc_offset += llen; + + return llen; +} + +static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req, + uint8_t *buf, uint32_t len) +{ + struct srp_direct_buf md; + int rc = 0; + uint32_t llen, total = 0; + + DPRINTF("VSCSI: indirect segment 0x%x bytes\n", len); + + /* While we have data ... */ + while (len) { + rc = vscsi_fetch_desc(s, req, req->cur_desc_num, req->cur_desc_offset, &md); + if (rc < 0) { + return -1; + } else if (rc == 0) { + break; + } + + /* Perform transfer */ + llen = MIN(len, md.len); + if (req->writing) { /* writing = to device = reading from memory */ + rc = spapr_vio_dma_read(&s->vdev, md.va, buf, llen); + } else { + rc = spapr_vio_dma_write(&s->vdev, md.va, buf, llen); + } + if (rc) { + DPRINTF("VSCSI: spapr_vio_dma_r/w(%d) -> %d\n", req->writing, rc); + break; + } + DPRINTF("VSCSI: data: %02x %02x %02x %02x...\n", + buf[0], buf[1], buf[2], buf[3]); + + len -= llen; + buf += llen; + + total += llen; + + /* Update current position in the current descriptor */ + req->cur_desc_offset += llen; + if (md.len == llen) { + /* Go to the next descriptor if the current one finished */ + ++req->cur_desc_num; + req->cur_desc_offset = 0; + } + } + + return rc ? -1 : total; +} + +static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req, + int writing, uint8_t *buf, uint32_t len) +{ + int err = 0; + + switch (req->dma_fmt) { + case SRP_NO_DATA_DESC: + DPRINTF("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len); + break; + case SRP_DATA_DESC_DIRECT: + err = vscsi_srp_direct_data(s, req, buf, len); + break; + case SRP_DATA_DESC_INDIRECT: + err = vscsi_srp_indirect_data(s, req, buf, len); + break; + } + return err; +} + +/* Bits from linux srp */ +static int data_out_desc_size(struct srp_cmd *cmd) +{ + int size = 0; + uint8_t fmt = cmd->buf_fmt >> 4; + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + size = sizeof(struct srp_direct_buf); + break; + case SRP_DATA_DESC_INDIRECT: + size = sizeof(struct srp_indirect_buf) + + sizeof(struct srp_direct_buf)*cmd->data_out_desc_cnt; + break; + default: + break; + } + return size; +} + +static int vscsi_preprocess_desc(vscsi_req *req) +{ + struct srp_cmd *cmd = &req->iu.srp.cmd; + + req->cdb_offset = cmd->add_cdb_len & ~3; + + if (req->writing) { + req->dma_fmt = cmd->buf_fmt >> 4; + } else { + req->cdb_offset += data_out_desc_size(cmd); + req->dma_fmt = cmd->buf_fmt & ((1U << 4) - 1); + } + + switch (req->dma_fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + req->total_desc = req->local_desc = 1; + break; + case SRP_DATA_DESC_INDIRECT: { + struct srp_indirect_buf *ind_tmp = (struct srp_indirect_buf *) + (cmd->add_data + req->cdb_offset); + + req->total_desc = be32_to_cpu(ind_tmp->table_desc.len) / + sizeof(struct srp_direct_buf); + req->local_desc = req->writing ? cmd->data_out_desc_cnt : + cmd->data_in_desc_cnt; + break; + } + default: + fprintf(stderr, + "vscsi_preprocess_desc: Unknown format %x\n", req->dma_fmt); + return -1; + } + + return 0; +} + +/* Callback to indicate that the SCSI layer has completed a transfer. */ +static void vscsi_transfer_data(SCSIRequest *sreq, uint32_t len) +{ + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(sreq->bus->qbus.parent); + vscsi_req *req = sreq->hba_private; + uint8_t *buf; + int rc = 0; + + DPRINTF("VSCSI: SCSI xfer complete tag=0x%x len=0x%x, req=%p\n", + sreq->tag, len, req); + if (req == NULL) { + fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", sreq->tag); + return; + } + + if (len) { + buf = scsi_req_get_buf(sreq); + rc = vscsi_srp_transfer_data(s, req, req->writing, buf, len); + } + if (rc < 0) { + fprintf(stderr, "VSCSI: RDMA error rc=%d!\n", rc); + req->dma_error = true; + scsi_req_cancel(req->sreq); + return; + } + + /* Start next chunk */ + req->data_len -= rc; + scsi_req_continue(sreq); +} + +/* Callback to indicate that the SCSI layer has completed a transfer. */ +static void vscsi_command_complete(SCSIRequest *sreq, uint32_t status, size_t resid) +{ + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(sreq->bus->qbus.parent); + vscsi_req *req = sreq->hba_private; + int32_t res_in = 0, res_out = 0; + + DPRINTF("VSCSI: SCSI cmd complete, tag=0x%x status=0x%x, req=%p\n", + sreq->tag, status, req); + if (req == NULL) { + fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", sreq->tag); + return; + } + + if (status == CHECK_CONDITION) { + req->senselen = scsi_req_get_sense(req->sreq, req->sense, + sizeof(req->sense)); + DPRINTF("VSCSI: Sense data, %d bytes:\n", req->senselen); + DPRINTF(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + req->sense[0], req->sense[1], req->sense[2], req->sense[3], + req->sense[4], req->sense[5], req->sense[6], req->sense[7]); + DPRINTF(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + req->sense[8], req->sense[9], req->sense[10], req->sense[11], + req->sense[12], req->sense[13], req->sense[14], req->sense[15]); + } + + DPRINTF("VSCSI: Command complete err=%d\n", status); + if (status == 0) { + /* We handle overflows, not underflows for normal commands, + * but hopefully nobody cares + */ + if (req->writing) { + res_out = req->data_len; + } else { + res_in = req->data_len; + } + } + vscsi_send_rsp(s, req, status, res_in, res_out); + vscsi_put_req(req); +} + +static void vscsi_request_cancelled(SCSIRequest *sreq) +{ + vscsi_req *req = sreq->hba_private; + + if (req->dma_error) { + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(sreq->bus->qbus.parent); + + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } + vscsi_put_req(req); +} + +static const VMStateDescription vmstate_spapr_vscsi_req = { + .name = "spapr_vscsi_req", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BUFFER(crq.raw, vscsi_req), + VMSTATE_BUFFER(iu.srp.reserved, vscsi_req), + VMSTATE_UINT32(qtag, vscsi_req), + VMSTATE_BOOL(active, vscsi_req), + VMSTATE_UINT32(data_len, vscsi_req), + VMSTATE_BOOL(writing, vscsi_req), + VMSTATE_UINT32(senselen, vscsi_req), + VMSTATE_BUFFER(sense, vscsi_req), + VMSTATE_UINT8(dma_fmt, vscsi_req), + VMSTATE_UINT16(local_desc, vscsi_req), + VMSTATE_UINT16(total_desc, vscsi_req), + VMSTATE_UINT16(cdb_offset, vscsi_req), + /*Restart SCSI request from the beginning for now */ + /*VMSTATE_UINT16(cur_desc_num, vscsi_req), + VMSTATE_UINT16(cur_desc_offset, vscsi_req),*/ + VMSTATE_END_OF_LIST() + }, +}; + +static void vscsi_save_request(QEMUFile *f, SCSIRequest *sreq) +{ + vscsi_req *req = sreq->hba_private; + assert(req->active); + + vmstate_save_state(f, &vmstate_spapr_vscsi_req, req, NULL); + + DPRINTF("VSCSI: saving tag=%u, current desc#%d, offset=%x\n", + req->qtag, req->cur_desc_num, req->cur_desc_offset); +} + +static void *vscsi_load_request(QEMUFile *f, SCSIRequest *sreq) +{ + SCSIBus *bus = sreq->bus; + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(bus->qbus.parent); + vscsi_req *req; + int rc; + + assert(sreq->tag < VSCSI_REQ_LIMIT); + req = &s->reqs[sreq->tag]; + assert(!req->active); + + memset(req, 0, sizeof(*req)); + rc = vmstate_load_state(f, &vmstate_spapr_vscsi_req, req, 1); + if (rc) { + fprintf(stderr, "VSCSI: failed loading request tag#%u\n", sreq->tag); + return NULL; + } + assert(req->active); + + req->sreq = scsi_req_ref(sreq); + + DPRINTF("VSCSI: restoring tag=%u, current desc#%d, offset=%x\n", + req->qtag, req->cur_desc_num, req->cur_desc_offset); + + return req; +} + +static void vscsi_process_login(VSCSIState *s, vscsi_req *req) +{ + union viosrp_iu *iu = &req->iu; + struct srp_login_rsp *rsp = &iu->srp.login_rsp; + uint64_t tag = iu->srp.rsp.tag; + + DPRINTF("VSCSI: Got login, sendin response !\n"); + + /* TODO handle case that requested size is wrong and + * buffer format is wrong + */ + memset(iu, 0, sizeof(struct srp_login_rsp)); + rsp->opcode = SRP_LOGIN_RSP; + /* Don't advertise quite as many request as we support to + * keep room for management stuff etc... + */ + rsp->req_lim_delta = cpu_to_be32(VSCSI_REQ_LIMIT-2); + rsp->tag = tag; + rsp->max_it_iu_len = cpu_to_be32(sizeof(union srp_iu)); + rsp->max_ti_iu_len = cpu_to_be32(sizeof(union srp_iu)); + /* direct and indirect */ + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); + + vscsi_send_iu(s, req, sizeof(*rsp), VIOSRP_SRP_FORMAT); +} + +static void vscsi_inquiry_no_target(VSCSIState *s, vscsi_req *req) +{ + uint8_t *cdb = req->iu.srp.cmd.cdb; + uint8_t resp_data[36]; + int rc, len, alen; + + /* We dont do EVPD. Also check that page_code is 0 */ + if ((cdb[1] & 0x01) || cdb[2] != 0) { + /* Send INVALID FIELD IN CDB */ + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + return; + } + alen = cdb[3]; + alen = (alen << 8) | cdb[4]; + len = MIN(alen, 36); + + /* Fake up inquiry using PQ=3 */ + memset(resp_data, 0, 36); + resp_data[0] = 0x7f; /* Not capable of supporting a device here */ + resp_data[2] = 0x06; /* SPS-4 */ + resp_data[3] = 0x02; /* Resp data format */ + resp_data[4] = 36 - 5; /* Additional length */ + resp_data[7] = 0x10; /* Sync transfers */ + memcpy(&resp_data[16], "QEMU EMPTY ", 16); + memcpy(&resp_data[8], "QEMU ", 8); + + req->writing = 0; + vscsi_preprocess_desc(req); + rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len); + if (rc < 0) { + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } else { + vscsi_send_rsp(s, req, 0, 36 - rc, 0); + } +} + +static void vscsi_report_luns(VSCSIState *s, vscsi_req *req) +{ + BusChild *kid; + int i, len, n, rc; + uint8_t *resp_data; + bool found_lun0; + + n = 0; + found_lun0 = false; + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + SCSIDevice *dev = SCSI_DEVICE(kid->child); + + n += 8; + if (dev->channel == 0 && dev->id == 0 && dev->lun == 0) { + found_lun0 = true; + } + } + if (!found_lun0) { + n += 8; + } + len = n+8; + + resp_data = g_malloc0(len); + memset(resp_data, 0, len); + stl_be_p(resp_data, n); + i = found_lun0 ? 8 : 16; + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + DeviceState *qdev = kid->child; + SCSIDevice *dev = SCSI_DEVICE(qdev); + + if (dev->id == 0 && dev->channel == 0) { + resp_data[i] = 0; /* Use simple LUN for 0 (SAM5 4.7.7.1) */ + } else { + resp_data[i] = (2 << 6); /* Otherwise LUN addressing (4.7.7.4) */ + } + resp_data[i] |= dev->id; + resp_data[i+1] = (dev->channel << 5); + resp_data[i+1] |= dev->lun; + i += 8; + } + + vscsi_preprocess_desc(req); + rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len); + g_free(resp_data); + if (rc < 0) { + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } else { + vscsi_send_rsp(s, req, 0, len - rc, 0); + } +} + +static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) +{ + union srp_iu *srp = &req->iu.srp; + SCSIDevice *sdev; + int n, lun; + + if ((srp->cmd.lun == 0 || be64_to_cpu(srp->cmd.lun) == SRP_REPORT_LUNS_WLUN) + && srp->cmd.cdb[0] == REPORT_LUNS) { + vscsi_report_luns(s, req); + return 0; + } + + sdev = vscsi_device_find(&s->bus, be64_to_cpu(srp->cmd.lun), &lun); + if (!sdev) { + DPRINTF("VSCSI: Command for lun %08" PRIx64 " with no drive\n", + be64_to_cpu(srp->cmd.lun)); + if (srp->cmd.cdb[0] == INQUIRY) { + vscsi_inquiry_no_target(s, req); + } else { + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0x00); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } return 1; + } + + req->sreq = scsi_req_new(sdev, req->qtag, lun, srp->cmd.cdb, req); + n = scsi_req_enqueue(req->sreq); + + DPRINTF("VSCSI: Queued command tag 0x%x CMD 0x%x=%s LUN %d ret: %d\n", + req->qtag, srp->cmd.cdb[0], scsi_command_name(srp->cmd.cdb[0]), + lun, n); + + if (n) { + /* Transfer direction must be set before preprocessing the + * descriptors + */ + req->writing = (n < 1); + + /* Preprocess RDMA descriptors */ + vscsi_preprocess_desc(req); + + /* Get transfer direction and initiate transfer */ + if (n > 0) { + req->data_len = n; + } else if (n < 0) { + req->data_len = -n; + } + scsi_req_continue(req->sreq); + } + /* Don't touch req here, it may have been recycled already */ + + return 0; +} + +static int vscsi_process_tsk_mgmt(VSCSIState *s, vscsi_req *req) +{ + union viosrp_iu *iu = &req->iu; + vscsi_req *tmpreq; + int i, lun = 0, resp = SRP_TSK_MGMT_COMPLETE; + SCSIDevice *d; + uint64_t tag = iu->srp.rsp.tag; + uint8_t sol_not = iu->srp.cmd.sol_not; + + fprintf(stderr, "vscsi_process_tsk_mgmt %02x\n", + iu->srp.tsk_mgmt.tsk_mgmt_func); + + d = vscsi_device_find(&s->bus, be64_to_cpu(req->iu.srp.tsk_mgmt.lun), &lun); + if (!d) { + resp = SRP_TSK_MGMT_FIELDS_INVALID; + } else { + switch (iu->srp.tsk_mgmt.tsk_mgmt_func) { + case SRP_TSK_ABORT_TASK: + if (d->lun != lun) { + resp = SRP_TSK_MGMT_FIELDS_INVALID; + break; + } + + tmpreq = vscsi_find_req(s, req->iu.srp.tsk_mgmt.task_tag); + if (tmpreq && tmpreq->sreq) { + assert(tmpreq->sreq->hba_private); + scsi_req_cancel(tmpreq->sreq); + } + break; + + case SRP_TSK_LUN_RESET: + if (d->lun != lun) { + resp = SRP_TSK_MGMT_FIELDS_INVALID; + break; + } + + qdev_reset_all(&d->qdev); + break; + + case SRP_TSK_ABORT_TASK_SET: + case SRP_TSK_CLEAR_TASK_SET: + if (d->lun != lun) { + resp = SRP_TSK_MGMT_FIELDS_INVALID; + break; + } + + for (i = 0; i < VSCSI_REQ_LIMIT; i++) { + tmpreq = &s->reqs[i]; + if (tmpreq->iu.srp.cmd.lun != req->iu.srp.tsk_mgmt.lun) { + continue; + } + if (!tmpreq->active || !tmpreq->sreq) { + continue; + } + assert(tmpreq->sreq->hba_private); + scsi_req_cancel(tmpreq->sreq); + } + break; + + case SRP_TSK_CLEAR_ACA: + resp = SRP_TSK_MGMT_NOT_SUPPORTED; + break; + + default: + resp = SRP_TSK_MGMT_FIELDS_INVALID; + break; + } + } + + /* Compose the response here as */ + memset(iu, 0, sizeof(struct srp_rsp) + 4); + iu->srp.rsp.opcode = SRP_RSP; + iu->srp.rsp.req_lim_delta = cpu_to_be32(1); + iu->srp.rsp.tag = tag; + iu->srp.rsp.flags |= SRP_RSP_FLAG_RSPVALID; + iu->srp.rsp.resp_data_len = cpu_to_be32(4); + if (resp) { + iu->srp.rsp.sol_not = (sol_not & 0x04) >> 2; + } else { + iu->srp.rsp.sol_not = (sol_not & 0x02) >> 1; + } + + iu->srp.rsp.status = GOOD; + iu->srp.rsp.data[3] = resp; + + vscsi_send_iu(s, req, sizeof(iu->srp.rsp) + 4, VIOSRP_SRP_FORMAT); + + return 1; +} + +static int vscsi_handle_srp_req(VSCSIState *s, vscsi_req *req) +{ + union srp_iu *srp = &req->iu.srp; + int done = 1; + uint8_t opcode = srp->rsp.opcode; + + switch (opcode) { + case SRP_LOGIN_REQ: + vscsi_process_login(s, req); + break; + case SRP_TSK_MGMT: + done = vscsi_process_tsk_mgmt(s, req); + break; + case SRP_CMD: + done = vscsi_queue_cmd(s, req); + break; + case SRP_LOGIN_RSP: + case SRP_I_LOGOUT: + case SRP_T_LOGOUT: + case SRP_RSP: + case SRP_CRED_REQ: + case SRP_CRED_RSP: + case SRP_AER_REQ: + case SRP_AER_RSP: + fprintf(stderr, "VSCSI: Unsupported opcode %02x\n", opcode); + break; + default: + fprintf(stderr, "VSCSI: Unknown type %02x\n", opcode); + } + + return done; +} + +static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req) +{ + struct viosrp_adapter_info *sinfo; + struct mad_adapter_info_data info; + int rc; + + sinfo = &req->iu.mad.adapter_info; + +#if 0 /* What for ? */ + rc = spapr_vio_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer), + &info, be16_to_cpu(sinfo->common.length)); + if (rc) { + fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n"); + } +#endif + memset(&info, 0, sizeof(info)); + strcpy(info.srp_version, SRP_VERSION); + memcpy(info.partition_name, "qemu", sizeof("qemu")); + info.partition_number = cpu_to_be32(0); + info.mad_version = cpu_to_be32(1); + info.os_type = cpu_to_be32(2); + info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS << 9); + + rc = spapr_vio_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer), + &info, be16_to_cpu(sinfo->common.length)); + if (rc) { + fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n"); + } + + sinfo->common.status = rc ? cpu_to_be32(1) : 0; + + return vscsi_send_iu(s, req, sizeof(*sinfo), VIOSRP_MAD_FORMAT); +} + +static int vscsi_send_capabilities(VSCSIState *s, vscsi_req *req) +{ + struct viosrp_capabilities *vcap; + struct capabilities cap = { }; + uint16_t len, req_len; + uint64_t buffer; + int rc; + + vcap = &req->iu.mad.capabilities; + req_len = len = be16_to_cpu(vcap->common.length); + buffer = be64_to_cpu(vcap->buffer); + if (len > sizeof(cap)) { + fprintf(stderr, "vscsi_send_capabilities: capabilities size mismatch !\n"); + + /* + * Just read and populate the structure that is known. + * Zero rest of the structure. + */ + len = sizeof(cap); + } + rc = spapr_vio_dma_read(&s->vdev, buffer, &cap, len); + if (rc) { + fprintf(stderr, "vscsi_send_capabilities: DMA read failure !\n"); + } + + /* + * Current implementation does not suppport any migration or + * reservation capabilities. Construct the response telling the + * guest not to use them. + */ + cap.flags = 0; + cap.migration.ecl = 0; + cap.reserve.type = 0; + cap.migration.common.server_support = 0; + cap.reserve.common.server_support = 0; + + rc = spapr_vio_dma_write(&s->vdev, buffer, &cap, len); + if (rc) { + fprintf(stderr, "vscsi_send_capabilities: DMA write failure !\n"); + } + if (req_len > len) { + /* + * Being paranoid and lets not worry about the error code + * here. Actual write of the cap is done above. + */ + spapr_vio_dma_set(&s->vdev, (buffer + len), 0, (req_len - len)); + } + vcap->common.status = rc ? cpu_to_be32(1) : 0; + return vscsi_send_iu(s, req, sizeof(*vcap), VIOSRP_MAD_FORMAT); +} + +static int vscsi_handle_mad_req(VSCSIState *s, vscsi_req *req) +{ + union mad_iu *mad = &req->iu.mad; + bool request_handled = false; + uint64_t retlen = 0; + + switch (be32_to_cpu(mad->empty_iu.common.type)) { + case VIOSRP_EMPTY_IU_TYPE: + fprintf(stderr, "Unsupported EMPTY MAD IU\n"); + retlen = sizeof(mad->empty_iu); + break; + case VIOSRP_ERROR_LOG_TYPE: + fprintf(stderr, "Unsupported ERROR LOG MAD IU\n"); + retlen = sizeof(mad->error_log); + break; + case VIOSRP_ADAPTER_INFO_TYPE: + vscsi_send_adapter_info(s, req); + request_handled = true; + break; + case VIOSRP_HOST_CONFIG_TYPE: + retlen = sizeof(mad->host_config); + break; + case VIOSRP_CAPABILITIES_TYPE: + vscsi_send_capabilities(s, req); + request_handled = true; + break; + default: + fprintf(stderr, "VSCSI: Unknown MAD type %02x\n", + be32_to_cpu(mad->empty_iu.common.type)); + /* + * PAPR+ says that "The length field is set to the length + * of the data structure(s) used in the command". + * As we did not recognize the request type, put zero there. + */ + retlen = 0; + } + + if (!request_handled) { + mad->empty_iu.common.status = cpu_to_be16(VIOSRP_MAD_NOT_SUPPORTED); + vscsi_send_iu(s, req, retlen, VIOSRP_MAD_FORMAT); + } + + return 1; +} + +static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq) +{ + vscsi_req *req; + int done; + + req = vscsi_get_req(s); + if (req == NULL) { + fprintf(stderr, "VSCSI: Failed to get a request !\n"); + return; + } + + /* We only support a limited number of descriptors, we know + * the ibmvscsi driver uses up to 10 max, so it should fit + * in our 256 bytes IUs. If not we'll have to increase the size + * of the structure. + */ + if (crq->s.IU_length > sizeof(union viosrp_iu)) { + fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n", + crq->s.IU_length); + vscsi_put_req(req); + return; + } + + /* XXX Handle failure differently ? */ + if (spapr_vio_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu, + crq->s.IU_length)) { + fprintf(stderr, "vscsi_got_payload: DMA read failure !\n"); + vscsi_put_req(req); + return; + } + memcpy(&req->crq, crq, sizeof(vscsi_crq)); + + if (crq->s.format == VIOSRP_MAD_FORMAT) { + done = vscsi_handle_mad_req(s, req); + } else { + done = vscsi_handle_srp_req(s, req); + } + + if (done) { + vscsi_put_req(req); + } +} + + +static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data) +{ + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev); + vscsi_crq crq; + + memcpy(crq.raw, crq_data, 16); + crq.s.timeout = be16_to_cpu(crq.s.timeout); + crq.s.IU_length = be16_to_cpu(crq.s.IU_length); + crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr); + + DPRINTF("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]); + + switch (crq.s.valid) { + case 0xc0: /* Init command/response */ + + /* Respond to initialization request */ + if (crq.s.format == 0x01) { + memset(crq.raw, 0, 16); + crq.s.valid = 0xc0; + crq.s.format = 0x02; + spapr_vio_send_crq(dev, crq.raw); + } + + /* Note that in hotplug cases, we might get a 0x02 + * as a result of us emitting the init request + */ + + break; + case 0xff: /* Link event */ + + /* Not handled for now */ + + break; + case 0x80: /* Payloads */ + switch (crq.s.format) { + case VIOSRP_SRP_FORMAT: /* AKA VSCSI request */ + case VIOSRP_MAD_FORMAT: /* AKA VSCSI response */ + vscsi_got_payload(s, &crq); + break; + case VIOSRP_OS400_FORMAT: + case VIOSRP_AIX_FORMAT: + case VIOSRP_LINUX_FORMAT: + case VIOSRP_INLINE_FORMAT: + fprintf(stderr, "vscsi_do_srq: Unsupported payload format %02x\n", + crq.s.format); + break; + default: + fprintf(stderr, "vscsi_do_srq: Unknown payload format %02x\n", + crq.s.format); + } + break; + default: + fprintf(stderr, "vscsi_do_crq: unknown CRQ %02x %02x ...\n", + crq.raw[0], crq.raw[1]); + }; + + return 0; +} + +static const struct SCSIBusInfo vscsi_scsi_info = { + .tcq = true, + .max_channel = 7, /* logical unit addressing format */ + .max_target = 63, + .max_lun = 31, + + .transfer_data = vscsi_transfer_data, + .complete = vscsi_command_complete, + .cancel = vscsi_request_cancelled, + .save_request = vscsi_save_request, + .load_request = vscsi_load_request, +}; + +static void spapr_vscsi_reset(VIOsPAPRDevice *dev) +{ + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev); + int i; + + memset(s->reqs, 0, sizeof(s->reqs)); + for (i = 0; i < VSCSI_REQ_LIMIT; i++) { + s->reqs[i].qtag = i; + } +} + +static void spapr_vscsi_realize(VIOsPAPRDevice *dev, Error **errp) +{ + VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev); + + dev->crq.SendFunc = vscsi_do_crq; + + scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), + &vscsi_scsi_info, NULL); + if (!dev->qdev.hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus, errp); + } +} + +void spapr_vscsi_create(VIOsPAPRBus *bus) +{ + DeviceState *dev; + + dev = qdev_create(&bus->bus, "spapr-vscsi"); + + qdev_init_nofail(dev); +} + +static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) +{ + int ret; + + ret = fdt_setprop_cell(fdt, node_off, "#address-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, node_off, "#size-cells", 0); + if (ret < 0) { + return ret; + } + + return 0; +} + +static Property spapr_vscsi_properties[] = { + DEFINE_SPAPR_PROPERTIES(VSCSIState, vdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_spapr_vscsi = { + .name = "spapr_vscsi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_SPAPR_VIO(vdev, VSCSIState), + /* VSCSI state */ + /* ???? */ + + VMSTATE_END_OF_LIST() + }, +}; + +static void spapr_vscsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VIOsPAPRDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); + + k->realize = spapr_vscsi_realize; + k->reset = spapr_vscsi_reset; + k->devnode = spapr_vscsi_devnode; + k->dt_name = "v-scsi"; + k->dt_type = "vscsi"; + k->dt_compatible = "IBM,v-scsi"; + k->signal_mask = 0x00000001; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = spapr_vscsi_properties; + k->rtce_window_size = 0x10000000; + dc->vmsd = &vmstate_spapr_vscsi; +} + +static const TypeInfo spapr_vscsi_info = { + .name = TYPE_VIO_SPAPR_VSCSI_DEVICE, + .parent = TYPE_VIO_SPAPR_DEVICE, + .instance_size = sizeof(VSCSIState), + .class_init = spapr_vscsi_class_init, +}; + +static void spapr_vscsi_register_types(void) +{ + type_register_static(&spapr_vscsi_info); +} + +type_init(spapr_vscsi_register_types) diff --git a/qemu/hw/scsi/srp.h b/qemu/hw/scsi/srp.h new file mode 100644 index 000000000..d27f31d2d --- /dev/null +++ b/qemu/hw/scsi/srp.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef SCSI_SRP_H +#define SCSI_SRP_H + +/* + * Structures and constants for the SCSI RDMA Protocol (SRP) as + * defined by the INCITS T10 committee. This file was written using + * draft Revision 16a of the SRP standard. + */ + +enum { + + SRP_LOGIN_REQ = 0x00, + SRP_TSK_MGMT = 0x01, + SRP_CMD = 0x02, + SRP_I_LOGOUT = 0x03, + SRP_LOGIN_RSP = 0xc0, + SRP_RSP = 0xc1, + SRP_LOGIN_REJ = 0xc2, + SRP_T_LOGOUT = 0x80, + SRP_CRED_REQ = 0x81, + SRP_AER_REQ = 0x82, + SRP_CRED_RSP = 0x41, + SRP_AER_RSP = 0x42 +}; + +enum { + SRP_BUF_FORMAT_DIRECT = 1 << 1, + SRP_BUF_FORMAT_INDIRECT = 1 << 2 +}; + +enum { + SRP_NO_DATA_DESC = 0, + SRP_DATA_DESC_DIRECT = 1, + SRP_DATA_DESC_INDIRECT = 2 +}; + +enum { + SRP_TSK_ABORT_TASK = 0x01, + SRP_TSK_ABORT_TASK_SET = 0x02, + SRP_TSK_CLEAR_TASK_SET = 0x04, + SRP_TSK_LUN_RESET = 0x08, + SRP_TSK_CLEAR_ACA = 0x40 +}; + +enum srp_login_rej_reason { + SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL = 0x00010000, + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES = 0x00010001, + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002, + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL = 0x00010003, + SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004, + SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED = 0x00010005, + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED = 0x00010006 +}; + +enum { + SRP_REV10_IB_IO_CLASS = 0xff00, + SRP_REV16A_IB_IO_CLASS = 0x0100 +}; + +enum { + SRP_TSK_MGMT_COMPLETE = 0x00, + SRP_TSK_MGMT_FIELDS_INVALID = 0x02, + SRP_TSK_MGMT_NOT_SUPPORTED = 0x04, + SRP_TSK_MGMT_FAILED = 0x05 +}; + +struct srp_direct_buf { + uint64_t va; + uint32_t key; + uint32_t len; +}; + +/* + * We need the packed attribute because the SRP spec puts the list of + * descriptors at an offset of 20, which is not aligned to the size of + * struct srp_direct_buf. The whole structure must be packed to avoid + * having the 20-byte structure padded to 24 bytes on 64-bit architectures. + */ +struct srp_indirect_buf { + struct srp_direct_buf table_desc; + uint32_t len; + struct srp_direct_buf desc_list[0]; +} QEMU_PACKED; + +enum { + SRP_MULTICHAN_SINGLE = 0, + SRP_MULTICHAN_MULTI = 1 +}; + +struct srp_login_req { + uint8_t opcode; + uint8_t reserved1[7]; + uint64_t tag; + uint32_t req_it_iu_len; + uint8_t reserved2[4]; + uint16_t req_buf_fmt; + uint8_t req_flags; + uint8_t reserved3[5]; + uint8_t initiator_port_id[16]; + uint8_t target_port_id[16]; +}; + +/* + * The SRP spec defines the size of the LOGIN_RSP structure to be 52 + * bytes, so it needs to be packed to avoid having it padded to 56 + * bytes on 64-bit architectures. + */ +struct srp_login_rsp { + uint8_t opcode; + uint8_t reserved1[3]; + uint32_t req_lim_delta; + uint64_t tag; + uint32_t max_it_iu_len; + uint32_t max_ti_iu_len; + uint16_t buf_fmt; + uint8_t rsp_flags; + uint8_t reserved2[25]; +} QEMU_PACKED; + +struct srp_login_rej { + uint8_t opcode; + uint8_t reserved1[3]; + uint32_t reason; + uint64_t tag; + uint8_t reserved2[8]; + uint16_t buf_fmt; + uint8_t reserved3[6]; +}; + +struct srp_i_logout { + uint8_t opcode; + uint8_t reserved[7]; + uint64_t tag; +}; + +struct srp_t_logout { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved[2]; + uint32_t reason; + uint64_t tag; +}; + +/* + * We need the packed attribute because the SRP spec only aligns the + * 8-byte LUN field to 4 bytes. + */ +struct srp_tsk_mgmt { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[6]; + uint64_t tag; + uint8_t reserved2[4]; + uint64_t lun; + uint8_t reserved3[2]; + uint8_t tsk_mgmt_func; + uint8_t reserved4; + uint64_t task_tag; + uint8_t reserved5[8]; +} QEMU_PACKED; + +/* + * We need the packed attribute because the SRP spec only aligns the + * 8-byte LUN field to 4 bytes. + */ +struct srp_cmd { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[3]; + uint8_t buf_fmt; + uint8_t data_out_desc_cnt; + uint8_t data_in_desc_cnt; + uint64_t tag; + uint8_t reserved2[4]; + uint64_t lun; + uint8_t reserved3; + uint8_t task_attr; + uint8_t reserved4; + uint8_t add_cdb_len; + uint8_t cdb[16]; + uint8_t add_data[0]; +} QEMU_PACKED; + +enum { + SRP_RSP_FLAG_RSPVALID = 1 << 0, + SRP_RSP_FLAG_SNSVALID = 1 << 1, + SRP_RSP_FLAG_DOOVER = 1 << 2, + SRP_RSP_FLAG_DOUNDER = 1 << 3, + SRP_RSP_FLAG_DIOVER = 1 << 4, + SRP_RSP_FLAG_DIUNDER = 1 << 5 +}; + +/* + * The SRP spec defines the size of the RSP structure to be 36 bytes, + * so it needs to be packed to avoid having it padded to 40 bytes on + * 64-bit architectures. + */ +struct srp_rsp { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[2]; + uint32_t req_lim_delta; + uint64_t tag; + uint8_t reserved2[2]; + uint8_t flags; + uint8_t status; + uint32_t data_out_res_cnt; + uint32_t data_in_res_cnt; + uint32_t sense_data_len; + uint32_t resp_data_len; + uint8_t data[0]; +} QEMU_PACKED; + +#endif /* SCSI_SRP_H */ diff --git a/qemu/hw/scsi/vhost-scsi.c b/qemu/hw/scsi/vhost-scsi.c new file mode 100644 index 000000000..a69918bef --- /dev/null +++ b/qemu/hw/scsi/vhost-scsi.c @@ -0,0 +1,351 @@ +/* + * vhost_scsi host device + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * + * Changes for QEMU mainline + tcm_vhost kernel upstream: + * Nicholas Bellinger <nab@risingtidesystems.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <sys/ioctl.h> +#include "config.h" +#include "qemu/error-report.h" +#include "qemu/queue.h" +#include "monitor/monitor.h" +#include "migration/migration.h" +#include "hw/virtio/vhost-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/fw-path-provider.h" + +/* Features supported by host kernel. */ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_SCSI_F_HOTPLUG, + VHOST_INVALID_FEATURE_BIT +}; + +static int vhost_scsi_set_endpoint(VHostSCSI *s) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + const VhostOps *vhost_ops = s->dev.vhost_ops; + struct vhost_scsi_target backend; + int ret; + + memset(&backend, 0, sizeof(backend)); + pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); + ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend); + if (ret < 0) { + return -errno; + } + return 0; +} + +static void vhost_scsi_clear_endpoint(VHostSCSI *s) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + struct vhost_scsi_target backend; + const VhostOps *vhost_ops = s->dev.vhost_ops; + + memset(&backend, 0, sizeof(backend)); + pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); + vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend); +} + +static int vhost_scsi_start(VHostSCSI *s) +{ + int ret, abi_version, i; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + const VhostOps *vhost_ops = s->dev.vhost_ops; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_ops->vhost_call(&s->dev, + VHOST_SCSI_GET_ABI_VERSION, &abi_version); + if (ret < 0) { + return -errno; + } + if (abi_version > VHOST_SCSI_ABI_VERSION) { + error_report("vhost-scsi: The running tcm_vhost kernel abi_version:" + " %d is greater than vhost_scsi userspace supports: %d, please" + " upgrade your version of QEMU", abi_version, + VHOST_SCSI_ABI_VERSION); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(&s->dev, vdev); + if (ret < 0) { + return ret; + } + + s->dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&s->dev, vdev); + if (ret < 0) { + error_report("Error start vhost dev"); + goto err_notifiers; + } + + ret = vhost_scsi_set_endpoint(s); + if (ret < 0) { + error_report("Error set vhost-scsi endpoint"); + goto err_vhost_stop; + } + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier"); + goto err_endpoint; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < s->dev.nvqs; i++) { + vhost_virtqueue_mask(&s->dev, vdev, i, false); + } + + return ret; + +err_endpoint: + vhost_scsi_clear_endpoint(s); +err_vhost_stop: + vhost_dev_stop(&s->dev, vdev); +err_notifiers: + vhost_dev_disable_notifiers(&s->dev, vdev); + return ret; +} + +static void vhost_scsi_stop(VHostSCSI *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret = 0; + + if (k->set_guest_notifiers) { + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + } + } + assert(ret >= 0); + + vhost_scsi_clear_endpoint(s); + vhost_dev_stop(&s->dev, vdev); + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static uint64_t vhost_scsi_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostSCSI *s = VHOST_SCSI(vdev); + + return vhost_get_features(&s->dev, kernel_feature_bits, features); +} + +static void vhost_scsi_set_config(VirtIODevice *vdev, + const uint8_t *config) +{ + VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + if ((uint32_t) virtio_ldl_p(vdev, &scsiconf->sense_size) != vs->sense_size || + (uint32_t) virtio_ldl_p(vdev, &scsiconf->cdb_size) != vs->cdb_size) { + error_report("vhost-scsi does not support changing the sense data and CDB sizes"); + exit(1); + } +} + +static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) +{ + VHostSCSI *s = (VHostSCSI *)vdev; + bool start = (val & VIRTIO_CONFIG_S_DRIVER_OK); + + if (s->dev.started == start) { + return; + } + + if (start) { + int ret; + + ret = vhost_scsi_start(s); + if (ret < 0) { + error_report("virtio-scsi: unable to start vhost: %s", + strerror(-ret)); + + /* There is no userspace virtio-scsi fallback so exit */ + exit(1); + } + } else { + vhost_scsi_stop(s); + } +} + +static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +} + +static void vhost_scsi_realize(DeviceState *dev, Error **errp) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + VHostSCSI *s = VHOST_SCSI(dev); + Error *err = NULL; + int vhostfd = -1; + int ret; + + if (!vs->conf.wwpn) { + error_setg(errp, "vhost-scsi: missing wwpn"); + return; + } + + if (vs->conf.vhostfd) { + vhostfd = monitor_fd_param(cur_mon, vs->conf.vhostfd, &err); + if (vhostfd == -1) { + error_setg(errp, "vhost-scsi: unable to parse vhostfd: %s", + error_get_pretty(err)); + error_free(err); + return; + } + } else { + vhostfd = open("/dev/vhost-scsi", O_RDWR); + if (vhostfd < 0) { + error_setg(errp, "vhost-scsi: open vhost char device failed: %s", + strerror(errno)); + return; + } + } + + virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output, + vhost_dummy_handle_output, + vhost_dummy_handle_output); + if (err != NULL) { + error_propagate(errp, err); + close(vhostfd); + return; + } + + s->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; + s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); + s->dev.vq_index = 0; + s->dev.backend_features = 0; + + ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd, + VHOST_BACKEND_TYPE_KERNEL); + if (ret < 0) { + error_setg(errp, "vhost-scsi: vhost initialization failed: %s", + strerror(-ret)); + return; + } + + /* At present, channel and lun both are 0 for bootable vhost-scsi disk */ + s->channel = 0; + s->lun = 0; + /* Note: we can also get the minimum tpgt from kernel */ + s->target = vs->conf.boot_tpgt; + + error_setg(&s->migration_blocker, + "vhost-scsi does not support migration"); + migrate_add_blocker(s->migration_blocker); +} + +static void vhost_scsi_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostSCSI *s = VHOST_SCSI(dev); + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); + + /* This will stop vhost backend. */ + vhost_scsi_set_status(vdev, 0); + + g_free(s->dev.vqs); + + virtio_scsi_common_unrealize(dev, errp); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *vhost_scsi_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + VHostSCSI *s = VHOST_SCSI(dev); + /* format: channel@channel/vhost-scsi@target,lun */ + return g_strdup_printf("channel@%x/%s@%x,%x", s->channel, + qdev_fw_name(dev), s->target, s->lun); +} + +static Property vhost_scsi_properties[] = { + DEFINE_PROP_STRING("vhostfd", VHostSCSI, parent_obj.conf.vhostfd), + DEFINE_PROP_STRING("wwpn", VHostSCSI, parent_obj.conf.wwpn), + DEFINE_PROP_UINT32("boot_tpgt", VHostSCSI, parent_obj.conf.boot_tpgt, 0), + DEFINE_PROP_UINT32("num_queues", VHostSCSI, parent_obj.conf.num_queues, 1), + DEFINE_PROP_UINT32("max_sectors", VHostSCSI, parent_obj.conf.max_sectors, + 0xFFFF), + DEFINE_PROP_UINT32("cmd_per_lun", VHostSCSI, parent_obj.conf.cmd_per_lun, + 128), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass); + + dc->props = vhost_scsi_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_scsi_realize; + vdc->unrealize = vhost_scsi_unrealize; + vdc->get_features = vhost_scsi_get_features; + vdc->set_config = vhost_scsi_set_config; + vdc->set_status = vhost_scsi_set_status; + fwc->get_dev_path = vhost_scsi_get_fw_dev_path; +} + +static void vhost_scsi_instance_init(Object *obj) +{ + VHostSCSI *dev = VHOST_SCSI(obj); + + device_add_bootindex_property(obj, &dev->bootindex, "bootindex", NULL, + DEVICE(dev), NULL); +} + +static const TypeInfo vhost_scsi_info = { + .name = TYPE_VHOST_SCSI, + .parent = TYPE_VIRTIO_SCSI_COMMON, + .instance_size = sizeof(VHostSCSI), + .class_init = vhost_scsi_class_init, + .instance_init = vhost_scsi_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_scsi_info); +} + +type_init(virtio_register_types) diff --git a/qemu/hw/scsi/viosrp.h b/qemu/hw/scsi/viosrp.h new file mode 100644 index 000000000..d8e365db1 --- /dev/null +++ b/qemu/hw/scsi/viosrp.h @@ -0,0 +1,216 @@ +/*****************************************************************************/ +/* srp.h -- SCSI RDMA Protocol definitions */ +/* */ +/* Written By: Colin Devilbis, IBM Corporation */ +/* */ +/* Copyright (C) 2003 IBM Corporation */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* */ +/* */ +/* This file contains structures and definitions for IBM RPA (RS/6000 */ +/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */ +/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */ +/* commands between logical partitions. */ +/* */ +/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */ +/* between partitions. The definitions in this file are architected, */ +/* and cannot be changed without breaking compatibility with other versions */ +/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/ +/* between logical partitions */ +/*****************************************************************************/ +#ifndef PPC_VIOSRP_H +#define PPC_VIOSRP_H + +#define SRP_VERSION "16.a" +#define SRP_MAX_IU_LEN 256 +#define SRP_MAX_LOC_LEN 32 + +union srp_iu { + struct srp_login_req login_req; + struct srp_login_rsp login_rsp; + struct srp_login_rej login_rej; + struct srp_i_logout i_logout; + struct srp_t_logout t_logout; + struct srp_tsk_mgmt tsk_mgmt; + struct srp_cmd cmd; + struct srp_rsp rsp; + uint8_t reserved[SRP_MAX_IU_LEN]; +}; + +enum viosrp_crq_formats { + VIOSRP_SRP_FORMAT = 0x01, + VIOSRP_MAD_FORMAT = 0x02, + VIOSRP_OS400_FORMAT = 0x03, + VIOSRP_AIX_FORMAT = 0x04, + VIOSRP_LINUX_FORMAT = 0x06, + VIOSRP_INLINE_FORMAT = 0x07 +}; + +enum viosrp_crq_status { + VIOSRP_OK = 0x0, + VIOSRP_NONRECOVERABLE_ERR = 0x1, + VIOSRP_VIOLATES_MAX_XFER = 0x2, + VIOSRP_PARTNER_PANIC = 0x3, + VIOSRP_DEVICE_BUSY = 0x8, + VIOSRP_ADAPTER_FAIL = 0x10, + VIOSRP_OK2 = 0x99, +}; + +struct viosrp_crq { + uint8_t valid; /* used by RPA */ + uint8_t format; /* SCSI vs out-of-band */ + uint8_t reserved; + uint8_t status; /* non-scsi failure? (e.g. DMA failure) */ + uint16_t timeout; /* in seconds */ + uint16_t IU_length; /* in bytes */ + uint64_t IU_data_ptr; /* the TCE for transferring data */ +}; + +/* MADs are Management requests above and beyond the IUs defined in the SRP + * standard. + */ +enum viosrp_mad_types { + VIOSRP_EMPTY_IU_TYPE = 0x01, + VIOSRP_ERROR_LOG_TYPE = 0x02, + VIOSRP_ADAPTER_INFO_TYPE = 0x03, + VIOSRP_HOST_CONFIG_TYPE = 0x04, + VIOSRP_CAPABILITIES_TYPE = 0x05, + VIOSRP_ENABLE_FAST_FAIL = 0x08, +}; + +enum viosrp_mad_status { + VIOSRP_MAD_SUCCESS = 0x00, + VIOSRP_MAD_NOT_SUPPORTED = 0xF1, + VIOSRP_MAD_FAILED = 0xF7, +}; + +enum viosrp_capability_type { + MIGRATION_CAPABILITIES = 0x01, + RESERVATION_CAPABILITIES = 0x02, +}; + +enum viosrp_capability_support { + SERVER_DOES_NOT_SUPPORTS_CAP = 0x0, + SERVER_SUPPORTS_CAP = 0x01, + SERVER_CAP_DATA = 0x02, +}; + +enum viosrp_reserve_type { + CLIENT_RESERVE_SCSI_2 = 0x01, +}; + +enum viosrp_capability_flag { + CLIENT_MIGRATED = 0x01, + CLIENT_RECONNECT = 0x02, + CAP_LIST_SUPPORTED = 0x04, + CAP_LIST_DATA = 0x08, +}; + +/* + * Common MAD header + */ +struct mad_common { + uint32_t type; + uint16_t status; + uint16_t length; + uint64_t tag; +}; + +/* + * All SRP (and MAD) requests normally flow from the + * client to the server. There is no way for the server to send + * an asynchronous message back to the client. The Empty IU is used + * to hang out a meaningless request to the server so that it can respond + * asynchrouously with something like a SCSI AER + */ +struct viosrp_empty_iu { + struct mad_common common; + uint64_t buffer; + uint32_t port; +}; + +struct viosrp_error_log { + struct mad_common common; + uint64_t buffer; +}; + +struct viosrp_adapter_info { + struct mad_common common; + uint64_t buffer; +}; + +struct viosrp_host_config { + struct mad_common common; + uint64_t buffer; +}; + +struct viosrp_fast_fail { + struct mad_common common; +}; + +struct viosrp_capabilities { + struct mad_common common; + uint64_t buffer; +}; + +struct mad_capability_common { + uint32_t cap_type; + uint16_t length; + uint16_t server_support; +}; + +struct mad_reserve_cap { + struct mad_capability_common common; + uint32_t type; +}; + +struct mad_migration_cap { + struct mad_capability_common common; + uint32_t ecl; +}; + +struct capabilities { + uint32_t flags; + char name[SRP_MAX_LOC_LEN]; + char loc[SRP_MAX_LOC_LEN]; + struct mad_migration_cap migration; + struct mad_reserve_cap reserve; +}; + +union mad_iu { + struct viosrp_empty_iu empty_iu; + struct viosrp_error_log error_log; + struct viosrp_adapter_info adapter_info; + struct viosrp_host_config host_config; + struct viosrp_fast_fail fast_fail; + struct viosrp_capabilities capabilities; +}; + +union viosrp_iu { + union srp_iu srp; + union mad_iu mad; +}; + +struct mad_adapter_info_data { + char srp_version[8]; + char partition_name[96]; + uint32_t partition_number; + uint32_t mad_version; + uint32_t os_type; + uint32_t port_max_txu[8]; /* per-port maximum transfer */ +}; + +#endif diff --git a/qemu/hw/scsi/virtio-scsi-dataplane.c b/qemu/hw/scsi/virtio-scsi-dataplane.c new file mode 100644 index 000000000..5575648a9 --- /dev/null +++ b/qemu/hw/scsi/virtio-scsi-dataplane.c @@ -0,0 +1,316 @@ +/* + * Virtio SCSI dataplane + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Fam Zheng <famz@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/virtio/virtio-scsi.h" +#include "qemu/error-report.h" +#include "sysemu/block-backend.h" +#include <hw/scsi/scsi.h> +#include <block/scsi.h> +#include <hw/virtio/virtio-bus.h> +#include "hw/virtio/virtio-access.h" +#include "stdio.h" + +/* Context: QEMU global mutex held */ +void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + + assert(!s->ctx); + s->ctx = iothread_get_aio_context(vs->conf.iothread); + + /* Don't try if transport does not support notifiers. */ + if (!k->set_guest_notifiers || !k->set_host_notifier) { + fprintf(stderr, "virtio-scsi: Failed to set iothread " + "(transport does not support notifiers)"); + exit(1); + } +} + +static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s, + VirtQueue *vq, + EventNotifierHandler *handler, + int n) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOSCSIVring *r; + int rc; + + /* Set up virtqueue notify */ + rc = k->set_host_notifier(qbus->parent, n, true); + if (rc != 0) { + fprintf(stderr, "virtio-scsi: Failed to set host notifier (%d)\n", + rc); + s->dataplane_fenced = true; + return NULL; + } + + r = g_slice_new(VirtIOSCSIVring); + r->host_notifier = *virtio_queue_get_host_notifier(vq); + r->guest_notifier = *virtio_queue_get_guest_notifier(vq); + aio_set_event_notifier(s->ctx, &r->host_notifier, handler); + + r->parent = s; + + if (!vring_setup(&r->vring, VIRTIO_DEVICE(s), n)) { + fprintf(stderr, "virtio-scsi: VRing setup failed\n"); + goto fail_vring; + } + return r; + +fail_vring: + aio_set_event_notifier(s->ctx, &r->host_notifier, NULL); + k->set_host_notifier(qbus->parent, n, false); + g_slice_free(VirtIOSCSIVring, r); + return NULL; +} + +VirtIOSCSIReq *virtio_scsi_pop_req_vring(VirtIOSCSI *s, + VirtIOSCSIVring *vring) +{ + VirtIOSCSIReq *req = virtio_scsi_init_req(s, NULL); + int r; + + req->vring = vring; + r = vring_pop((VirtIODevice *)s, &vring->vring, &req->elem); + if (r < 0) { + virtio_scsi_free_req(req); + req = NULL; + } + return req; +} + +void virtio_scsi_vring_push_notify(VirtIOSCSIReq *req) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(req->vring->parent); + + vring_push(vdev, &req->vring->vring, &req->elem, + req->qsgl.size + req->resp_iov.size); + + if (vring_should_notify(vdev, &req->vring->vring)) { + event_notifier_set(&req->vring->guest_notifier); + } +} + +static void virtio_scsi_iothread_handle_ctrl(EventNotifier *notifier) +{ + VirtIOSCSIVring *vring = container_of(notifier, + VirtIOSCSIVring, host_notifier); + VirtIOSCSI *s = VIRTIO_SCSI(vring->parent); + VirtIOSCSIReq *req; + + event_notifier_test_and_clear(notifier); + while ((req = virtio_scsi_pop_req_vring(s, vring))) { + virtio_scsi_handle_ctrl_req(s, req); + } +} + +static void virtio_scsi_iothread_handle_event(EventNotifier *notifier) +{ + VirtIOSCSIVring *vring = container_of(notifier, + VirtIOSCSIVring, host_notifier); + VirtIOSCSI *s = vring->parent; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + event_notifier_test_and_clear(notifier); + + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + if (s->events_dropped) { + virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); + } +} + +static void virtio_scsi_iothread_handle_cmd(EventNotifier *notifier) +{ + VirtIOSCSIVring *vring = container_of(notifier, + VirtIOSCSIVring, host_notifier); + VirtIOSCSI *s = (VirtIOSCSI *)vring->parent; + VirtIOSCSIReq *req, *next; + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + event_notifier_test_and_clear(notifier); + while ((req = virtio_scsi_pop_req_vring(s, vring))) { + if (virtio_scsi_handle_cmd_req_prepare(s, req)) { + QTAILQ_INSERT_TAIL(&reqs, req, next); + } + } + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { + virtio_scsi_handle_cmd_req_submit(s, req); + } +} + +/* assumes s->ctx held */ +static void virtio_scsi_clear_aio(VirtIOSCSI *s) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + int i; + + if (s->ctrl_vring) { + aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL); + } + if (s->event_vring) { + aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL); + } + if (s->cmd_vrings) { + for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) { + aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL); + } + } +} + +static void virtio_scsi_vring_teardown(VirtIOSCSI *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + int i; + + if (s->ctrl_vring) { + vring_teardown(&s->ctrl_vring->vring, vdev, 0); + g_slice_free(VirtIOSCSIVring, s->ctrl_vring); + s->ctrl_vring = NULL; + } + if (s->event_vring) { + vring_teardown(&s->event_vring->vring, vdev, 1); + g_slice_free(VirtIOSCSIVring, s->event_vring); + s->event_vring = NULL; + } + if (s->cmd_vrings) { + for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) { + vring_teardown(&s->cmd_vrings[i]->vring, vdev, 2 + i); + g_slice_free(VirtIOSCSIVring, s->cmd_vrings[i]); + s->cmd_vrings[i] = NULL; + } + free(s->cmd_vrings); + s->cmd_vrings = NULL; + } +} + +/* Context: QEMU global mutex held */ +void virtio_scsi_dataplane_start(VirtIOSCSI *s) +{ + int i; + int rc; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + + if (s->dataplane_started || + s->dataplane_starting || + s->dataplane_fenced || + s->ctx != iothread_get_aio_context(vs->conf.iothread)) { + return; + } + + s->dataplane_starting = true; + + /* Set up guest notifier (irq) */ + rc = k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, true); + if (rc != 0) { + fprintf(stderr, "virtio-scsi: Failed to set guest notifiers (%d), " + "ensure -enable-kvm is set\n", rc); + s->dataplane_fenced = true; + goto fail_guest_notifiers; + } + + aio_context_acquire(s->ctx); + s->ctrl_vring = virtio_scsi_vring_init(s, vs->ctrl_vq, + virtio_scsi_iothread_handle_ctrl, + 0); + if (!s->ctrl_vring) { + goto fail_vrings; + } + s->event_vring = virtio_scsi_vring_init(s, vs->event_vq, + virtio_scsi_iothread_handle_event, + 1); + if (!s->event_vring) { + goto fail_vrings; + } + s->cmd_vrings = g_new(VirtIOSCSIVring *, vs->conf.num_queues); + for (i = 0; i < vs->conf.num_queues; i++) { + s->cmd_vrings[i] = + virtio_scsi_vring_init(s, vs->cmd_vqs[i], + virtio_scsi_iothread_handle_cmd, + i + 2); + if (!s->cmd_vrings[i]) { + goto fail_vrings; + } + } + + s->dataplane_starting = false; + s->dataplane_started = true; + aio_context_release(s->ctx); + return; + +fail_vrings: + virtio_scsi_clear_aio(s); + aio_context_release(s->ctx); + virtio_scsi_vring_teardown(s); + for (i = 0; i < vs->conf.num_queues + 2; i++) { + k->set_host_notifier(qbus->parent, i, false); + } + k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); +fail_guest_notifiers: + s->dataplane_starting = false; +} + +/* Context: QEMU global mutex held */ +void virtio_scsi_dataplane_stop(VirtIOSCSI *s) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + int i; + + /* Better luck next time. */ + if (s->dataplane_fenced) { + s->dataplane_fenced = false; + return; + } + if (!s->dataplane_started || s->dataplane_stopping) { + return; + } + s->dataplane_stopping = true; + assert(s->ctx == iothread_get_aio_context(vs->conf.iothread)); + + aio_context_acquire(s->ctx); + + aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL); + aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL); + for (i = 0; i < vs->conf.num_queues; i++) { + aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL); + } + + blk_drain_all(); /* ensure there are no in-flight requests */ + + aio_context_release(s->ctx); + + /* Sync vring state back to virtqueue so that non-dataplane request + * processing can continue when we disable the host notifier below. + */ + virtio_scsi_vring_teardown(s); + + for (i = 0; i < vs->conf.num_queues + 2; i++) { + k->set_host_notifier(qbus->parent, i, false); + } + + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); + s->dataplane_stopping = false; + s->dataplane_started = false; +} diff --git a/qemu/hw/scsi/virtio-scsi.c b/qemu/hw/scsi/virtio-scsi.c new file mode 100644 index 000000000..811c3da8b --- /dev/null +++ b/qemu/hw/scsi/virtio-scsi.c @@ -0,0 +1,1016 @@ +/* + * Virtio SCSI HBA + * + * Copyright IBM, Corp. 2010 + * Copyright Red Hat, Inc. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "standard-headers/linux/virtio_ids.h" +#include "hw/virtio/virtio-scsi.h" +#include "qemu/error-report.h" +#include "qemu/iov.h" +#include "sysemu/block-backend.h" +#include <hw/scsi/scsi.h> +#include <block/scsi.h> +#include <hw/virtio/virtio-bus.h> +#include "hw/virtio/virtio-access.h" +#include "migration/migration.h" + +static inline int virtio_scsi_get_lun(uint8_t *lun) +{ + return ((lun[2] << 8) | lun[3]) & 0x3FFF; +} + +static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) +{ + if (lun[0] != 1) { + return NULL; + } + if (lun[2] != 0 && !(lun[2] >= 0x40 && lun[2] < 0x80)) { + return NULL; + } + return scsi_device_find(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); +} + +VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) +{ + VirtIOSCSIReq *req; + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s; + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) + + sizeof(VirtQueueElement); + + req = g_slice_alloc(sizeof(*req) + vs->cdb_size); + req->vq = vq; + req->dev = s; + qemu_sglist_init(&req->qsgl, DEVICE(s), 8, &address_space_memory); + qemu_iovec_init(&req->resp_iov, 1); + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); + return req; +} + +void virtio_scsi_free_req(VirtIOSCSIReq *req) +{ + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)req->dev; + + qemu_iovec_destroy(&req->resp_iov); + qemu_sglist_destroy(&req->qsgl); + g_slice_free1(sizeof(*req) + vs->cdb_size, req); +} + +static void virtio_scsi_complete_req(VirtIOSCSIReq *req) +{ + VirtIOSCSI *s = req->dev; + VirtQueue *vq = req->vq; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size); + if (req->vring) { + assert(req->vq == NULL); + virtio_scsi_vring_push_notify(req); + } else { + virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size); + virtio_notify(vdev, vq); + } + + if (req->sreq) { + req->sreq->hba_private = NULL; + scsi_req_unref(req->sreq); + } + virtio_scsi_free_req(req); +} + +static void virtio_scsi_bad_req(void) +{ + error_report("wrong size for virtio-scsi headers"); + exit(1); +} + +static size_t qemu_sgl_concat(VirtIOSCSIReq *req, struct iovec *iov, + hwaddr *addr, int num, size_t skip) +{ + QEMUSGList *qsgl = &req->qsgl; + size_t copied = 0; + + while (num) { + if (skip >= iov->iov_len) { + skip -= iov->iov_len; + } else { + qemu_sglist_add(qsgl, *addr + skip, iov->iov_len - skip); + copied += iov->iov_len - skip; + skip = 0; + } + iov++; + addr++; + num--; + } + + assert(skip == 0); + return copied; +} + +static int virtio_scsi_parse_req(VirtIOSCSIReq *req, + unsigned req_size, unsigned resp_size) +{ + VirtIODevice *vdev = (VirtIODevice *) req->dev; + size_t in_size, out_size; + + if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, + &req->req, req_size) < req_size) { + return -EINVAL; + } + + if (qemu_iovec_concat_iov(&req->resp_iov, + req->elem.in_sg, req->elem.in_num, 0, + resp_size) < resp_size) { + return -EINVAL; + } + + req->resp_size = resp_size; + + /* Old BIOSes left some padding by mistake after the req_size/resp_size. + * As a workaround, always consider the first buffer as the virtio-scsi + * request/response, making the payload start at the second element + * of the iovec. + * + * The actual length of the response header, stored in req->resp_size, + * does not change. + * + * TODO: always disable this workaround for virtio 1.0 devices. + */ + if (!virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT)) { + if (req->elem.out_num) { + req_size = req->elem.out_sg[0].iov_len; + } + if (req->elem.in_num) { + resp_size = req->elem.in_sg[0].iov_len; + } + } + + out_size = qemu_sgl_concat(req, req->elem.out_sg, + &req->elem.out_addr[0], req->elem.out_num, + req_size); + in_size = qemu_sgl_concat(req, req->elem.in_sg, + &req->elem.in_addr[0], req->elem.in_num, + resp_size); + + if (out_size && in_size) { + return -ENOTSUP; + } + + if (out_size) { + req->mode = SCSI_XFER_TO_DEV; + } else if (in_size) { + req->mode = SCSI_XFER_FROM_DEV; + } + + return 0; +} + +static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq) +{ + VirtIOSCSIReq *req = virtio_scsi_init_req(s, vq); + if (!virtqueue_pop(vq, &req->elem)) { + virtio_scsi_free_req(req); + return NULL; + } + return req; +} + +static void virtio_scsi_save_request(QEMUFile *f, SCSIRequest *sreq) +{ + VirtIOSCSIReq *req = sreq->hba_private; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(req->dev); + uint32_t n = virtio_queue_get_id(req->vq) - 2; + + assert(n < vs->conf.num_queues); + qemu_put_be32s(f, &n); + qemu_put_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); +} + +static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) +{ + SCSIBus *bus = sreq->bus; + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSIReq *req; + uint32_t n; + + qemu_get_be32s(f, &n); + assert(n < vs->conf.num_queues); + req = virtio_scsi_init_req(s, vs->cmd_vqs[n]); + qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); + /* TODO: add a way for SCSIBusInfo's load_request to fail, + * and fail migration instead of asserting here. + * When we do, we might be able to re-enable NDEBUG below. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif + assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg)); + assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg)); + + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size, + sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) { + error_report("invalid SCSI request migration data"); + exit(1); + } + + scsi_req_ref(sreq); + req->sreq = sreq; + if (req->sreq->cmd.mode != SCSI_XFER_NONE) { + assert(req->sreq->cmd.mode == req->mode); + } + return req; +} + +typedef struct { + Notifier notifier; + VirtIOSCSIReq *tmf_req; +} VirtIOSCSICancelNotifier; + +static void virtio_scsi_cancel_notify(Notifier *notifier, void *data) +{ + VirtIOSCSICancelNotifier *n = container_of(notifier, + VirtIOSCSICancelNotifier, + notifier); + + if (--n->tmf_req->remaining == 0) { + virtio_scsi_complete_req(n->tmf_req); + } + g_slice_free(VirtIOSCSICancelNotifier, n); +} + +/* Return 0 if the request is ready to be completed and return to guest; + * -EINPROGRESS if the request is submitted and will be completed later, in the + * case of async cancellation. */ +static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) +{ + SCSIDevice *d = virtio_scsi_device_find(s, req->req.tmf.lun); + SCSIRequest *r, *next; + BusChild *kid; + int target; + int ret = 0; + + if (s->dataplane_started) { + assert(blk_get_aio_context(d->conf.blk) == s->ctx); + } + /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */ + req->resp.tmf.response = VIRTIO_SCSI_S_OK; + + virtio_tswap32s(VIRTIO_DEVICE(s), &req->req.tmf.subtype); + switch (req->req.tmf.subtype) { + case VIRTIO_SCSI_T_TMF_ABORT_TASK: + case VIRTIO_SCSI_T_TMF_QUERY_TASK: + if (!d) { + goto fail; + } + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { + goto incorrect_lun; + } + QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) { + VirtIOSCSIReq *cmd_req = r->hba_private; + if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) { + break; + } + } + if (r) { + /* + * Assert that the request has not been completed yet, we + * check for it in the loop above. + */ + assert(r->hba_private); + if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) { + /* "If the specified command is present in the task set, then + * return a service response set to FUNCTION SUCCEEDED". + */ + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; + } else { + VirtIOSCSICancelNotifier *notifier; + + req->remaining = 1; + notifier = g_slice_new(VirtIOSCSICancelNotifier); + notifier->tmf_req = req; + notifier->notifier.notify = virtio_scsi_cancel_notify; + scsi_req_cancel_async(r, ¬ifier->notifier); + ret = -EINPROGRESS; + } + } + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: + if (!d) { + goto fail; + } + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { + goto incorrect_lun; + } + s->resetting++; + qdev_reset_all(&d->qdev); + s->resetting--; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: + case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: + case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: + if (!d) { + goto fail; + } + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { + goto incorrect_lun; + } + + /* Add 1 to "remaining" until virtio_scsi_do_tmf returns. + * This way, if the bus starts calling back to the notifiers + * even before we finish the loop, virtio_scsi_cancel_notify + * will not complete the TMF too early. + */ + req->remaining = 1; + QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) { + if (r->hba_private) { + if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) { + /* "If there is any command present in the task set, then + * return a service response set to FUNCTION SUCCEEDED". + */ + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; + break; + } else { + VirtIOSCSICancelNotifier *notifier; + + req->remaining++; + notifier = g_slice_new(VirtIOSCSICancelNotifier); + notifier->notifier.notify = virtio_scsi_cancel_notify; + notifier->tmf_req = req; + scsi_req_cancel_async(r, ¬ifier->notifier); + } + } + } + if (--req->remaining > 0) { + ret = -EINPROGRESS; + } + break; + + case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: + target = req->req.tmf.lun[1]; + s->resetting++; + QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + d = DO_UPCAST(SCSIDevice, qdev, kid->child); + if (d->channel == 0 && d->id == target) { + qdev_reset_all(&d->qdev); + } + } + s->resetting--; + break; + + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + default: + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; + break; + } + + return ret; + +incorrect_lun: + req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; + return ret; + +fail: + req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; + return ret; +} + +void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) +{ + VirtIODevice *vdev = (VirtIODevice *)s; + uint32_t type; + int r = 0; + + if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, + &type, sizeof(type)) < sizeof(type)) { + virtio_scsi_bad_req(); + return; + } + + virtio_tswap32s(vdev, &type); + if (type == VIRTIO_SCSI_T_TMF) { + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq), + sizeof(VirtIOSCSICtrlTMFResp)) < 0) { + virtio_scsi_bad_req(); + } else { + r = virtio_scsi_do_tmf(s, req); + } + + } else if (type == VIRTIO_SCSI_T_AN_QUERY || + type == VIRTIO_SCSI_T_AN_SUBSCRIBE) { + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq), + sizeof(VirtIOSCSICtrlANResp)) < 0) { + virtio_scsi_bad_req(); + } else { + req->resp.an.event_actual = 0; + req->resp.an.response = VIRTIO_SCSI_S_OK; + } + } + if (r == 0) { + virtio_scsi_complete_req(req); + } else { + assert(r == -EINPROGRESS); + } +} + +static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + VirtIOSCSIReq *req; + + if (s->ctx && !s->dataplane_disabled) { + virtio_scsi_dataplane_start(s); + return; + } + while ((req = virtio_scsi_pop_req(s, vq))) { + virtio_scsi_handle_ctrl_req(s, req); + } +} + +static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) +{ + /* Sense data is not in req->resp and is copied separately + * in virtio_scsi_command_complete. + */ + req->resp_size = sizeof(VirtIOSCSICmdResp); + virtio_scsi_complete_req(req); +} + +static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status, + size_t resid) +{ + VirtIOSCSIReq *req = r->hba_private; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + uint32_t sense_len; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + + if (r->io_canceled) { + return; + } + + req->resp.cmd.response = VIRTIO_SCSI_S_OK; + req->resp.cmd.status = status; + if (req->resp.cmd.status == GOOD) { + req->resp.cmd.resid = virtio_tswap32(vdev, resid); + } else { + req->resp.cmd.resid = 0; + sense_len = scsi_req_get_sense(r, sense, sizeof(sense)); + sense_len = MIN(sense_len, req->resp_iov.size - sizeof(req->resp.cmd)); + qemu_iovec_from_buf(&req->resp_iov, sizeof(req->resp.cmd), + sense, sense_len); + req->resp.cmd.sense_len = virtio_tswap32(vdev, sense_len); + } + virtio_scsi_complete_cmd_req(req); +} + +static int virtio_scsi_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, + uint8_t *buf, void *hba_private) +{ + VirtIOSCSIReq *req = hba_private; + + if (cmd->len == 0) { + cmd->len = MIN(VIRTIO_SCSI_CDB_DEFAULT_SIZE, SCSI_CMD_BUF_SIZE); + memcpy(cmd->buf, buf, cmd->len); + } + + /* Extract the direction and mode directly from the request, for + * host device passthrough. + */ + cmd->xfer = req->qsgl.size; + cmd->mode = req->mode; + return 0; +} + +static QEMUSGList *virtio_scsi_get_sg_list(SCSIRequest *r) +{ + VirtIOSCSIReq *req = r->hba_private; + + return &req->qsgl; +} + +static void virtio_scsi_request_cancelled(SCSIRequest *r) +{ + VirtIOSCSIReq *req = r->hba_private; + + if (!req) { + return; + } + if (req->dev->resetting) { + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; + } else { + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; + } + virtio_scsi_complete_cmd_req(req); +} + +static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) +{ + req->resp.cmd.response = VIRTIO_SCSI_S_FAILURE; + virtio_scsi_complete_cmd_req(req); +} + +bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) +{ + VirtIOSCSICommon *vs = &s->parent_obj; + SCSIDevice *d; + int rc; + + rc = virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size, + sizeof(VirtIOSCSICmdResp) + vs->sense_size); + if (rc < 0) { + if (rc == -ENOTSUP) { + virtio_scsi_fail_cmd_req(req); + } else { + virtio_scsi_bad_req(); + } + return false; + } + + d = virtio_scsi_device_find(s, req->req.cmd.lun); + if (!d) { + req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + virtio_scsi_complete_cmd_req(req); + return false; + } + if (s->dataplane_started) { + assert(blk_get_aio_context(d->conf.blk) == s->ctx); + } + req->sreq = scsi_req_new(d, req->req.cmd.tag, + virtio_scsi_get_lun(req->req.cmd.lun), + req->req.cmd.cdb, req); + + if (req->sreq->cmd.mode != SCSI_XFER_NONE + && (req->sreq->cmd.mode != req->mode || + req->sreq->cmd.xfer > req->qsgl.size)) { + req->resp.cmd.response = VIRTIO_SCSI_S_OVERRUN; + virtio_scsi_complete_cmd_req(req); + return false; + } + scsi_req_ref(req->sreq); + blk_io_plug(d->conf.blk); + return true; +} + +void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) +{ + SCSIRequest *sreq = req->sreq; + if (scsi_req_enqueue(sreq)) { + scsi_req_continue(sreq); + } + blk_io_unplug(sreq->dev->conf.blk); + scsi_req_unref(sreq); +} + +static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) +{ + /* use non-QOM casts in the data path */ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + VirtIOSCSIReq *req, *next; + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + if (s->ctx && !s->dataplane_disabled) { + virtio_scsi_dataplane_start(s); + return; + } + while ((req = virtio_scsi_pop_req(s, vq))) { + if (virtio_scsi_handle_cmd_req_prepare(s, req)) { + QTAILQ_INSERT_TAIL(&reqs, req, next); + } + } + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { + virtio_scsi_handle_cmd_req_submit(s, req); + } +} + +static void virtio_scsi_get_config(VirtIODevice *vdev, + uint8_t *config) +{ + VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config; + VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev); + + virtio_stl_p(vdev, &scsiconf->num_queues, s->conf.num_queues); + virtio_stl_p(vdev, &scsiconf->seg_max, 128 - 2); + virtio_stl_p(vdev, &scsiconf->max_sectors, s->conf.max_sectors); + virtio_stl_p(vdev, &scsiconf->cmd_per_lun, s->conf.cmd_per_lun); + virtio_stl_p(vdev, &scsiconf->event_info_size, sizeof(VirtIOSCSIEvent)); + virtio_stl_p(vdev, &scsiconf->sense_size, s->sense_size); + virtio_stl_p(vdev, &scsiconf->cdb_size, s->cdb_size); + virtio_stw_p(vdev, &scsiconf->max_channel, VIRTIO_SCSI_MAX_CHANNEL); + virtio_stw_p(vdev, &scsiconf->max_target, VIRTIO_SCSI_MAX_TARGET); + virtio_stl_p(vdev, &scsiconf->max_lun, VIRTIO_SCSI_MAX_LUN); +} + +static void virtio_scsi_set_config(VirtIODevice *vdev, + const uint8_t *config) +{ + VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + if ((uint32_t) virtio_ldl_p(vdev, &scsiconf->sense_size) >= 65536 || + (uint32_t) virtio_ldl_p(vdev, &scsiconf->cdb_size) >= 256) { + error_report("bad data written to virtio-scsi configuration space"); + exit(1); + } + + vs->sense_size = virtio_ldl_p(vdev, &scsiconf->sense_size); + vs->cdb_size = virtio_ldl_p(vdev, &scsiconf->cdb_size); +} + +static uint64_t virtio_scsi_get_features(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp) +{ + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + + /* Firstly sync all virtio-scsi possible supported features */ + requested_features |= s->host_features; + return requested_features; +} + +static void virtio_scsi_reset(VirtIODevice *vdev) +{ + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + if (s->ctx) { + virtio_scsi_dataplane_stop(s); + } + s->resetting++; + qbus_reset_all(&s->bus.qbus); + s->resetting--; + + vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; + s->events_dropped = false; +} + +/* The device does not have anything to save beyond the virtio data. + * Request data is saved with callbacks from SCSI devices. + */ +static void virtio_scsi_save(QEMUFile *f, void *opaque) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + virtio_save(vdev, f); +} + +static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + int ret; + + ret = virtio_load(vdev, f, version_id); + if (ret) { + return ret; + } + return 0; +} + +void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, + uint32_t event, uint32_t reason) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSIReq *req; + VirtIOSCSIEvent *evt; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + if (s->dataplane_started) { + assert(s->ctx); + aio_context_acquire(s->ctx); + } + + if (s->dataplane_started) { + req = virtio_scsi_pop_req_vring(s, s->event_vring); + } else { + req = virtio_scsi_pop_req(s, vs->event_vq); + } + if (!req) { + s->events_dropped = true; + goto out; + } + + if (s->events_dropped) { + event |= VIRTIO_SCSI_T_EVENTS_MISSED; + s->events_dropped = false; + } + + if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) { + virtio_scsi_bad_req(); + } + + evt = &req->resp.event; + memset(evt, 0, sizeof(VirtIOSCSIEvent)); + evt->event = virtio_tswap32(vdev, event); + evt->reason = virtio_tswap32(vdev, reason); + if (!dev) { + assert(event == VIRTIO_SCSI_T_EVENTS_MISSED); + } else { + evt->lun[0] = 1; + evt->lun[1] = dev->id; + + /* Linux wants us to keep the same encoding we use for REPORT LUNS. */ + if (dev->lun >= 256) { + evt->lun[2] = (dev->lun >> 8) | 0x40; + } + evt->lun[3] = dev->lun & 0xFF; + } + virtio_scsi_complete_req(req); +out: + if (s->dataplane_started) { + aio_context_release(s->ctx); + } +} + +static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + + if (s->ctx && !s->dataplane_disabled) { + virtio_scsi_dataplane_start(s); + return; + } + if (s->events_dropped) { + virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); + } +} + +static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) +{ + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) && + dev->type != TYPE_ROM) { + virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, + sense.asc | (sense.ascq << 8)); + } +} + +static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); + + if (s->ctx && !s->dataplane_disabled) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; + } + blk_op_block_all(sd->conf.blk, s->blocker); + aio_context_acquire(s->ctx); + blk_set_aio_context(sd->conf.blk, s->ctx); + aio_context_release(s->ctx); + } + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + virtio_scsi_push_event(s, sd, + VIRTIO_SCSI_T_TRANSPORT_RESET, + VIRTIO_SCSI_EVT_RESET_RESCAN); + } +} + +static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + virtio_scsi_push_event(s, sd, + VIRTIO_SCSI_T_TRANSPORT_RESET, + VIRTIO_SCSI_EVT_RESET_REMOVED); + } + + if (s->ctx) { + blk_op_unblock_all(sd->conf.blk, s->blocker); + } + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); +} + +static struct SCSIBusInfo virtio_scsi_scsi_info = { + .tcq = true, + .max_channel = VIRTIO_SCSI_MAX_CHANNEL, + .max_target = VIRTIO_SCSI_MAX_TARGET, + .max_lun = VIRTIO_SCSI_MAX_LUN, + + .complete = virtio_scsi_command_complete, + .cancel = virtio_scsi_request_cancelled, + .change = virtio_scsi_change, + .parse_cdb = virtio_scsi_parse_cdb, + .get_sg_list = virtio_scsi_get_sg_list, + .save_request = virtio_scsi_save_request, + .load_request = virtio_scsi_load_request, +}; + +void virtio_scsi_common_realize(DeviceState *dev, Error **errp, + HandleOutput ctrl, HandleOutput evt, + HandleOutput cmd) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); + int i; + + virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI, + sizeof(VirtIOSCSIConfig)); + + if (s->conf.num_queues == 0 || + s->conf.num_queues > VIRTIO_QUEUE_MAX - 2) { + error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + "must be a positive integer less than %d.", + s->conf.num_queues, VIRTIO_QUEUE_MAX - 2); + virtio_cleanup(vdev); + return; + } + s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues); + s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; + + s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, + ctrl); + s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, + evt); + for (i = 0; i < s->conf.num_queues; i++) { + s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, + cmd); + } + + if (s->conf.iothread) { + virtio_scsi_set_iothread(VIRTIO_SCSI(s), s->conf.iothread); + } +} + +/* Disable dataplane thread during live migration since it does not + * update the dirty memory bitmap yet. + */ +static void virtio_scsi_migration_state_changed(Notifier *notifier, void *data) +{ + VirtIOSCSI *s = container_of(notifier, VirtIOSCSI, + migration_state_notifier); + MigrationState *mig = data; + + if (migration_in_setup(mig)) { + if (!s->dataplane_started) { + return; + } + virtio_scsi_dataplane_stop(s); + s->dataplane_disabled = true; + } else if (migration_has_finished(mig) || + migration_has_failed(mig)) { + if (s->dataplane_started) { + return; + } + blk_drain_all(); /* complete in-flight non-dataplane requests */ + s->dataplane_disabled = false; + } +} + +static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOSCSI *s = VIRTIO_SCSI(dev); + static int virtio_scsi_id; + Error *err = NULL; + + virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, + virtio_scsi_handle_cmd); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + scsi_bus_new(&s->bus, sizeof(s->bus), dev, + &virtio_scsi_scsi_info, vdev->bus_name); + /* override default SCSI bus hotplug-handler, with virtio-scsi's one */ + qbus_set_hotplug_handler(BUS(&s->bus), dev, &error_abort); + + if (!dev->hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + } + + register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1, + virtio_scsi_save, virtio_scsi_load, s); + s->migration_state_notifier.notify = virtio_scsi_migration_state_changed; + add_migration_state_change_notifier(&s->migration_state_notifier); + + error_setg(&s->blocker, "block device is in use by data plane"); +} + +static void virtio_scsi_instance_init(Object *obj) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(obj); + + object_property_add_link(obj, "iothread", TYPE_IOTHREAD, + (Object **)&vs->conf.iothread, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); +} + +void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + + g_free(vs->cmd_vqs); + virtio_cleanup(vdev); +} + +static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIOSCSI *s = VIRTIO_SCSI(dev); + + error_free(s->blocker); + + unregister_savevm(dev, "virtio-scsi", s); + remove_migration_state_change_notifier(&s->migration_state_notifier); + + virtio_scsi_common_unrealize(dev, errp); +} + +static Property virtio_scsi_properties[] = { + DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1), + DEFINE_PROP_UINT32("max_sectors", VirtIOSCSI, parent_obj.conf.max_sectors, + 0xFFFF), + DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSI, parent_obj.conf.cmd_per_lun, + 128), + DEFINE_PROP_BIT("hotplug", VirtIOSCSI, host_features, + VIRTIO_SCSI_F_HOTPLUG, true), + DEFINE_PROP_BIT("param_change", VirtIOSCSI, host_features, + VIRTIO_SCSI_F_CHANGE, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_scsi_common_class_init(ObjectClass *klass, void *data) +{ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + vdc->get_config = virtio_scsi_get_config; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static void virtio_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + + dc->props = virtio_scsi_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = virtio_scsi_device_realize; + vdc->unrealize = virtio_scsi_device_unrealize; + vdc->set_config = virtio_scsi_set_config; + vdc->get_features = virtio_scsi_get_features; + vdc->reset = virtio_scsi_reset; + hc->plug = virtio_scsi_hotplug; + hc->unplug = virtio_scsi_hotunplug; +} + +static const TypeInfo virtio_scsi_common_info = { + .name = TYPE_VIRTIO_SCSI_COMMON, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOSCSICommon), + .abstract = true, + .class_init = virtio_scsi_common_class_init, +}; + +static const TypeInfo virtio_scsi_info = { + .name = TYPE_VIRTIO_SCSI, + .parent = TYPE_VIRTIO_SCSI_COMMON, + .instance_size = sizeof(VirtIOSCSI), + .instance_init = virtio_scsi_instance_init, + .class_init = virtio_scsi_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_scsi_common_info); + type_register_static(&virtio_scsi_info); +} + +type_init(virtio_register_types) diff --git a/qemu/hw/scsi/vmw_pvscsi.c b/qemu/hw/scsi/vmw_pvscsi.c new file mode 100644 index 000000000..9c71f31fe --- /dev/null +++ b/qemu/hw/scsi/vmw_pvscsi.c @@ -0,0 +1,1219 @@ +/* + * QEMU VMWARE PVSCSI paravirtual SCSI bus + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Based on implementation by Paolo Bonzini + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * Dmitry Fleytman <dmitry@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + * NOTE about MSI-X: + * MSI-X support has been removed for the moment because it leads Windows OS + * to crash on startup. The crash happens because Windows driver requires + * MSI-X shared memory to be part of the same BAR used for rings state + * registers, etc. This is not supported by QEMU infrastructure so separate + * BAR created from MSI-X purposes. Windows driver fails to deal with 2 BARs. + * + */ + +#include "hw/scsi/scsi.h" +#include <block/scsi.h> +#include "hw/pci/msi.h" +#include "vmw_pvscsi.h" +#include "trace.h" + + +#define PVSCSI_MSI_OFFSET (0x50) +#define PVSCSI_USE_64BIT (true) +#define PVSCSI_PER_VECTOR_MASK (false) + +#define PVSCSI_MAX_DEVS (64) +#define PVSCSI_MSIX_NUM_VECTORS (1) + +#define PVSCSI_MAX_CMD_DATA_WORDS \ + (sizeof(PVSCSICmdDescSetupRings)/sizeof(uint32_t)) + +#define RS_GET_FIELD(m, field) \ + (ldl_le_pci_dma(&container_of(m, PVSCSIState, rings)->parent_obj, \ + (m)->rs_pa + offsetof(struct PVSCSIRingsState, field))) +#define RS_SET_FIELD(m, field, val) \ + (stl_le_pci_dma(&container_of(m, PVSCSIState, rings)->parent_obj, \ + (m)->rs_pa + offsetof(struct PVSCSIRingsState, field), val)) + +#define TYPE_PVSCSI "pvscsi" +#define PVSCSI(obj) OBJECT_CHECK(PVSCSIState, (obj), TYPE_PVSCSI) + +typedef struct PVSCSIRingInfo { + uint64_t rs_pa; + uint32_t txr_len_mask; + uint32_t rxr_len_mask; + uint32_t msg_len_mask; + uint64_t req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; + uint64_t cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; + uint64_t msg_ring_pages_pa[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES]; + uint64_t consumed_ptr; + uint64_t filled_cmp_ptr; + uint64_t filled_msg_ptr; +} PVSCSIRingInfo; + +typedef struct PVSCSISGState { + hwaddr elemAddr; + hwaddr dataAddr; + uint32_t resid; +} PVSCSISGState; + +typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList; + +typedef struct { + PCIDevice parent_obj; + MemoryRegion io_space; + SCSIBus bus; + QEMUBH *completion_worker; + PVSCSIRequestList pending_queue; + PVSCSIRequestList completion_queue; + + uint64_t reg_interrupt_status; /* Interrupt status register value */ + uint64_t reg_interrupt_enabled; /* Interrupt mask register value */ + uint64_t reg_command_status; /* Command status register value */ + + /* Command data adoption mechanism */ + uint64_t curr_cmd; /* Last command arrived */ + uint32_t curr_cmd_data_cntr; /* Amount of data for last command */ + + /* Collector for current command data */ + uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS]; + + uint8_t rings_info_valid; /* Whether data rings initialized */ + uint8_t msg_ring_info_valid; /* Whether message ring initialized */ + uint8_t use_msg; /* Whether to use message ring */ + + uint8_t msi_used; /* Whether MSI support was installed successfully */ + + PVSCSIRingInfo rings; /* Data transfer rings manager */ + uint32_t resetting; /* Reset in progress */ +} PVSCSIState; + +typedef struct PVSCSIRequest { + SCSIRequest *sreq; + PVSCSIState *dev; + uint8_t sense_key; + uint8_t completed; + int lun; + QEMUSGList sgl; + PVSCSISGState sg; + struct PVSCSIRingReqDesc req; + struct PVSCSIRingCmpDesc cmp; + QTAILQ_ENTRY(PVSCSIRequest) next; +} PVSCSIRequest; + +/* Integer binary logarithm */ +static int +pvscsi_log2(uint32_t input) +{ + int log = 0; + assert(input > 0); + while (input >> ++log) { + } + return log; +} + +static void +pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) +{ + int i; + uint32_t txr_len_log2, rxr_len_log2; + uint32_t req_ring_size, cmp_ring_size; + m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT; + + req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; + cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE; + txr_len_log2 = pvscsi_log2(req_ring_size - 1); + rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1); + + m->txr_len_mask = MASK(txr_len_log2); + m->rxr_len_mask = MASK(rxr_len_log2); + + m->consumed_ptr = 0; + m->filled_cmp_ptr = 0; + + for (i = 0; i < ri->reqRingNumPages; i++) { + m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << VMW_PAGE_SHIFT; + } + + for (i = 0; i < ri->cmpRingNumPages; i++) { + m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << VMW_PAGE_SHIFT; + } + + RS_SET_FIELD(m, reqProdIdx, 0); + RS_SET_FIELD(m, reqConsIdx, 0); + RS_SET_FIELD(m, reqNumEntriesLog2, txr_len_log2); + + RS_SET_FIELD(m, cmpProdIdx, 0); + RS_SET_FIELD(m, cmpConsIdx, 0); + RS_SET_FIELD(m, cmpNumEntriesLog2, rxr_len_log2); + + trace_pvscsi_ring_init_data(txr_len_log2, rxr_len_log2); + + /* Flush ring state page changes */ + smp_wmb(); +} + +static void +pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri) +{ + int i; + uint32_t len_log2; + uint32_t ring_size; + + ring_size = ri->numPages * PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE; + len_log2 = pvscsi_log2(ring_size - 1); + + m->msg_len_mask = MASK(len_log2); + + m->filled_msg_ptr = 0; + + for (i = 0; i < ri->numPages; i++) { + m->msg_ring_pages_pa[i] = ri->ringPPNs[i] << VMW_PAGE_SHIFT; + } + + RS_SET_FIELD(m, msgProdIdx, 0); + RS_SET_FIELD(m, msgConsIdx, 0); + RS_SET_FIELD(m, msgNumEntriesLog2, len_log2); + + trace_pvscsi_ring_init_msg(len_log2); + + /* Flush ring state page changes */ + smp_wmb(); +} + +static void +pvscsi_ring_cleanup(PVSCSIRingInfo *mgr) +{ + mgr->rs_pa = 0; + mgr->txr_len_mask = 0; + mgr->rxr_len_mask = 0; + mgr->msg_len_mask = 0; + mgr->consumed_ptr = 0; + mgr->filled_cmp_ptr = 0; + mgr->filled_msg_ptr = 0; + memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa)); + memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa)); + memset(mgr->msg_ring_pages_pa, 0, sizeof(mgr->msg_ring_pages_pa)); +} + +static hwaddr +pvscsi_ring_pop_req_descr(PVSCSIRingInfo *mgr) +{ + uint32_t ready_ptr = RS_GET_FIELD(mgr, reqProdIdx); + + if (ready_ptr != mgr->consumed_ptr) { + uint32_t next_ready_ptr = + mgr->consumed_ptr++ & mgr->txr_len_mask; + uint32_t next_ready_page = + next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; + uint32_t inpage_idx = + next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; + + return mgr->req_ring_pages_pa[next_ready_page] + + inpage_idx * sizeof(PVSCSIRingReqDesc); + } else { + return 0; + } +} + +static void +pvscsi_ring_flush_req(PVSCSIRingInfo *mgr) +{ + RS_SET_FIELD(mgr, reqConsIdx, mgr->consumed_ptr); +} + +static hwaddr +pvscsi_ring_pop_cmp_descr(PVSCSIRingInfo *mgr) +{ + /* + * According to Linux driver code it explicitly verifies that number + * of requests being processed by device is less then the size of + * completion queue, so device may omit completion queue overflow + * conditions check. We assume that this is true for other (Windows) + * drivers as well. + */ + + uint32_t free_cmp_ptr = + mgr->filled_cmp_ptr++ & mgr->rxr_len_mask; + uint32_t free_cmp_page = + free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE; + uint32_t inpage_idx = + free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE; + return mgr->cmp_ring_pages_pa[free_cmp_page] + + inpage_idx * sizeof(PVSCSIRingCmpDesc); +} + +static hwaddr +pvscsi_ring_pop_msg_descr(PVSCSIRingInfo *mgr) +{ + uint32_t free_msg_ptr = + mgr->filled_msg_ptr++ & mgr->msg_len_mask; + uint32_t free_msg_page = + free_msg_ptr / PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE; + uint32_t inpage_idx = + free_msg_ptr % PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE; + return mgr->msg_ring_pages_pa[free_msg_page] + + inpage_idx * sizeof(PVSCSIRingMsgDesc); +} + +static void +pvscsi_ring_flush_cmp(PVSCSIRingInfo *mgr) +{ + /* Flush descriptor changes */ + smp_wmb(); + + trace_pvscsi_ring_flush_cmp(mgr->filled_cmp_ptr); + + RS_SET_FIELD(mgr, cmpProdIdx, mgr->filled_cmp_ptr); +} + +static bool +pvscsi_ring_msg_has_room(PVSCSIRingInfo *mgr) +{ + uint32_t prodIdx = RS_GET_FIELD(mgr, msgProdIdx); + uint32_t consIdx = RS_GET_FIELD(mgr, msgConsIdx); + + return (prodIdx - consIdx) < (mgr->msg_len_mask + 1); +} + +static void +pvscsi_ring_flush_msg(PVSCSIRingInfo *mgr) +{ + /* Flush descriptor changes */ + smp_wmb(); + + trace_pvscsi_ring_flush_msg(mgr->filled_msg_ptr); + + RS_SET_FIELD(mgr, msgProdIdx, mgr->filled_msg_ptr); +} + +static void +pvscsi_reset_state(PVSCSIState *s) +{ + s->curr_cmd = PVSCSI_CMD_FIRST; + s->curr_cmd_data_cntr = 0; + s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED; + s->reg_interrupt_status = 0; + pvscsi_ring_cleanup(&s->rings); + s->rings_info_valid = FALSE; + s->msg_ring_info_valid = FALSE; + QTAILQ_INIT(&s->pending_queue); + QTAILQ_INIT(&s->completion_queue); +} + +static void +pvscsi_update_irq_status(PVSCSIState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + bool should_raise = s->reg_interrupt_enabled & s->reg_interrupt_status; + + trace_pvscsi_update_irq_level(should_raise, s->reg_interrupt_enabled, + s->reg_interrupt_status); + + if (s->msi_used && msi_enabled(d)) { + if (should_raise) { + trace_pvscsi_update_irq_msi(); + msi_notify(d, PVSCSI_VECTOR_COMPLETION); + } + return; + } + + pci_set_irq(d, !!should_raise); +} + +static void +pvscsi_raise_completion_interrupt(PVSCSIState *s) +{ + s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0; + + /* Memory barrier to flush interrupt status register changes*/ + smp_wmb(); + + pvscsi_update_irq_status(s); +} + +static void +pvscsi_raise_message_interrupt(PVSCSIState *s) +{ + s->reg_interrupt_status |= PVSCSI_INTR_MSG_0; + + /* Memory barrier to flush interrupt status register changes*/ + smp_wmb(); + + pvscsi_update_irq_status(s); +} + +static void +pvscsi_cmp_ring_put(PVSCSIState *s, struct PVSCSIRingCmpDesc *cmp_desc) +{ + hwaddr cmp_descr_pa; + + cmp_descr_pa = pvscsi_ring_pop_cmp_descr(&s->rings); + trace_pvscsi_cmp_ring_put(cmp_descr_pa); + cpu_physical_memory_write(cmp_descr_pa, (void *)cmp_desc, + sizeof(*cmp_desc)); +} + +static void +pvscsi_msg_ring_put(PVSCSIState *s, struct PVSCSIRingMsgDesc *msg_desc) +{ + hwaddr msg_descr_pa; + + msg_descr_pa = pvscsi_ring_pop_msg_descr(&s->rings); + trace_pvscsi_msg_ring_put(msg_descr_pa); + cpu_physical_memory_write(msg_descr_pa, (void *)msg_desc, + sizeof(*msg_desc)); +} + +static void +pvscsi_process_completion_queue(void *opaque) +{ + PVSCSIState *s = opaque; + PVSCSIRequest *pvscsi_req; + bool has_completed = false; + + while (!QTAILQ_EMPTY(&s->completion_queue)) { + pvscsi_req = QTAILQ_FIRST(&s->completion_queue); + QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next); + pvscsi_cmp_ring_put(s, &pvscsi_req->cmp); + g_free(pvscsi_req); + has_completed = true; + } + + if (has_completed) { + pvscsi_ring_flush_cmp(&s->rings); + pvscsi_raise_completion_interrupt(s); + } +} + +static void +pvscsi_reset_adapter(PVSCSIState *s) +{ + s->resetting++; + qbus_reset_all_fn(&s->bus); + s->resetting--; + pvscsi_process_completion_queue(s); + assert(QTAILQ_EMPTY(&s->pending_queue)); + pvscsi_reset_state(s); +} + +static void +pvscsi_schedule_completion_processing(PVSCSIState *s) +{ + /* Try putting more complete requests on the ring. */ + if (!QTAILQ_EMPTY(&s->completion_queue)) { + qemu_bh_schedule(s->completion_worker); + } +} + +static void +pvscsi_complete_request(PVSCSIState *s, PVSCSIRequest *r) +{ + assert(!r->completed); + + trace_pvscsi_complete_request(r->cmp.context, r->cmp.dataLen, + r->sense_key); + if (r->sreq != NULL) { + scsi_req_unref(r->sreq); + r->sreq = NULL; + } + r->completed = 1; + QTAILQ_REMOVE(&s->pending_queue, r, next); + QTAILQ_INSERT_TAIL(&s->completion_queue, r, next); + pvscsi_schedule_completion_processing(s); +} + +static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r) +{ + PVSCSIRequest *req = r->hba_private; + + trace_pvscsi_get_sg_list(req->sgl.nsg, req->sgl.size); + + return &req->sgl; +} + +static void +pvscsi_get_next_sg_elem(PVSCSISGState *sg) +{ + struct PVSCSISGElement elem; + + cpu_physical_memory_read(sg->elemAddr, (void *)&elem, sizeof(elem)); + if ((elem.flags & ~PVSCSI_KNOWN_FLAGS) != 0) { + /* + * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in + * header file but its value is unknown. This flag requires + * additional processing, so we put warning here to catch it + * some day and make proper implementation + */ + trace_pvscsi_get_next_sg_elem(elem.flags); + } + + sg->elemAddr += sizeof(elem); + sg->dataAddr = elem.addr; + sg->resid = elem.length; +} + +static void +pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len) +{ + r->cmp.senseLen = MIN(r->req.senseLen, len); + r->sense_key = sense[(sense[0] & 2) ? 1 : 2]; + cpu_physical_memory_write(r->req.senseAddr, sense, r->cmp.senseLen); +} + +static void +pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid) +{ + PVSCSIRequest *pvscsi_req = req->hba_private; + PVSCSIState *s; + + if (!pvscsi_req) { + trace_pvscsi_command_complete_not_found(req->tag); + return; + } + s = pvscsi_req->dev; + + if (resid) { + /* Short transfer. */ + trace_pvscsi_command_complete_data_run(); + pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN; + } + + pvscsi_req->cmp.scsiStatus = status; + if (pvscsi_req->cmp.scsiStatus == CHECK_CONDITION) { + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + int sense_len = + scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense)); + + trace_pvscsi_command_complete_sense_len(sense_len); + pvscsi_write_sense(pvscsi_req, sense, sense_len); + } + qemu_sglist_destroy(&pvscsi_req->sgl); + pvscsi_complete_request(s, pvscsi_req); +} + +static void +pvscsi_send_msg(PVSCSIState *s, SCSIDevice *dev, uint32_t msg_type) +{ + if (s->msg_ring_info_valid && pvscsi_ring_msg_has_room(&s->rings)) { + PVSCSIMsgDescDevStatusChanged msg = {0}; + + msg.type = msg_type; + msg.bus = dev->channel; + msg.target = dev->id; + msg.lun[1] = dev->lun; + + pvscsi_msg_ring_put(s, (PVSCSIRingMsgDesc *)&msg); + pvscsi_ring_flush_msg(&s->rings); + pvscsi_raise_message_interrupt(s); + } +} + +static void +pvscsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) +{ + PVSCSIState *s = PVSCSI(hotplug_dev); + + pvscsi_send_msg(s, SCSI_DEVICE(dev), PVSCSI_MSG_DEV_ADDED); +} + +static void +pvscsi_hot_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) +{ + PVSCSIState *s = PVSCSI(hotplug_dev); + + pvscsi_send_msg(s, SCSI_DEVICE(dev), PVSCSI_MSG_DEV_REMOVED); + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); +} + +static void +pvscsi_request_cancelled(SCSIRequest *req) +{ + PVSCSIRequest *pvscsi_req = req->hba_private; + PVSCSIState *s = pvscsi_req->dev; + + if (pvscsi_req->completed) { + return; + } + + if (pvscsi_req->dev->resetting) { + pvscsi_req->cmp.hostStatus = BTSTAT_BUSRESET; + } else { + pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE; + } + + pvscsi_complete_request(s, pvscsi_req); +} + +static SCSIDevice* +pvscsi_device_find(PVSCSIState *s, int channel, int target, + uint8_t *requested_lun, uint8_t *target_lun) +{ + if (requested_lun[0] || requested_lun[2] || requested_lun[3] || + requested_lun[4] || requested_lun[5] || requested_lun[6] || + requested_lun[7] || (target > PVSCSI_MAX_DEVS)) { + return NULL; + } else { + *target_lun = requested_lun[1]; + return scsi_device_find(&s->bus, channel, target, *target_lun); + } +} + +static PVSCSIRequest * +pvscsi_queue_pending_descriptor(PVSCSIState *s, SCSIDevice **d, + struct PVSCSIRingReqDesc *descr) +{ + PVSCSIRequest *pvscsi_req; + uint8_t lun; + + pvscsi_req = g_malloc0(sizeof(*pvscsi_req)); + pvscsi_req->dev = s; + pvscsi_req->req = *descr; + pvscsi_req->cmp.context = pvscsi_req->req.context; + QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next); + + *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun); + if (*d) { + pvscsi_req->lun = lun; + } + + return pvscsi_req; +} + +static void +pvscsi_convert_sglist(PVSCSIRequest *r) +{ + int chunk_size; + uint64_t data_length = r->req.dataLen; + PVSCSISGState sg = r->sg; + while (data_length) { + while (!sg.resid) { + pvscsi_get_next_sg_elem(&sg); + trace_pvscsi_convert_sglist(r->req.context, r->sg.dataAddr, + r->sg.resid); + } + assert(data_length > 0); + chunk_size = MIN((unsigned) data_length, sg.resid); + if (chunk_size) { + qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size); + } + + sg.dataAddr += chunk_size; + data_length -= chunk_size; + sg.resid -= chunk_size; + } +} + +static void +pvscsi_build_sglist(PVSCSIState *s, PVSCSIRequest *r) +{ + PCIDevice *d = PCI_DEVICE(s); + + pci_dma_sglist_init(&r->sgl, d, 1); + if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) { + pvscsi_convert_sglist(r); + } else { + qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen); + } +} + +static void +pvscsi_process_request_descriptor(PVSCSIState *s, + struct PVSCSIRingReqDesc *descr) +{ + SCSIDevice *d; + PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr); + int64_t n; + + trace_pvscsi_process_req_descr(descr->cdb[0], descr->context); + + if (!d) { + r->cmp.hostStatus = BTSTAT_SELTIMEO; + trace_pvscsi_process_req_descr_unknown_device(); + pvscsi_complete_request(s, r); + return; + } + + if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) { + r->sg.elemAddr = descr->dataAddr; + } + + r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r); + if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV && + (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) { + r->cmp.hostStatus = BTSTAT_BADMSG; + trace_pvscsi_process_req_descr_invalid_dir(); + scsi_req_cancel(r->sreq); + return; + } + if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV && + (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) { + r->cmp.hostStatus = BTSTAT_BADMSG; + trace_pvscsi_process_req_descr_invalid_dir(); + scsi_req_cancel(r->sreq); + return; + } + + pvscsi_build_sglist(s, r); + n = scsi_req_enqueue(r->sreq); + + if (n) { + scsi_req_continue(r->sreq); + } +} + +static void +pvscsi_process_io(PVSCSIState *s) +{ + PVSCSIRingReqDesc descr; + hwaddr next_descr_pa; + + assert(s->rings_info_valid); + while ((next_descr_pa = pvscsi_ring_pop_req_descr(&s->rings)) != 0) { + + /* Only read after production index verification */ + smp_rmb(); + + trace_pvscsi_process_io(next_descr_pa); + cpu_physical_memory_read(next_descr_pa, &descr, sizeof(descr)); + pvscsi_process_request_descriptor(s, &descr); + } + + pvscsi_ring_flush_req(&s->rings); +} + +static void +pvscsi_dbg_dump_tx_rings_config(PVSCSICmdDescSetupRings *rc) +{ + int i; + trace_pvscsi_tx_rings_ppn("Rings State", rc->ringsStatePPN); + + trace_pvscsi_tx_rings_num_pages("Request Ring", rc->reqRingNumPages); + for (i = 0; i < rc->reqRingNumPages; i++) { + trace_pvscsi_tx_rings_ppn("Request Ring", rc->reqRingPPNs[i]); + } + + trace_pvscsi_tx_rings_num_pages("Confirm Ring", rc->cmpRingNumPages); + for (i = 0; i < rc->cmpRingNumPages; i++) { + trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->reqRingPPNs[i]); + } +} + +static uint64_t +pvscsi_on_cmd_config(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_CONFIG"); + return PVSCSI_COMMAND_PROCESSING_FAILED; +} + +static uint64_t +pvscsi_on_cmd_unplug(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_DEVICE_UNPLUG"); + return PVSCSI_COMMAND_PROCESSING_FAILED; +} + +static uint64_t +pvscsi_on_issue_scsi(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_noimpl("PVSCSI_CMD_ISSUE_SCSI"); + return PVSCSI_COMMAND_PROCESSING_FAILED; +} + +static uint64_t +pvscsi_on_cmd_setup_rings(PVSCSIState *s) +{ + PVSCSICmdDescSetupRings *rc = + (PVSCSICmdDescSetupRings *) s->curr_cmd_data; + + trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS"); + + pvscsi_dbg_dump_tx_rings_config(rc); + pvscsi_ring_init_data(&s->rings, rc); + s->rings_info_valid = TRUE; + return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; +} + +static uint64_t +pvscsi_on_cmd_abort(PVSCSIState *s) +{ + PVSCSICmdDescAbortCmd *cmd = (PVSCSICmdDescAbortCmd *) s->curr_cmd_data; + PVSCSIRequest *r, *next; + + trace_pvscsi_on_cmd_abort(cmd->context, cmd->target); + + QTAILQ_FOREACH_SAFE(r, &s->pending_queue, next, next) { + if (r->req.context == cmd->context) { + break; + } + } + if (r) { + assert(!r->completed); + r->cmp.hostStatus = BTSTAT_ABORTQUEUE; + scsi_req_cancel(r->sreq); + } + + return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; +} + +static uint64_t +pvscsi_on_cmd_unknown(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_unknown_data(s->curr_cmd_data[0]); + return PVSCSI_COMMAND_PROCESSING_FAILED; +} + +static uint64_t +pvscsi_on_cmd_reset_device(PVSCSIState *s) +{ + uint8_t target_lun = 0; + struct PVSCSICmdDescResetDevice *cmd = + (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data; + SCSIDevice *sdev; + + sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun); + + trace_pvscsi_on_cmd_reset_dev(cmd->target, (int) target_lun, sdev); + + if (sdev != NULL) { + s->resetting++; + device_reset(&sdev->qdev); + s->resetting--; + return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; + } + + return PVSCSI_COMMAND_PROCESSING_FAILED; +} + +static uint64_t +pvscsi_on_cmd_reset_bus(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_RESET_BUS"); + + s->resetting++; + qbus_reset_all_fn(&s->bus); + s->resetting--; + return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; +} + +static uint64_t +pvscsi_on_cmd_setup_msg_ring(PVSCSIState *s) +{ + PVSCSICmdDescSetupMsgRing *rc = + (PVSCSICmdDescSetupMsgRing *) s->curr_cmd_data; + + trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_MSG_RING"); + + if (!s->use_msg) { + return PVSCSI_COMMAND_PROCESSING_FAILED; + } + + if (s->rings_info_valid) { + pvscsi_ring_init_msg(&s->rings, rc); + s->msg_ring_info_valid = TRUE; + } + return sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t); +} + +static uint64_t +pvscsi_on_cmd_adapter_reset(PVSCSIState *s) +{ + trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_ADAPTER_RESET"); + + pvscsi_reset_adapter(s); + return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; +} + +static const struct { + int data_size; + uint64_t (*handler_fn)(PVSCSIState *s); +} pvscsi_commands[] = { + [PVSCSI_CMD_FIRST] = { + .data_size = 0, + .handler_fn = pvscsi_on_cmd_unknown, + }, + + /* Not implemented, data size defined based on what arrives on windows */ + [PVSCSI_CMD_CONFIG] = { + .data_size = 6 * sizeof(uint32_t), + .handler_fn = pvscsi_on_cmd_config, + }, + + /* Command not implemented, data size is unknown */ + [PVSCSI_CMD_ISSUE_SCSI] = { + .data_size = 0, + .handler_fn = pvscsi_on_issue_scsi, + }, + + /* Command not implemented, data size is unknown */ + [PVSCSI_CMD_DEVICE_UNPLUG] = { + .data_size = 0, + .handler_fn = pvscsi_on_cmd_unplug, + }, + + [PVSCSI_CMD_SETUP_RINGS] = { + .data_size = sizeof(PVSCSICmdDescSetupRings), + .handler_fn = pvscsi_on_cmd_setup_rings, + }, + + [PVSCSI_CMD_RESET_DEVICE] = { + .data_size = sizeof(struct PVSCSICmdDescResetDevice), + .handler_fn = pvscsi_on_cmd_reset_device, + }, + + [PVSCSI_CMD_RESET_BUS] = { + .data_size = 0, + .handler_fn = pvscsi_on_cmd_reset_bus, + }, + + [PVSCSI_CMD_SETUP_MSG_RING] = { + .data_size = sizeof(PVSCSICmdDescSetupMsgRing), + .handler_fn = pvscsi_on_cmd_setup_msg_ring, + }, + + [PVSCSI_CMD_ADAPTER_RESET] = { + .data_size = 0, + .handler_fn = pvscsi_on_cmd_adapter_reset, + }, + + [PVSCSI_CMD_ABORT_CMD] = { + .data_size = sizeof(struct PVSCSICmdDescAbortCmd), + .handler_fn = pvscsi_on_cmd_abort, + }, +}; + +static void +pvscsi_do_command_processing(PVSCSIState *s) +{ + size_t bytes_arrived = s->curr_cmd_data_cntr * sizeof(uint32_t); + + assert(s->curr_cmd < PVSCSI_CMD_LAST); + if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) { + s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s); + s->curr_cmd = PVSCSI_CMD_FIRST; + s->curr_cmd_data_cntr = 0; + } +} + +static void +pvscsi_on_command_data(PVSCSIState *s, uint32_t value) +{ + size_t bytes_arrived = s->curr_cmd_data_cntr * sizeof(uint32_t); + + assert(bytes_arrived < sizeof(s->curr_cmd_data)); + s->curr_cmd_data[s->curr_cmd_data_cntr++] = value; + + pvscsi_do_command_processing(s); +} + +static void +pvscsi_on_command(PVSCSIState *s, uint64_t cmd_id) +{ + if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) { + s->curr_cmd = cmd_id; + } else { + s->curr_cmd = PVSCSI_CMD_FIRST; + trace_pvscsi_on_cmd_unknown(cmd_id); + } + + s->curr_cmd_data_cntr = 0; + s->reg_command_status = PVSCSI_COMMAND_NOT_ENOUGH_DATA; + + pvscsi_do_command_processing(s); +} + +static void +pvscsi_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PVSCSIState *s = opaque; + + switch (addr) { + case PVSCSI_REG_OFFSET_COMMAND: + pvscsi_on_command(s, val); + break; + + case PVSCSI_REG_OFFSET_COMMAND_DATA: + pvscsi_on_command_data(s, (uint32_t) val); + break; + + case PVSCSI_REG_OFFSET_INTR_STATUS: + trace_pvscsi_io_write("PVSCSI_REG_OFFSET_INTR_STATUS", val); + s->reg_interrupt_status &= ~val; + pvscsi_update_irq_status(s); + pvscsi_schedule_completion_processing(s); + break; + + case PVSCSI_REG_OFFSET_INTR_MASK: + trace_pvscsi_io_write("PVSCSI_REG_OFFSET_INTR_MASK", val); + s->reg_interrupt_enabled = val; + pvscsi_update_irq_status(s); + break; + + case PVSCSI_REG_OFFSET_KICK_NON_RW_IO: + trace_pvscsi_io_write("PVSCSI_REG_OFFSET_KICK_NON_RW_IO", val); + pvscsi_process_io(s); + break; + + case PVSCSI_REG_OFFSET_KICK_RW_IO: + trace_pvscsi_io_write("PVSCSI_REG_OFFSET_KICK_RW_IO", val); + pvscsi_process_io(s); + break; + + case PVSCSI_REG_OFFSET_DEBUG: + trace_pvscsi_io_write("PVSCSI_REG_OFFSET_DEBUG", val); + break; + + default: + trace_pvscsi_io_write_unknown(addr, size, val); + break; + } + +} + +static uint64_t +pvscsi_io_read(void *opaque, hwaddr addr, unsigned size) +{ + PVSCSIState *s = opaque; + + switch (addr) { + case PVSCSI_REG_OFFSET_INTR_STATUS: + trace_pvscsi_io_read("PVSCSI_REG_OFFSET_INTR_STATUS", + s->reg_interrupt_status); + return s->reg_interrupt_status; + + case PVSCSI_REG_OFFSET_INTR_MASK: + trace_pvscsi_io_read("PVSCSI_REG_OFFSET_INTR_MASK", + s->reg_interrupt_status); + return s->reg_interrupt_enabled; + + case PVSCSI_REG_OFFSET_COMMAND_STATUS: + trace_pvscsi_io_read("PVSCSI_REG_OFFSET_COMMAND_STATUS", + s->reg_interrupt_status); + return s->reg_command_status; + + default: + trace_pvscsi_io_read_unknown(addr, size); + return 0; + } +} + + +static bool +pvscsi_init_msi(PVSCSIState *s) +{ + int res; + PCIDevice *d = PCI_DEVICE(s); + + res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS, + PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK); + if (res < 0) { + trace_pvscsi_init_msi_fail(res); + s->msi_used = false; + } else { + s->msi_used = true; + } + + return s->msi_used; +} + +static void +pvscsi_cleanup_msi(PVSCSIState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + + if (s->msi_used) { + msi_uninit(d); + } +} + +static const MemoryRegionOps pvscsi_ops = { + .read = pvscsi_io_read, + .write = pvscsi_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const struct SCSIBusInfo pvscsi_scsi_info = { + .tcq = true, + .max_target = PVSCSI_MAX_DEVS, + .max_channel = 0, + .max_lun = 0, + + .get_sg_list = pvscsi_get_sg_list, + .complete = pvscsi_command_complete, + .cancel = pvscsi_request_cancelled, +}; + +static int +pvscsi_init(PCIDevice *pci_dev) +{ + PVSCSIState *s = PVSCSI(pci_dev); + + trace_pvscsi_state("init"); + + /* PCI subsystem ID */ + pci_dev->config[PCI_SUBSYSTEM_ID] = 0x00; + pci_dev->config[PCI_SUBSYSTEM_ID + 1] = 0x10; + + /* PCI latency timer = 255 */ + pci_dev->config[PCI_LATENCY_TIMER] = 0xff; + + /* Interrupt pin A */ + pci_config_set_interrupt_pin(pci_dev->config, 1); + + memory_region_init_io(&s->io_space, OBJECT(s), &pvscsi_ops, s, + "pvscsi-io", PVSCSI_MEM_SPACE_SIZE); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space); + + pvscsi_init_msi(s); + + s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); + if (!s->completion_worker) { + pvscsi_cleanup_msi(s); + return -ENOMEM; + } + + scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(pci_dev), + &pvscsi_scsi_info, NULL); + /* override default SCSI bus hotplug-handler, with pvscsi's one */ + qbus_set_hotplug_handler(BUS(&s->bus), DEVICE(s), &error_abort); + pvscsi_reset_state(s); + + return 0; +} + +static void +pvscsi_uninit(PCIDevice *pci_dev) +{ + PVSCSIState *s = PVSCSI(pci_dev); + + trace_pvscsi_state("uninit"); + qemu_bh_delete(s->completion_worker); + + pvscsi_cleanup_msi(s); +} + +static void +pvscsi_reset(DeviceState *dev) +{ + PCIDevice *d = PCI_DEVICE(dev); + PVSCSIState *s = PVSCSI(d); + + trace_pvscsi_state("reset"); + pvscsi_reset_adapter(s); +} + +static void +pvscsi_pre_save(void *opaque) +{ + PVSCSIState *s = (PVSCSIState *) opaque; + + trace_pvscsi_state("presave"); + + assert(QTAILQ_EMPTY(&s->pending_queue)); + assert(QTAILQ_EMPTY(&s->completion_queue)); +} + +static int +pvscsi_post_load(void *opaque, int version_id) +{ + trace_pvscsi_state("postload"); + return 0; +} + +static const VMStateDescription vmstate_pvscsi = { + .name = "pvscsi", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = pvscsi_pre_save, + .post_load = pvscsi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState), + VMSTATE_UINT8(msi_used, PVSCSIState), + VMSTATE_UINT32(resetting, PVSCSIState), + VMSTATE_UINT64(reg_interrupt_status, PVSCSIState), + VMSTATE_UINT64(reg_interrupt_enabled, PVSCSIState), + VMSTATE_UINT64(reg_command_status, PVSCSIState), + VMSTATE_UINT64(curr_cmd, PVSCSIState), + VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSIState), + VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSIState, + ARRAY_SIZE(((PVSCSIState *)NULL)->curr_cmd_data)), + VMSTATE_UINT8(rings_info_valid, PVSCSIState), + VMSTATE_UINT8(msg_ring_info_valid, PVSCSIState), + VMSTATE_UINT8(use_msg, PVSCSIState), + + VMSTATE_UINT64(rings.rs_pa, PVSCSIState), + VMSTATE_UINT32(rings.txr_len_mask, PVSCSIState), + VMSTATE_UINT32(rings.rxr_len_mask, PVSCSIState), + VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSIState, + PVSCSI_SETUP_RINGS_MAX_NUM_PAGES), + VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSIState, + PVSCSI_SETUP_RINGS_MAX_NUM_PAGES), + VMSTATE_UINT64(rings.consumed_ptr, PVSCSIState), + VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSIState), + + VMSTATE_END_OF_LIST() + } +}; + +static Property pvscsi_properties[] = { + DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pvscsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + + k->init = pvscsi_init; + k->exit = pvscsi_uninit; + k->vendor_id = PCI_VENDOR_ID_VMWARE; + k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; + k->class_id = PCI_CLASS_STORAGE_SCSI; + k->subsystem_id = 0x1000; + dc->reset = pvscsi_reset; + dc->vmsd = &vmstate_pvscsi; + dc->props = pvscsi_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + hc->unplug = pvscsi_hot_unplug; + hc->plug = pvscsi_hotplug; +} + +static const TypeInfo pvscsi_info = { + .name = TYPE_PVSCSI, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PVSCSIState), + .class_init = pvscsi_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void +pvscsi_register_types(void) +{ + type_register_static(&pvscsi_info); +} + +type_init(pvscsi_register_types); diff --git a/qemu/hw/scsi/vmw_pvscsi.h b/qemu/hw/scsi/vmw_pvscsi.h new file mode 100644 index 000000000..17fcf6627 --- /dev/null +++ b/qemu/hw/scsi/vmw_pvscsi.h @@ -0,0 +1,434 @@ +/* + * VMware PVSCSI header file + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Maintained by: Arvind Kumar <arvindkumar@vmware.com> + * + */ + +#ifndef VMW_PVSCSI_H +#define VMW_PVSCSI_H + +#define VMW_PAGE_SIZE (4096) +#define VMW_PAGE_SHIFT (12) + +#define MASK(n) ((1 << (n)) - 1) /* make an n-bit mask */ + +/* + * host adapter status/error codes + */ +enum HostBusAdapterStatus { + BTSTAT_SUCCESS = 0x00, /* CCB complete normally with no errors */ + BTSTAT_LINKED_COMMAND_COMPLETED = 0x0a, + BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b, + BTSTAT_DATA_UNDERRUN = 0x0c, + BTSTAT_SELTIMEO = 0x11, /* SCSI selection timeout */ + BTSTAT_DATARUN = 0x12, /* data overrun/underrun */ + BTSTAT_BUSFREE = 0x13, /* unexpected bus free */ + BTSTAT_INVPHASE = 0x14, /* invalid bus phase or sequence */ + /* requested by target */ + BTSTAT_LUNMISMATCH = 0x17, /* linked CCB has different LUN */ + /* from first CCB */ + BTSTAT_SENSFAILED = 0x1b, /* auto request sense failed */ + BTSTAT_TAGREJECT = 0x1c, /* SCSI II tagged queueing message */ + /* rejected by target */ + BTSTAT_BADMSG = 0x1d, /* unsupported message received by */ + /* the host adapter */ + BTSTAT_HAHARDWARE = 0x20, /* host adapter hardware failed */ + BTSTAT_NORESPONSE = 0x21, /* target did not respond to SCSI ATN, */ + /* sent a SCSI RST */ + BTSTAT_SENTRST = 0x22, /* host adapter asserted a SCSI RST */ + BTSTAT_RECVRST = 0x23, /* other SCSI devices asserted a SCSI RST */ + BTSTAT_DISCONNECT = 0x24, /* target device reconnected improperly */ + /* (w/o tag) */ + BTSTAT_BUSRESET = 0x25, /* host adapter issued BUS device reset */ + BTSTAT_ABORTQUEUE = 0x26, /* abort queue generated */ + BTSTAT_HASOFTWARE = 0x27, /* host adapter software error */ + BTSTAT_HATIMEOUT = 0x30, /* host adapter hardware timeout error */ + BTSTAT_SCSIPARITY = 0x34, /* SCSI parity error detected */ +}; + +/* + * Register offsets. + * + * These registers are accessible both via i/o space and mm i/o. + */ + +enum PVSCSIRegOffset { + PVSCSI_REG_OFFSET_COMMAND = 0x0, + PVSCSI_REG_OFFSET_COMMAND_DATA = 0x4, + PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x8, + PVSCSI_REG_OFFSET_LAST_STS_0 = 0x100, + PVSCSI_REG_OFFSET_LAST_STS_1 = 0x104, + PVSCSI_REG_OFFSET_LAST_STS_2 = 0x108, + PVSCSI_REG_OFFSET_LAST_STS_3 = 0x10c, + PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c, + PVSCSI_REG_OFFSET_INTR_MASK = 0x2010, + PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014, + PVSCSI_REG_OFFSET_DEBUG = 0x3018, + PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018, +}; + +/* + * Virtual h/w commands. + */ + +enum PVSCSICommands { + PVSCSI_CMD_FIRST = 0, /* has to be first */ + + PVSCSI_CMD_ADAPTER_RESET = 1, + PVSCSI_CMD_ISSUE_SCSI = 2, + PVSCSI_CMD_SETUP_RINGS = 3, + PVSCSI_CMD_RESET_BUS = 4, + PVSCSI_CMD_RESET_DEVICE = 5, + PVSCSI_CMD_ABORT_CMD = 6, + PVSCSI_CMD_CONFIG = 7, + PVSCSI_CMD_SETUP_MSG_RING = 8, + PVSCSI_CMD_DEVICE_UNPLUG = 9, + + PVSCSI_CMD_LAST = 10 /* has to be last */ +}; + +#define PVSCSI_COMMAND_PROCESSING_SUCCEEDED (0) +#define PVSCSI_COMMAND_PROCESSING_FAILED (-1) +#define PVSCSI_COMMAND_NOT_ENOUGH_DATA (-2) + +/* + * Command descriptor for PVSCSI_CMD_RESET_DEVICE -- + */ + +struct PVSCSICmdDescResetDevice { + uint32_t target; + uint8_t lun[8]; +} QEMU_PACKED; + +typedef struct PVSCSICmdDescResetDevice PVSCSICmdDescResetDevice; + +/* + * Command descriptor for PVSCSI_CMD_ABORT_CMD -- + * + * - currently does not support specifying the LUN. + * - pad should be 0. + */ + +struct PVSCSICmdDescAbortCmd { + uint64_t context; + uint32_t target; + uint32_t pad; +} QEMU_PACKED; + +typedef struct PVSCSICmdDescAbortCmd PVSCSICmdDescAbortCmd; + +/* + * Command descriptor for PVSCSI_CMD_SETUP_RINGS -- + * + * Notes: + * - reqRingNumPages and cmpRingNumPages need to be power of two. + * - reqRingNumPages and cmpRingNumPages need to be different from 0, + * - reqRingNumPages and cmpRingNumPages need to be inferior to + * PVSCSI_SETUP_RINGS_MAX_NUM_PAGES. + */ + +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32 +struct PVSCSICmdDescSetupRings { + uint32_t reqRingNumPages; + uint32_t cmpRingNumPages; + uint64_t ringsStatePPN; + uint64_t reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; + uint64_t cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; +} QEMU_PACKED; + +typedef struct PVSCSICmdDescSetupRings PVSCSICmdDescSetupRings; + +/* + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING -- + * + * Notes: + * - this command was not supported in the initial revision of the h/w + * interface. Before using it, you need to check that it is supported by + * writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then + * immediately after read the 'command status' register: + * * a value of -1 means that the cmd is NOT supported, + * * a value != -1 means that the cmd IS supported. + * If it's supported the 'command status' register should return: + * sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t). + * - this command should be issued _after_ the usual SETUP_RINGS so that the + * RingsState page is already setup. If not, the command is a nop. + * - numPages needs to be a power of two, + * - numPages needs to be different from 0, + * - pad should be zero. + */ + +#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 16 + +struct PVSCSICmdDescSetupMsgRing { + uint32_t numPages; + uint32_t pad; + uint64_t ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES]; +} QEMU_PACKED; + +typedef struct PVSCSICmdDescSetupMsgRing PVSCSICmdDescSetupMsgRing; + +enum PVSCSIMsgType { + PVSCSI_MSG_DEV_ADDED = 0, + PVSCSI_MSG_DEV_REMOVED = 1, + PVSCSI_MSG_LAST = 2, +}; + +/* + * Msg descriptor. + * + * sizeof(struct PVSCSIRingMsgDesc) == 128. + * + * - type is of type enum PVSCSIMsgType. + * - the content of args depend on the type of event being delivered. + */ + +struct PVSCSIRingMsgDesc { + uint32_t type; + uint32_t args[31]; +} QEMU_PACKED; + +typedef struct PVSCSIRingMsgDesc PVSCSIRingMsgDesc; + +struct PVSCSIMsgDescDevStatusChanged { + uint32_t type; /* PVSCSI_MSG_DEV _ADDED / _REMOVED */ + uint32_t bus; + uint32_t target; + uint8_t lun[8]; + uint32_t pad[27]; +} QEMU_PACKED; + +typedef struct PVSCSIMsgDescDevStatusChanged PVSCSIMsgDescDevStatusChanged; + +/* + * Rings state. + * + * - the fields: + * . msgProdIdx, + * . msgConsIdx, + * . msgNumEntriesLog2, + * .. are only used once the SETUP_MSG_RING cmd has been issued. + * - 'pad' helps to ensure that the msg related fields are on their own + * cache-line. + */ + +struct PVSCSIRingsState { + uint32_t reqProdIdx; + uint32_t reqConsIdx; + uint32_t reqNumEntriesLog2; + + uint32_t cmpProdIdx; + uint32_t cmpConsIdx; + uint32_t cmpNumEntriesLog2; + + uint8_t pad[104]; + + uint32_t msgProdIdx; + uint32_t msgConsIdx; + uint32_t msgNumEntriesLog2; +} QEMU_PACKED; + +typedef struct PVSCSIRingsState PVSCSIRingsState; + +/* + * Request descriptor. + * + * sizeof(RingReqDesc) = 128 + * + * - context: is a unique identifier of a command. It could normally be any + * 64bit value, however we currently store it in the serialNumber variable + * of struct SCSI_Command, so we have the following restrictions due to the + * way this field is handled in the vmkernel storage stack: + * * this value can't be 0, + * * the upper 32bit need to be 0 since serialNumber is as a uint32_t. + * Currently tracked as PR 292060. + * - dataLen: contains the total number of bytes that need to be transferred. + * - dataAddr: + * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first + * s/g table segment, each s/g segment is entirely contained on a single + * page of physical memory, + * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of + * the buffer used for the DMA transfer, + * - flags: + * * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above, + * * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved, + * * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory, + * * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device, + * * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than + * 16bytes. To be specified. + * - vcpuHint: vcpuId of the processor that will be most likely waiting for the + * completion of the i/o. For guest OSes that use lowest priority message + * delivery mode (such as windows), we use this "hint" to deliver the + * completion action to the proper vcpu. For now, we can use the vcpuId of + * the processor that initiated the i/o as a likely candidate for the vcpu + * that will be waiting for the completion.. + * - bus should be 0: we currently only support bus 0 for now. + * - unused should be zero'd. + */ + +#define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0) +#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1) +#define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2) +#define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3) +#define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4) + +#define PVSCSI_KNOWN_FLAGS \ + (PVSCSI_FLAG_CMD_WITH_SG_LIST | \ + PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB | \ + PVSCSI_FLAG_CMD_DIR_NONE | \ + PVSCSI_FLAG_CMD_DIR_TOHOST | \ + PVSCSI_FLAG_CMD_DIR_TODEVICE) + +struct PVSCSIRingReqDesc { + uint64_t context; + uint64_t dataAddr; + uint64_t dataLen; + uint64_t senseAddr; + uint32_t senseLen; + uint32_t flags; + uint8_t cdb[16]; + uint8_t cdbLen; + uint8_t lun[8]; + uint8_t tag; + uint8_t bus; + uint8_t target; + uint8_t vcpuHint; + uint8_t unused[59]; +} QEMU_PACKED; + +typedef struct PVSCSIRingReqDesc PVSCSIRingReqDesc; + +/* + * Scatter-gather list management. + * + * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the + * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g + * table segment. + * + * - each segment of the s/g table contain a succession of struct + * PVSCSISGElement. + * - each segment is entirely contained on a single physical page of memory. + * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in + * PVSCSISGElement.flags and in this case: + * * addr is the PA of the next s/g segment, + * * length is undefined, assumed to be 0. + */ + +struct PVSCSISGElement { + uint64_t addr; + uint32_t length; + uint32_t flags; +} QEMU_PACKED; + +typedef struct PVSCSISGElement PVSCSISGElement; + +/* + * Completion descriptor. + * + * sizeof(RingCmpDesc) = 32 + * + * - context: identifier of the command. The same thing that was specified + * under "context" as part of struct RingReqDesc at initiation time, + * - dataLen: number of bytes transferred for the actual i/o operation, + * - senseLen: number of bytes written into the sense buffer, + * - hostStatus: adapter status, + * - scsiStatus: device status, + * - pad should be zero. + */ + +struct PVSCSIRingCmpDesc { + uint64_t context; + uint64_t dataLen; + uint32_t senseLen; + uint16_t hostStatus; + uint16_t scsiStatus; + uint32_t pad[2]; +} QEMU_PACKED; + +typedef struct PVSCSIRingCmpDesc PVSCSIRingCmpDesc; + +/* + * Interrupt status / IRQ bits. + */ + +#define PVSCSI_INTR_CMPL_0 (1 << 0) +#define PVSCSI_INTR_CMPL_1 (1 << 1) +#define PVSCSI_INTR_CMPL_MASK MASK(2) + +#define PVSCSI_INTR_MSG_0 (1 << 2) +#define PVSCSI_INTR_MSG_1 (1 << 3) +#define PVSCSI_INTR_MSG_MASK (MASK(2) << 2) + +#define PVSCSI_INTR_ALL_SUPPORTED MASK(4) + +/* + * Number of MSI-X vectors supported. + */ +#define PVSCSI_MAX_INTRS 24 + +/* + * Enumeration of supported MSI-X vectors + */ +#define PVSCSI_VECTOR_COMPLETION 0 + +/* + * Misc constants for the rings. + */ + +#define PVSCSI_MAX_NUM_PAGES_REQ_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES +#define PVSCSI_MAX_NUM_PAGES_CMP_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES +#define PVSCSI_MAX_NUM_PAGES_MSG_RING PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES + +#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \ + (VMW_PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc)) + +#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \ + (VMW_PAGE_SIZE / sizeof(PVSCSIRingCmpDesc)) + +#define PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE \ + (VMW_PAGE_SIZE / sizeof(PVSCSIRingMsgDesc)) + +#define PVSCSI_MAX_REQ_QUEUE_DEPTH \ + (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE) + +#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES 1 +#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1 +#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES 2 +#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES 2 +#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES 2 + +enum PVSCSIMemSpace { + PVSCSI_MEM_SPACE_COMMAND_PAGE = 0, + PVSCSI_MEM_SPACE_INTR_STATUS_PAGE = 1, + PVSCSI_MEM_SPACE_MISC_PAGE = 2, + PVSCSI_MEM_SPACE_KICK_IO_PAGE = 4, + PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE = 6, + PVSCSI_MEM_SPACE_MSIX_PBA_PAGE = 7, +}; + +#define PVSCSI_MEM_SPACE_NUM_PAGES \ + (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES + \ + PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES + \ + PVSCSI_MEM_SPACE_MISC_NUM_PAGES + \ + PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES + \ + PVSCSI_MEM_SPACE_MSIX_NUM_PAGES) + +#define PVSCSI_MEM_SPACE_SIZE (PVSCSI_MEM_SPACE_NUM_PAGES * VMW_PAGE_SIZE) + +#endif /* VMW_PVSCSI_H */ |