diff options
Diffstat (limited to 'qemu/hw/block')
-rw-r--r-- | qemu/hw/block/Makefile.objs | 15 | ||||
-rw-r--r-- | qemu/hw/block/block.c | 91 | ||||
-rw-r--r-- | qemu/hw/block/cdrom.c | 155 | ||||
-rw-r--r-- | qemu/hw/block/dataplane/Makefile.objs | 1 | ||||
-rw-r--r-- | qemu/hw/block/dataplane/virtio-blk.c | 341 | ||||
-rw-r--r-- | qemu/hw/block/dataplane/virtio-blk.h | 30 | ||||
-rw-r--r-- | qemu/hw/block/ecc.c | 90 | ||||
-rw-r--r-- | qemu/hw/block/fdc.c | 2529 | ||||
-rw-r--r-- | qemu/hw/block/hd-geometry.c | 165 | ||||
-rw-r--r-- | qemu/hw/block/m25p80.c | 711 | ||||
-rw-r--r-- | qemu/hw/block/nand.c | 799 | ||||
-rw-r--r-- | qemu/hw/block/nvme.c | 967 | ||||
-rw-r--r-- | qemu/hw/block/nvme.h | 712 | ||||
-rw-r--r-- | qemu/hw/block/onenand.c | 848 | ||||
-rw-r--r-- | qemu/hw/block/pflash_cfi01.c | 954 | ||||
-rw-r--r-- | qemu/hw/block/pflash_cfi02.c | 795 | ||||
-rw-r--r-- | qemu/hw/block/tc58128.c | 180 | ||||
-rw-r--r-- | qemu/hw/block/virtio-blk.c | 1012 | ||||
-rw-r--r-- | qemu/hw/block/xen_blkif.h | 115 | ||||
-rw-r--r-- | qemu/hw/block/xen_disk.c | 1106 |
20 files changed, 11616 insertions, 0 deletions
diff --git a/qemu/hw/block/Makefile.objs b/qemu/hw/block/Makefile.objs new file mode 100644 index 000000000..d4c3ab758 --- /dev/null +++ b/qemu/hw/block/Makefile.objs @@ -0,0 +1,15 @@ +common-obj-y += block.o cdrom.o hd-geometry.o +common-obj-$(CONFIG_FDC) += fdc.o +common-obj-$(CONFIG_SSI_M25P80) += m25p80.o +common-obj-$(CONFIG_NAND) += nand.o +common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o +common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o +common-obj-$(CONFIG_XEN_BACKEND) += xen_disk.o +common-obj-$(CONFIG_ECC) += ecc.o +common-obj-$(CONFIG_ONENAND) += onenand.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o + +obj-$(CONFIG_SH4) += tc58128.o + +obj-$(CONFIG_VIRTIO) += virtio-blk.o +obj-$(CONFIG_VIRTIO) += dataplane/ diff --git a/qemu/hw/block/block.c b/qemu/hw/block/block.c new file mode 100644 index 000000000..f7243e5b9 --- /dev/null +++ b/qemu/hw/block/block.c @@ -0,0 +1,91 @@ +/* + * Common code for block device models + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "qemu/error-report.h" + +void blkconf_serial(BlockConf *conf, char **serial) +{ + DriveInfo *dinfo; + + if (!*serial) { + /* try to fall back to value set with legacy -drive serial=... */ + dinfo = blk_legacy_dinfo(conf->blk); + if (dinfo) { + *serial = g_strdup(dinfo->serial); + } + } +} + +void blkconf_blocksizes(BlockConf *conf) +{ + BlockBackend *blk = conf->blk; + BlockSizes blocksizes; + int backend_ret; + + backend_ret = blk_probe_blocksizes(blk, &blocksizes); + /* fill in detected values if they are not defined via qemu command line */ + if (!conf->physical_block_size) { + if (!backend_ret) { + conf->physical_block_size = blocksizes.phys; + } else { + conf->physical_block_size = BDRV_SECTOR_SIZE; + } + } + if (!conf->logical_block_size) { + if (!backend_ret) { + conf->logical_block_size = blocksizes.log; + } else { + conf->logical_block_size = BDRV_SECTOR_SIZE; + } + } +} + +void blkconf_geometry(BlockConf *conf, int *ptrans, + unsigned cyls_max, unsigned heads_max, unsigned secs_max, + Error **errp) +{ + DriveInfo *dinfo; + + if (!conf->cyls && !conf->heads && !conf->secs) { + /* try to fall back to value set with legacy -drive cyls=... */ + dinfo = blk_legacy_dinfo(conf->blk); + if (dinfo) { + conf->cyls = dinfo->cyls; + conf->heads = dinfo->heads; + conf->secs = dinfo->secs; + if (ptrans) { + *ptrans = dinfo->trans; + } + } + } + if (!conf->cyls && !conf->heads && !conf->secs) { + hd_geometry_guess(conf->blk, + &conf->cyls, &conf->heads, &conf->secs, + ptrans); + } else if (ptrans && *ptrans == BIOS_ATA_TRANSLATION_AUTO) { + *ptrans = hd_bios_chs_auto_trans(conf->cyls, conf->heads, conf->secs); + } + if (conf->cyls || conf->heads || conf->secs) { + if (conf->cyls < 1 || conf->cyls > cyls_max) { + error_setg(errp, "cyls must be between 1 and %u", cyls_max); + return; + } + if (conf->heads < 1 || conf->heads > heads_max) { + error_setg(errp, "heads must be between 1 and %u", heads_max); + return; + } + if (conf->secs < 1 || conf->secs > secs_max) { + error_setg(errp, "secs must be between 1 and %u", secs_max); + return; + } + } +} diff --git a/qemu/hw/block/cdrom.c b/qemu/hw/block/cdrom.c new file mode 100644 index 000000000..4e1019c89 --- /dev/null +++ b/qemu/hw/block/cdrom.c @@ -0,0 +1,155 @@ +/* + * QEMU ATAPI CD-ROM Emulator + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* ??? Most of the ATAPI emulation is still in ide.c. It should be moved + here. */ + +#include "qemu-common.h" +#include "hw/scsi/scsi.h" + +static void lba_to_msf(uint8_t *buf, int lba) +{ + lba += 150; + buf[0] = (lba / 75) / 60; + buf[1] = (lba / 75) % 60; + buf[2] = lba % 75; +} + +/* same toc as bochs. Return -1 if error or the toc length */ +/* XXX: check this */ +int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track) +{ + uint8_t *q; + int len; + + if (start_track > 1 && start_track != 0xaa) + return -1; + q = buf + 2; + *q++ = 1; /* first session */ + *q++ = 1; /* last session */ + if (start_track <= 1) { + *q++ = 0; /* reserved */ + *q++ = 0x14; /* ADR, control */ + *q++ = 1; /* track number */ + *q++ = 0; /* reserved */ + if (msf) { + *q++ = 0; /* reserved */ + lba_to_msf(q, 0); + q += 3; + } else { + /* sector 0 */ + stl_be_p(q, 0); + q += 4; + } + } + /* lead out track */ + *q++ = 0; /* reserved */ + *q++ = 0x16; /* ADR, control */ + *q++ = 0xaa; /* track number */ + *q++ = 0; /* reserved */ + if (msf) { + *q++ = 0; /* reserved */ + lba_to_msf(q, nb_sectors); + q += 3; + } else { + stl_be_p(q, nb_sectors); + q += 4; + } + len = q - buf; + stw_be_p(buf, len - 2); + return len; +} + +/* mostly same info as PearPc */ +int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num) +{ + uint8_t *q; + int len; + + q = buf + 2; + *q++ = 1; /* first session */ + *q++ = 1; /* last session */ + + *q++ = 1; /* session number */ + *q++ = 0x14; /* data track */ + *q++ = 0; /* track number */ + *q++ = 0xa0; /* lead-in */ + *q++ = 0; /* min */ + *q++ = 0; /* sec */ + *q++ = 0; /* frame */ + *q++ = 0; + *q++ = 1; /* first track */ + *q++ = 0x00; /* disk type */ + *q++ = 0x00; + + *q++ = 1; /* session number */ + *q++ = 0x14; /* data track */ + *q++ = 0; /* track number */ + *q++ = 0xa1; + *q++ = 0; /* min */ + *q++ = 0; /* sec */ + *q++ = 0; /* frame */ + *q++ = 0; + *q++ = 1; /* last track */ + *q++ = 0x00; + *q++ = 0x00; + + *q++ = 1; /* session number */ + *q++ = 0x14; /* data track */ + *q++ = 0; /* track number */ + *q++ = 0xa2; /* lead-out */ + *q++ = 0; /* min */ + *q++ = 0; /* sec */ + *q++ = 0; /* frame */ + if (msf) { + *q++ = 0; /* reserved */ + lba_to_msf(q, nb_sectors); + q += 3; + } else { + stl_be_p(q, nb_sectors); + q += 4; + } + + *q++ = 1; /* session number */ + *q++ = 0x14; /* ADR, control */ + *q++ = 0; /* track number */ + *q++ = 1; /* point */ + *q++ = 0; /* min */ + *q++ = 0; /* sec */ + *q++ = 0; /* frame */ + if (msf) { + *q++ = 0; + lba_to_msf(q, 0); + q += 3; + } else { + *q++ = 0; + *q++ = 0; + *q++ = 0; + *q++ = 0; + } + + len = q - buf; + stw_be_p(buf, len - 2); + return len; +} diff --git a/qemu/hw/block/dataplane/Makefile.objs b/qemu/hw/block/dataplane/Makefile.objs new file mode 100644 index 000000000..e786f6642 --- /dev/null +++ b/qemu/hw/block/dataplane/Makefile.objs @@ -0,0 +1 @@ +obj-y += virtio-blk.o diff --git a/qemu/hw/block/dataplane/virtio-blk.c b/qemu/hw/block/dataplane/virtio-blk.c new file mode 100644 index 000000000..6106e4615 --- /dev/null +++ b/qemu/hw/block/dataplane/virtio-blk.c @@ -0,0 +1,341 @@ +/* + * Dedicated thread for virtio-blk I/O processing + * + * Copyright 2012 IBM, Corp. + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "trace.h" +#include "qemu/iov.h" +#include "qemu/thread.h" +#include "qemu/error-report.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/dataplane/vring.h" +#include "hw/virtio/dataplane/vring-accessors.h" +#include "sysemu/block-backend.h" +#include "hw/virtio/virtio-blk.h" +#include "virtio-blk.h" +#include "block/aio.h" +#include "hw/virtio/virtio-bus.h" +#include "qom/object_interfaces.h" + +struct VirtIOBlockDataPlane { + bool started; + bool starting; + bool stopping; + bool disabled; + + VirtIOBlkConf *conf; + + VirtIODevice *vdev; + Vring vring; /* virtqueue vring */ + EventNotifier *guest_notifier; /* irq */ + QEMUBH *bh; /* bh for guest notification */ + + /* Note that these EventNotifiers are assigned by value. This is + * fine as long as you do not call event_notifier_cleanup on them + * (because you don't own the file descriptor or handle; you just + * use it). + */ + IOThread *iothread; + IOThread internal_iothread_obj; + AioContext *ctx; + EventNotifier host_notifier; /* doorbell */ + + /* Operation blocker on BDS */ + Error *blocker; + void (*saved_complete_request)(struct VirtIOBlockReq *req, + unsigned char status); +}; + +/* Raise an interrupt to signal guest, if necessary */ +static void notify_guest(VirtIOBlockDataPlane *s) +{ + if (!vring_should_notify(s->vdev, &s->vring)) { + return; + } + + event_notifier_set(s->guest_notifier); +} + +static void notify_guest_bh(void *opaque) +{ + VirtIOBlockDataPlane *s = opaque; + + notify_guest(s); +} + +static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) +{ + VirtIOBlockDataPlane *s = req->dev->dataplane; + stb_p(&req->in->status, status); + + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); + + /* Suppress notification to guest by BH and its scheduled + * flag because requests are completed as a batch after io + * plug & unplug is introduced, and the BH can still be + * executed in dataplane aio context even after it is + * stopped, so needn't worry about notification loss with BH. + */ + qemu_bh_schedule(s->bh); +} + +static void handle_notify(EventNotifier *e) +{ + VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, + host_notifier); + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + + event_notifier_test_and_clear(&s->host_notifier); + blk_io_plug(s->conf->conf.blk); + for (;;) { + MultiReqBuffer mrb = {}; + int ret; + + /* Disable guest->host notifies to avoid unnecessary vmexits */ + vring_disable_notification(s->vdev, &s->vring); + + for (;;) { + VirtIOBlockReq *req = virtio_blk_alloc_request(vblk); + + ret = vring_pop(s->vdev, &s->vring, &req->elem); + if (ret < 0) { + virtio_blk_free_request(req); + break; /* no more requests */ + } + + trace_virtio_blk_data_plane_process_request(s, req->elem.out_num, + req->elem.in_num, + req->elem.index); + + virtio_blk_handle_request(req, &mrb); + } + + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->conf->conf.blk, &mrb); + } + + if (likely(ret == -EAGAIN)) { /* vring emptied */ + /* Re-enable guest->host notifies and stop processing the vring. + * But if the guest has snuck in more descriptors, keep processing. + */ + if (vring_enable_notification(s->vdev, &s->vring)) { + break; + } + } else { /* fatal error */ + break; + } + } + blk_io_unplug(s->conf->conf.blk); +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + VirtIOBlockDataPlane **dataplane, + Error **errp) +{ + VirtIOBlockDataPlane *s; + Error *local_err = NULL; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + + *dataplane = NULL; + + if (!conf->data_plane && !conf->iothread) { + return; + } + + /* Don't try if transport does not support notifiers. */ + if (!k->set_guest_notifiers || !k->set_host_notifier) { + error_setg(errp, + "device is incompatible with x-data-plane " + "(transport does not support notifiers)"); + return; + } + + /* If dataplane is (re-)enabled while the guest is running there could be + * block jobs that can conflict. + */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, + &local_err)) { + error_setg(errp, "cannot start dataplane thread: %s", + error_get_pretty(local_err)); + error_free(local_err); + return; + } + + s = g_new0(VirtIOBlockDataPlane, 1); + s->vdev = vdev; + s->conf = conf; + + if (conf->iothread) { + s->iothread = conf->iothread; + object_ref(OBJECT(s->iothread)); + } else { + /* Create per-device IOThread if none specified. This is for + * x-data-plane option compatibility. If x-data-plane is removed we + * can drop this. + */ + object_initialize(&s->internal_iothread_obj, + sizeof(s->internal_iothread_obj), + TYPE_IOTHREAD); + user_creatable_complete(OBJECT(&s->internal_iothread_obj), &error_abort); + s->iothread = &s->internal_iothread_obj; + } + s->ctx = iothread_get_aio_context(s->iothread); + s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); + + error_setg(&s->blocker, "block device is in use by data plane"); + blk_op_block_all(conf->conf.blk, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_RESIZE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_MIRROR, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_STREAM, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_REPLACE, s->blocker); + + *dataplane = s; +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) +{ + if (!s) { + return; + } + + virtio_blk_data_plane_stop(s); + blk_op_unblock_all(s->conf->conf.blk, s->blocker); + error_free(s->blocker); + qemu_bh_delete(s->bh); + object_unref(OBJECT(s->iothread)); + g_free(s); +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + VirtQueue *vq; + int r; + + if (s->started || s->disabled) { + return; + } + + if (s->starting) { + return; + } + + s->starting = true; + + vq = virtio_get_queue(s->vdev, 0); + if (!vring_setup(&s->vring, s->vdev, 0)) { + goto fail_vring; + } + + /* Set up guest notifier (irq) */ + r = k->set_guest_notifiers(qbus->parent, 1, true); + if (r != 0) { + fprintf(stderr, "virtio-blk failed to set guest notifier (%d), " + "ensure -enable-kvm is set\n", r); + goto fail_guest_notifiers; + } + s->guest_notifier = virtio_queue_get_guest_notifier(vq); + + /* Set up virtqueue notify */ + r = k->set_host_notifier(qbus->parent, 0, true); + if (r != 0) { + fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); + goto fail_host_notifier; + } + s->host_notifier = *virtio_queue_get_host_notifier(vq); + + s->saved_complete_request = vblk->complete_request; + vblk->complete_request = complete_request_vring; + + s->starting = false; + s->started = true; + trace_virtio_blk_data_plane_start(s); + + blk_set_aio_context(s->conf->conf.blk, s->ctx); + + /* Kick right away to begin processing requests already in vring */ + event_notifier_set(virtio_queue_get_host_notifier(vq)); + + /* Get this show started by hooking up our callbacks */ + aio_context_acquire(s->ctx); + aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); + aio_context_release(s->ctx); + return; + + fail_host_notifier: + k->set_guest_notifiers(qbus->parent, 1, false); + fail_guest_notifiers: + vring_teardown(&s->vring, s->vdev, 0); + s->disabled = true; + fail_vring: + s->starting = false; +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + + + /* Better luck next time. */ + if (s->disabled) { + s->disabled = false; + return; + } + if (!s->started || s->stopping) { + return; + } + s->stopping = true; + vblk->complete_request = s->saved_complete_request; + trace_virtio_blk_data_plane_stop(s); + + aio_context_acquire(s->ctx); + + /* Stop notifications for new requests from guest */ + aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); + + /* Drain and switch bs back to the QEMU main loop */ + blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); + + aio_context_release(s->ctx); + + /* Sync vring state back to virtqueue so that non-dataplane request + * processing can continue when we disable the host notifier below. + */ + vring_teardown(&s->vring, s->vdev, 0); + + k->set_host_notifier(qbus->parent, 0, false); + + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, 1, false); + + s->started = false; + s->stopping = false; +} diff --git a/qemu/hw/block/dataplane/virtio-blk.h b/qemu/hw/block/dataplane/virtio-blk.h new file mode 100644 index 000000000..c88d40e72 --- /dev/null +++ b/qemu/hw/block/dataplane/virtio-blk.h @@ -0,0 +1,30 @@ +/* + * Dedicated thread for virtio-blk I/O processing + * + * Copyright 2012 IBM, Corp. + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_DATAPLANE_VIRTIO_BLK_H +#define HW_DATAPLANE_VIRTIO_BLK_H + +#include "hw/virtio/virtio.h" + +typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; + +void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + VirtIOBlockDataPlane **dataplane, + Error **errp); +void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s); + +#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/qemu/hw/block/ecc.c b/qemu/hw/block/ecc.c new file mode 100644 index 000000000..10bb23308 --- /dev/null +++ b/qemu/hw/block/ecc.c @@ -0,0 +1,90 @@ +/* + * Calculate Error-correcting Codes. Used by NAND Flash controllers + * (not by NAND chips). + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski <balrog@zabor.org> + * + * This code is licensed under the GNU GPL v2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "hw/hw.h" +#include "hw/block/flash.h" + +/* + * Pre-calculated 256-way 1 byte column parity. Table borrowed from Linux. + */ +static const uint8_t nand_ecc_precalc_table[] = { + 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, + 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00, + 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, + 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, + 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, + 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, + 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, + 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, + 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, + 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, + 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, + 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, + 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, + 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, + 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, + 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, + 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, + 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, + 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, + 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, + 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, + 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, + 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, + 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, + 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, + 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, + 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, + 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, + 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, + 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, + 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, + 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00, +}; + +/* Update ECC parity count. */ +uint8_t ecc_digest(ECCState *s, uint8_t sample) +{ + uint8_t idx = nand_ecc_precalc_table[sample]; + + s->cp ^= idx & 0x3f; + if (idx & 0x40) { + s->lp[0] ^= ~s->count; + s->lp[1] ^= s->count; + } + s->count ++; + + return sample; +} + +/* Reinitialise the counters. */ +void ecc_reset(ECCState *s) +{ + s->lp[0] = 0x0000; + s->lp[1] = 0x0000; + s->cp = 0x00; + s->count = 0; +} + +/* Save/restore */ +VMStateDescription vmstate_ecc_state = { + .name = "ecc-state", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT8(cp, ECCState), + VMSTATE_UINT16_ARRAY(lp, ECCState, 2), + VMSTATE_UINT16(count, ECCState), + VMSTATE_END_OF_LIST(), + }, +}; diff --git a/qemu/hw/block/fdc.c b/qemu/hw/block/fdc.c new file mode 100644 index 000000000..5e1b67ee4 --- /dev/null +++ b/qemu/hw/block/fdc.c @@ -0,0 +1,2529 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * The controller is used in Sun4m systems in a slightly different + * way. There are changes in DOR register and DMA is not available. + */ + +#include "hw/hw.h" +#include "hw/block/fdc.h" +#include "qemu/error-report.h" +#include "qemu/timer.h" +#include "hw/isa/isa.h" +#include "hw/sysbus.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "qemu/log.h" + +/********************************************************/ +/* debug Floppy devices */ +//#define DEBUG_FLOPPY + +#ifdef DEBUG_FLOPPY +#define FLOPPY_DPRINTF(fmt, ...) \ + do { printf("FLOPPY: " fmt , ## __VA_ARGS__); } while (0) +#else +#define FLOPPY_DPRINTF(fmt, ...) +#endif + +/********************************************************/ +/* Floppy drive emulation */ + +typedef enum FDriveRate { + FDRIVE_RATE_500K = 0x00, /* 500 Kbps */ + FDRIVE_RATE_300K = 0x01, /* 300 Kbps */ + FDRIVE_RATE_250K = 0x02, /* 250 Kbps */ + FDRIVE_RATE_1M = 0x03, /* 1 Mbps */ +} FDriveRate; + +typedef struct FDFormat { + FDriveType drive; + uint8_t last_sect; + uint8_t max_track; + uint8_t max_head; + FDriveRate rate; +} FDFormat; + +static const FDFormat fd_formats[] = { + /* First entry is default format */ + /* 1.44 MB 3"1/2 floppy disks */ + { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, }, + /* 2.88 MB 3"1/2 floppy disks */ + { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, }, + { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, }, + { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, }, + { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, }, + { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, }, + /* 720 kB 3"1/2 floppy disks */ + { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, }, + /* 1.2 MB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, }, + { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, }, + /* 720 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, }, + /* 360 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, }, + { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, }, + { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, }, + { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, }, + /* 320 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, }, + { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, }, + /* 360 kB must match 5"1/4 better than 3"1/2... */ + { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, }, + /* end */ + { FDRIVE_DRV_NONE, -1, -1, 0, 0, }, +}; + +static void pick_geometry(BlockBackend *blk, int *nb_heads, + int *max_track, int *last_sect, + FDriveType drive_in, FDriveType *drive, + FDriveRate *rate) +{ + const FDFormat *parse; + uint64_t nb_sectors, size; + int i, first_match, match; + + blk_get_geometry(blk, &nb_sectors); + match = -1; + first_match = -1; + for (i = 0; ; i++) { + parse = &fd_formats[i]; + if (parse->drive == FDRIVE_DRV_NONE) { + break; + } + if (drive_in == parse->drive || + drive_in == FDRIVE_DRV_NONE) { + size = (parse->max_head + 1) * parse->max_track * + parse->last_sect; + if (nb_sectors == size) { + match = i; + break; + } + if (first_match == -1) { + first_match = i; + } + } + } + if (match == -1) { + if (first_match == -1) { + match = 1; + } else { + match = first_match; + } + parse = &fd_formats[match]; + } + *nb_heads = parse->max_head + 1; + *max_track = parse->max_track; + *last_sect = parse->last_sect; + *drive = parse->drive; + *rate = parse->rate; +} + +#define GET_CUR_DRV(fdctrl) ((fdctrl)->cur_drv) +#define SET_CUR_DRV(fdctrl, drive) ((fdctrl)->cur_drv = (drive)) + +/* Will always be a fixed parameter for us */ +#define FD_SECTOR_LEN 512 +#define FD_SECTOR_SC 2 /* Sector size code */ +#define FD_RESET_SENSEI_COUNT 4 /* Number of sense interrupts on RESET */ + +typedef struct FDCtrl FDCtrl; + +/* Floppy disk drive emulation */ +typedef enum FDiskFlags { + FDISK_DBL_SIDES = 0x01, +} FDiskFlags; + +typedef struct FDrive { + FDCtrl *fdctrl; + BlockBackend *blk; + /* Drive status */ + FDriveType drive; + uint8_t perpendicular; /* 2.88 MB access mode */ + /* Position */ + uint8_t head; + uint8_t track; + uint8_t sect; + /* Media */ + FDiskFlags flags; + uint8_t last_sect; /* Nb sector per track */ + uint8_t max_track; /* Nb of tracks */ + uint16_t bps; /* Bytes per sector */ + uint8_t ro; /* Is read-only */ + uint8_t media_changed; /* Is media changed */ + uint8_t media_rate; /* Data rate of medium */ +} FDrive; + +static void fd_init(FDrive *drv) +{ + /* Drive */ + drv->drive = FDRIVE_DRV_NONE; + drv->perpendicular = 0; + /* Disk */ + drv->last_sect = 0; + drv->max_track = 0; +} + +#define NUM_SIDES(drv) ((drv)->flags & FDISK_DBL_SIDES ? 2 : 1) + +static int fd_sector_calc(uint8_t head, uint8_t track, uint8_t sect, + uint8_t last_sect, uint8_t num_sides) +{ + return (((track * num_sides) + head) * last_sect) + sect - 1; +} + +/* Returns current position, in sectors, for given drive */ +static int fd_sector(FDrive *drv) +{ + return fd_sector_calc(drv->head, drv->track, drv->sect, drv->last_sect, + NUM_SIDES(drv)); +} + +/* Seek to a new position: + * returns 0 if already on right track + * returns 1 if track changed + * returns 2 if track is invalid + * returns 3 if sector is invalid + * returns 4 if seek is disabled + */ +static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect, + int enable_seek) +{ + uint32_t sector; + int ret; + + if (track > drv->max_track || + (head != 0 && (drv->flags & FDISK_DBL_SIDES) == 0)) { + FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n", + head, track, sect, 1, + (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1, + drv->max_track, drv->last_sect); + return 2; + } + if (sect > drv->last_sect) { + FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n", + head, track, sect, 1, + (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1, + drv->max_track, drv->last_sect); + return 3; + } + sector = fd_sector_calc(head, track, sect, drv->last_sect, NUM_SIDES(drv)); + ret = 0; + if (sector != fd_sector(drv)) { +#if 0 + if (!enable_seek) { + FLOPPY_DPRINTF("error: no implicit seek %d %02x %02x" + " (max=%d %02x %02x)\n", + head, track, sect, 1, drv->max_track, + drv->last_sect); + return 4; + } +#endif + drv->head = head; + if (drv->track != track) { + if (drv->blk != NULL && blk_is_inserted(drv->blk)) { + drv->media_changed = 0; + } + ret = 1; + } + drv->track = track; + drv->sect = sect; + } + + if (drv->blk == NULL || !blk_is_inserted(drv->blk)) { + ret = 2; + } + + return ret; +} + +/* Set drive back to track 0 */ +static void fd_recalibrate(FDrive *drv) +{ + FLOPPY_DPRINTF("recalibrate\n"); + fd_seek(drv, 0, 0, 1, 1); +} + +/* Revalidate a disk drive after a disk change */ +static void fd_revalidate(FDrive *drv) +{ + int nb_heads, max_track, last_sect, ro; + FDriveType drive; + FDriveRate rate; + + FLOPPY_DPRINTF("revalidate\n"); + if (drv->blk != NULL) { + ro = blk_is_read_only(drv->blk); + pick_geometry(drv->blk, &nb_heads, &max_track, + &last_sect, drv->drive, &drive, &rate); + if (!blk_is_inserted(drv->blk)) { + FLOPPY_DPRINTF("No disk in drive\n"); + } else { + FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads, + max_track, last_sect, ro ? "ro" : "rw"); + } + if (nb_heads == 1) { + drv->flags &= ~FDISK_DBL_SIDES; + } else { + drv->flags |= FDISK_DBL_SIDES; + } + drv->max_track = max_track; + drv->last_sect = last_sect; + drv->ro = ro; + drv->drive = drive; + drv->media_rate = rate; + } else { + FLOPPY_DPRINTF("No drive connected\n"); + drv->last_sect = 0; + drv->max_track = 0; + drv->flags &= ~FDISK_DBL_SIDES; + } +} + +/********************************************************/ +/* Intel 82078 floppy disk controller emulation */ + +static void fdctrl_reset(FDCtrl *fdctrl, int do_irq); +static void fdctrl_to_command_phase(FDCtrl *fdctrl); +static int fdctrl_transfer_handler (void *opaque, int nchan, + int dma_pos, int dma_len); +static void fdctrl_raise_irq(FDCtrl *fdctrl); +static FDrive *get_cur_drv(FDCtrl *fdctrl); + +static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl); +static uint32_t fdctrl_read_statusB(FDCtrl *fdctrl); +static uint32_t fdctrl_read_dor(FDCtrl *fdctrl); +static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_tape(FDCtrl *fdctrl); +static void fdctrl_write_tape(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_main_status(FDCtrl *fdctrl); +static void fdctrl_write_rate(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_data(FDCtrl *fdctrl); +static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_dir(FDCtrl *fdctrl); +static void fdctrl_write_ccr(FDCtrl *fdctrl, uint32_t value); + +enum { + FD_DIR_WRITE = 0, + FD_DIR_READ = 1, + FD_DIR_SCANE = 2, + FD_DIR_SCANL = 3, + FD_DIR_SCANH = 4, + FD_DIR_VERIFY = 5, +}; + +enum { + FD_STATE_MULTI = 0x01, /* multi track flag */ + FD_STATE_FORMAT = 0x02, /* format flag */ +}; + +enum { + FD_REG_SRA = 0x00, + FD_REG_SRB = 0x01, + FD_REG_DOR = 0x02, + FD_REG_TDR = 0x03, + FD_REG_MSR = 0x04, + FD_REG_DSR = 0x04, + FD_REG_FIFO = 0x05, + FD_REG_DIR = 0x07, + FD_REG_CCR = 0x07, +}; + +enum { + FD_CMD_READ_TRACK = 0x02, + FD_CMD_SPECIFY = 0x03, + FD_CMD_SENSE_DRIVE_STATUS = 0x04, + FD_CMD_WRITE = 0x05, + FD_CMD_READ = 0x06, + FD_CMD_RECALIBRATE = 0x07, + FD_CMD_SENSE_INTERRUPT_STATUS = 0x08, + FD_CMD_WRITE_DELETED = 0x09, + FD_CMD_READ_ID = 0x0a, + FD_CMD_READ_DELETED = 0x0c, + FD_CMD_FORMAT_TRACK = 0x0d, + FD_CMD_DUMPREG = 0x0e, + FD_CMD_SEEK = 0x0f, + FD_CMD_VERSION = 0x10, + FD_CMD_SCAN_EQUAL = 0x11, + FD_CMD_PERPENDICULAR_MODE = 0x12, + FD_CMD_CONFIGURE = 0x13, + FD_CMD_LOCK = 0x14, + FD_CMD_VERIFY = 0x16, + FD_CMD_POWERDOWN_MODE = 0x17, + FD_CMD_PART_ID = 0x18, + FD_CMD_SCAN_LOW_OR_EQUAL = 0x19, + FD_CMD_SCAN_HIGH_OR_EQUAL = 0x1d, + FD_CMD_SAVE = 0x2e, + FD_CMD_OPTION = 0x33, + FD_CMD_RESTORE = 0x4e, + FD_CMD_DRIVE_SPECIFICATION_COMMAND = 0x8e, + FD_CMD_RELATIVE_SEEK_OUT = 0x8f, + FD_CMD_FORMAT_AND_WRITE = 0xcd, + FD_CMD_RELATIVE_SEEK_IN = 0xcf, +}; + +enum { + FD_CONFIG_PRETRK = 0xff, /* Pre-compensation set to track 0 */ + FD_CONFIG_FIFOTHR = 0x0f, /* FIFO threshold set to 1 byte */ + FD_CONFIG_POLL = 0x10, /* Poll enabled */ + FD_CONFIG_EFIFO = 0x20, /* FIFO disabled */ + FD_CONFIG_EIS = 0x40, /* No implied seeks */ +}; + +enum { + FD_SR0_DS0 = 0x01, + FD_SR0_DS1 = 0x02, + FD_SR0_HEAD = 0x04, + FD_SR0_EQPMT = 0x10, + FD_SR0_SEEK = 0x20, + FD_SR0_ABNTERM = 0x40, + FD_SR0_INVCMD = 0x80, + FD_SR0_RDYCHG = 0xc0, +}; + +enum { + FD_SR1_MA = 0x01, /* Missing address mark */ + FD_SR1_NW = 0x02, /* Not writable */ + FD_SR1_EC = 0x80, /* End of cylinder */ +}; + +enum { + FD_SR2_SNS = 0x04, /* Scan not satisfied */ + FD_SR2_SEH = 0x08, /* Scan equal hit */ +}; + +enum { + FD_SRA_DIR = 0x01, + FD_SRA_nWP = 0x02, + FD_SRA_nINDX = 0x04, + FD_SRA_HDSEL = 0x08, + FD_SRA_nTRK0 = 0x10, + FD_SRA_STEP = 0x20, + FD_SRA_nDRV2 = 0x40, + FD_SRA_INTPEND = 0x80, +}; + +enum { + FD_SRB_MTR0 = 0x01, + FD_SRB_MTR1 = 0x02, + FD_SRB_WGATE = 0x04, + FD_SRB_RDATA = 0x08, + FD_SRB_WDATA = 0x10, + FD_SRB_DR0 = 0x20, +}; + +enum { +#if MAX_FD == 4 + FD_DOR_SELMASK = 0x03, +#else + FD_DOR_SELMASK = 0x01, +#endif + FD_DOR_nRESET = 0x04, + FD_DOR_DMAEN = 0x08, + FD_DOR_MOTEN0 = 0x10, + FD_DOR_MOTEN1 = 0x20, + FD_DOR_MOTEN2 = 0x40, + FD_DOR_MOTEN3 = 0x80, +}; + +enum { +#if MAX_FD == 4 + FD_TDR_BOOTSEL = 0x0c, +#else + FD_TDR_BOOTSEL = 0x04, +#endif +}; + +enum { + FD_DSR_DRATEMASK= 0x03, + FD_DSR_PWRDOWN = 0x40, + FD_DSR_SWRESET = 0x80, +}; + +enum { + FD_MSR_DRV0BUSY = 0x01, + FD_MSR_DRV1BUSY = 0x02, + FD_MSR_DRV2BUSY = 0x04, + FD_MSR_DRV3BUSY = 0x08, + FD_MSR_CMDBUSY = 0x10, + FD_MSR_NONDMA = 0x20, + FD_MSR_DIO = 0x40, + FD_MSR_RQM = 0x80, +}; + +enum { + FD_DIR_DSKCHG = 0x80, +}; + +/* + * See chapter 5.0 "Controller phases" of the spec: + * + * Command phase: + * The host writes a command and its parameters into the FIFO. The command + * phase is completed when all parameters for the command have been supplied, + * and execution phase is entered. + * + * Execution phase: + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO + * contains the payload now, otherwise it's unused. When all bytes of the + * required data have been transferred, the state is switched to either result + * phase (if the command produces status bytes) or directly back into the + * command phase for the next command. + * + * Result phase: + * The host reads out the FIFO, which contains one or more result bytes now. + */ +enum { + /* Only for migration: reconstruct phase from registers like qemu 2.3 */ + FD_PHASE_RECONSTRUCT = 0, + + FD_PHASE_COMMAND = 1, + FD_PHASE_EXECUTION = 2, + FD_PHASE_RESULT = 3, +}; + +#define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) +#define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) + +struct FDCtrl { + MemoryRegion iomem; + qemu_irq irq; + /* Controller state */ + QEMUTimer *result_timer; + int dma_chann; + uint8_t phase; + /* Controller's identification */ + uint8_t version; + /* HW */ + uint8_t sra; + uint8_t srb; + uint8_t dor; + uint8_t dor_vmstate; /* only used as temp during vmstate */ + uint8_t tdr; + uint8_t dsr; + uint8_t msr; + uint8_t cur_drv; + uint8_t status0; + uint8_t status1; + uint8_t status2; + /* Command FIFO */ + uint8_t *fifo; + int32_t fifo_size; + uint32_t data_pos; + uint32_t data_len; + uint8_t data_state; + uint8_t data_dir; + uint8_t eot; /* last wanted sector */ + /* States kept only to be returned back */ + /* precompensation */ + uint8_t precomp_trk; + uint8_t config; + uint8_t lock; + /* Power down config (also with status regB access mode */ + uint8_t pwrd; + /* Floppy drives */ + uint8_t num_floppies; + FDrive drives[MAX_FD]; + int reset_sensei; + uint32_t check_media_rate; + /* Timers state */ + uint8_t timer0; + uint8_t timer1; +}; + +#define TYPE_SYSBUS_FDC "base-sysbus-fdc" +#define SYSBUS_FDC(obj) OBJECT_CHECK(FDCtrlSysBus, (obj), TYPE_SYSBUS_FDC) + +typedef struct FDCtrlSysBus { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + struct FDCtrl state; +} FDCtrlSysBus; + +#define ISA_FDC(obj) OBJECT_CHECK(FDCtrlISABus, (obj), TYPE_ISA_FDC) + +typedef struct FDCtrlISABus { + ISADevice parent_obj; + + uint32_t iobase; + uint32_t irq; + uint32_t dma; + struct FDCtrl state; + int32_t bootindexA; + int32_t bootindexB; +} FDCtrlISABus; + +static uint32_t fdctrl_read (void *opaque, uint32_t reg) +{ + FDCtrl *fdctrl = opaque; + uint32_t retval; + + reg &= 7; + switch (reg) { + case FD_REG_SRA: + retval = fdctrl_read_statusA(fdctrl); + break; + case FD_REG_SRB: + retval = fdctrl_read_statusB(fdctrl); + break; + case FD_REG_DOR: + retval = fdctrl_read_dor(fdctrl); + break; + case FD_REG_TDR: + retval = fdctrl_read_tape(fdctrl); + break; + case FD_REG_MSR: + retval = fdctrl_read_main_status(fdctrl); + break; + case FD_REG_FIFO: + retval = fdctrl_read_data(fdctrl); + break; + case FD_REG_DIR: + retval = fdctrl_read_dir(fdctrl); + break; + default: + retval = (uint32_t)(-1); + break; + } + FLOPPY_DPRINTF("read reg%d: 0x%02x\n", reg & 7, retval); + + return retval; +} + +static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) +{ + FDCtrl *fdctrl = opaque; + + FLOPPY_DPRINTF("write reg%d: 0x%02x\n", reg & 7, value); + + reg &= 7; + switch (reg) { + case FD_REG_DOR: + fdctrl_write_dor(fdctrl, value); + break; + case FD_REG_TDR: + fdctrl_write_tape(fdctrl, value); + break; + case FD_REG_DSR: + fdctrl_write_rate(fdctrl, value); + break; + case FD_REG_FIFO: + fdctrl_write_data(fdctrl, value); + break; + case FD_REG_CCR: + fdctrl_write_ccr(fdctrl, value); + break; + default: + break; + } +} + +static uint64_t fdctrl_read_mem (void *opaque, hwaddr reg, + unsigned ize) +{ + return fdctrl_read(opaque, (uint32_t)reg); +} + +static void fdctrl_write_mem (void *opaque, hwaddr reg, + uint64_t value, unsigned size) +{ + fdctrl_write(opaque, (uint32_t)reg, value); +} + +static const MemoryRegionOps fdctrl_mem_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps fdctrl_mem_strict_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static bool fdrive_media_changed_needed(void *opaque) +{ + FDrive *drive = opaque; + + return (drive->blk != NULL && drive->media_changed != 1); +} + +static const VMStateDescription vmstate_fdrive_media_changed = { + .name = "fdrive/media_changed", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdrive_media_changed_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(media_changed, FDrive), + VMSTATE_END_OF_LIST() + } +}; + +static bool fdrive_media_rate_needed(void *opaque) +{ + FDrive *drive = opaque; + + return drive->fdctrl->check_media_rate; +} + +static const VMStateDescription vmstate_fdrive_media_rate = { + .name = "fdrive/media_rate", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdrive_media_rate_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(media_rate, FDrive), + VMSTATE_END_OF_LIST() + } +}; + +static bool fdrive_perpendicular_needed(void *opaque) +{ + FDrive *drive = opaque; + + return drive->perpendicular != 0; +} + +static const VMStateDescription vmstate_fdrive_perpendicular = { + .name = "fdrive/perpendicular", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdrive_perpendicular_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(perpendicular, FDrive), + VMSTATE_END_OF_LIST() + } +}; + +static int fdrive_post_load(void *opaque, int version_id) +{ + fd_revalidate(opaque); + return 0; +} + +static const VMStateDescription vmstate_fdrive = { + .name = "fdrive", + .version_id = 1, + .minimum_version_id = 1, + .post_load = fdrive_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(head, FDrive), + VMSTATE_UINT8(track, FDrive), + VMSTATE_UINT8(sect, FDrive), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_fdrive_media_changed, + &vmstate_fdrive_media_rate, + &vmstate_fdrive_perpendicular, + NULL + } +}; + +/* + * Reconstructs the phase from register values according to the logic that was + * implemented in qemu 2.3. This is the default value that is used if the phase + * subsection is not present on migration. + * + * Don't change this function to reflect newer qemu versions, it is part of + * the migration ABI. + */ +static int reconstruct_phase(FDCtrl *fdctrl) +{ + if (fdctrl->msr & FD_MSR_NONDMA) { + return FD_PHASE_EXECUTION; + } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { + /* qemu 2.3 disabled RQM only during DMA transfers */ + return FD_PHASE_EXECUTION; + } else if (fdctrl->msr & FD_MSR_DIO) { + return FD_PHASE_RESULT; + } else { + return FD_PHASE_COMMAND; + } +} + +static void fdc_pre_save(void *opaque) +{ + FDCtrl *s = opaque; + + s->dor_vmstate = s->dor | GET_CUR_DRV(s); +} + +static int fdc_pre_load(void *opaque) +{ + FDCtrl *s = opaque; + s->phase = FD_PHASE_RECONSTRUCT; + return 0; +} + +static int fdc_post_load(void *opaque, int version_id) +{ + FDCtrl *s = opaque; + + SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); + s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; + + if (s->phase == FD_PHASE_RECONSTRUCT) { + s->phase = reconstruct_phase(s); + } + + return 0; +} + +static bool fdc_reset_sensei_needed(void *opaque) +{ + FDCtrl *s = opaque; + + return s->reset_sensei != 0; +} + +static const VMStateDescription vmstate_fdc_reset_sensei = { + .name = "fdc/reset_sensei", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdc_reset_sensei_needed, + .fields = (VMStateField[]) { + VMSTATE_INT32(reset_sensei, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static bool fdc_result_timer_needed(void *opaque) +{ + FDCtrl *s = opaque; + + return timer_pending(s->result_timer); +} + +static const VMStateDescription vmstate_fdc_result_timer = { + .name = "fdc/result_timer", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdc_result_timer_needed, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(result_timer, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static bool fdc_phase_needed(void *opaque) +{ + FDCtrl *fdctrl = opaque; + + return reconstruct_phase(fdctrl) != fdctrl->phase; +} + +static const VMStateDescription vmstate_fdc_phase = { + .name = "fdc/phase", + .version_id = 1, + .minimum_version_id = 1, + .needed = fdc_phase_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(phase, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_fdc = { + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .pre_save = fdc_pre_save, + .pre_load = fdc_pre_load, + .post_load = fdc_post_load, + .fields = (VMStateField[]) { + /* Controller State */ + VMSTATE_UINT8(sra, FDCtrl), + VMSTATE_UINT8(srb, FDCtrl), + VMSTATE_UINT8(dor_vmstate, FDCtrl), + VMSTATE_UINT8(tdr, FDCtrl), + VMSTATE_UINT8(dsr, FDCtrl), + VMSTATE_UINT8(msr, FDCtrl), + VMSTATE_UINT8(status0, FDCtrl), + VMSTATE_UINT8(status1, FDCtrl), + VMSTATE_UINT8(status2, FDCtrl), + /* Command FIFO */ + VMSTATE_VARRAY_INT32(fifo, FDCtrl, fifo_size, 0, vmstate_info_uint8, + uint8_t), + VMSTATE_UINT32(data_pos, FDCtrl), + VMSTATE_UINT32(data_len, FDCtrl), + VMSTATE_UINT8(data_state, FDCtrl), + VMSTATE_UINT8(data_dir, FDCtrl), + VMSTATE_UINT8(eot, FDCtrl), + /* States kept only to be returned back */ + VMSTATE_UINT8(timer0, FDCtrl), + VMSTATE_UINT8(timer1, FDCtrl), + VMSTATE_UINT8(precomp_trk, FDCtrl), + VMSTATE_UINT8(config, FDCtrl), + VMSTATE_UINT8(lock, FDCtrl), + VMSTATE_UINT8(pwrd, FDCtrl), + VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl), + VMSTATE_STRUCT_ARRAY(drives, FDCtrl, MAX_FD, 1, + vmstate_fdrive, FDrive), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_fdc_reset_sensei, + &vmstate_fdc_result_timer, + &vmstate_fdc_phase, + NULL + } +}; + +static void fdctrl_external_reset_sysbus(DeviceState *d) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(d); + FDCtrl *s = &sys->state; + + fdctrl_reset(s, 0); +} + +static void fdctrl_external_reset_isa(DeviceState *d) +{ + FDCtrlISABus *isa = ISA_FDC(d); + FDCtrl *s = &isa->state; + + fdctrl_reset(s, 0); +} + +static void fdctrl_handle_tc(void *opaque, int irq, int level) +{ + //FDCtrl *s = opaque; + + if (level) { + // XXX + FLOPPY_DPRINTF("TC pulsed\n"); + } +} + +/* Change IRQ state */ +static void fdctrl_reset_irq(FDCtrl *fdctrl) +{ + fdctrl->status0 = 0; + if (!(fdctrl->sra & FD_SRA_INTPEND)) + return; + FLOPPY_DPRINTF("Reset interrupt\n"); + qemu_set_irq(fdctrl->irq, 0); + fdctrl->sra &= ~FD_SRA_INTPEND; +} + +static void fdctrl_raise_irq(FDCtrl *fdctrl) +{ + if (!(fdctrl->sra & FD_SRA_INTPEND)) { + qemu_set_irq(fdctrl->irq, 1); + fdctrl->sra |= FD_SRA_INTPEND; + } + + fdctrl->reset_sensei = 0; + FLOPPY_DPRINTF("Set interrupt status to 0x%02x\n", fdctrl->status0); +} + +/* Reset controller */ +static void fdctrl_reset(FDCtrl *fdctrl, int do_irq) +{ + int i; + + FLOPPY_DPRINTF("reset controller\n"); + fdctrl_reset_irq(fdctrl); + /* Initialise controller */ + fdctrl->sra = 0; + fdctrl->srb = 0xc0; + if (!fdctrl->drives[1].blk) { + fdctrl->sra |= FD_SRA_nDRV2; + } + fdctrl->cur_drv = 0; + fdctrl->dor = FD_DOR_nRESET; + fdctrl->dor |= (fdctrl->dma_chann != -1) ? FD_DOR_DMAEN : 0; + fdctrl->msr = FD_MSR_RQM; + fdctrl->reset_sensei = 0; + timer_del(fdctrl->result_timer); + /* FIFO state */ + fdctrl->data_pos = 0; + fdctrl->data_len = 0; + fdctrl->data_state = 0; + fdctrl->data_dir = FD_DIR_WRITE; + for (i = 0; i < MAX_FD; i++) + fd_recalibrate(&fdctrl->drives[i]); + fdctrl_to_command_phase(fdctrl); + if (do_irq) { + fdctrl->status0 |= FD_SR0_RDYCHG; + fdctrl_raise_irq(fdctrl); + fdctrl->reset_sensei = FD_RESET_SENSEI_COUNT; + } +} + +static inline FDrive *drv0(FDCtrl *fdctrl) +{ + return &fdctrl->drives[(fdctrl->tdr & FD_TDR_BOOTSEL) >> 2]; +} + +static inline FDrive *drv1(FDCtrl *fdctrl) +{ + if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (1 << 2)) + return &fdctrl->drives[1]; + else + return &fdctrl->drives[0]; +} + +#if MAX_FD == 4 +static inline FDrive *drv2(FDCtrl *fdctrl) +{ + if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (2 << 2)) + return &fdctrl->drives[2]; + else + return &fdctrl->drives[1]; +} + +static inline FDrive *drv3(FDCtrl *fdctrl) +{ + if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (3 << 2)) + return &fdctrl->drives[3]; + else + return &fdctrl->drives[2]; +} +#endif + +static FDrive *get_cur_drv(FDCtrl *fdctrl) +{ + switch (fdctrl->cur_drv) { + case 0: return drv0(fdctrl); + case 1: return drv1(fdctrl); +#if MAX_FD == 4 + case 2: return drv2(fdctrl); + case 3: return drv3(fdctrl); +#endif + default: return NULL; + } +} + +/* Status A register : 0x00 (read-only) */ +static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl) +{ + uint32_t retval = fdctrl->sra; + + FLOPPY_DPRINTF("status register A: 0x%02x\n", retval); + + return retval; +} + +/* Status B register : 0x01 (read-only) */ +static uint32_t fdctrl_read_statusB(FDCtrl *fdctrl) +{ + uint32_t retval = fdctrl->srb; + + FLOPPY_DPRINTF("status register B: 0x%02x\n", retval); + + return retval; +} + +/* Digital output register : 0x02 */ +static uint32_t fdctrl_read_dor(FDCtrl *fdctrl) +{ + uint32_t retval = fdctrl->dor; + + /* Selected drive */ + retval |= fdctrl->cur_drv; + FLOPPY_DPRINTF("digital output register: 0x%02x\n", retval); + + return retval; +} + +static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) +{ + FLOPPY_DPRINTF("digital output register set to 0x%02x\n", value); + + /* Motors */ + if (value & FD_DOR_MOTEN0) + fdctrl->srb |= FD_SRB_MTR0; + else + fdctrl->srb &= ~FD_SRB_MTR0; + if (value & FD_DOR_MOTEN1) + fdctrl->srb |= FD_SRB_MTR1; + else + fdctrl->srb &= ~FD_SRB_MTR1; + + /* Drive */ + if (value & 1) + fdctrl->srb |= FD_SRB_DR0; + else + fdctrl->srb &= ~FD_SRB_DR0; + + /* Reset */ + if (!(value & FD_DOR_nRESET)) { + if (fdctrl->dor & FD_DOR_nRESET) { + FLOPPY_DPRINTF("controller enter RESET state\n"); + } + } else { + if (!(fdctrl->dor & FD_DOR_nRESET)) { + FLOPPY_DPRINTF("controller out of RESET state\n"); + fdctrl_reset(fdctrl, 1); + fdctrl->dsr &= ~FD_DSR_PWRDOWN; + } + } + /* Selected drive */ + fdctrl->cur_drv = value & FD_DOR_SELMASK; + + fdctrl->dor = value; +} + +/* Tape drive register : 0x03 */ +static uint32_t fdctrl_read_tape(FDCtrl *fdctrl) +{ + uint32_t retval = fdctrl->tdr; + + FLOPPY_DPRINTF("tape drive register: 0x%02x\n", retval); + + return retval; +} + +static void fdctrl_write_tape(FDCtrl *fdctrl, uint32_t value) +{ + /* Reset mode */ + if (!(fdctrl->dor & FD_DOR_nRESET)) { + FLOPPY_DPRINTF("Floppy controller in RESET state !\n"); + return; + } + FLOPPY_DPRINTF("tape drive register set to 0x%02x\n", value); + /* Disk boot selection indicator */ + fdctrl->tdr = value & FD_TDR_BOOTSEL; + /* Tape indicators: never allow */ +} + +/* Main status register : 0x04 (read) */ +static uint32_t fdctrl_read_main_status(FDCtrl *fdctrl) +{ + uint32_t retval = fdctrl->msr; + + fdctrl->dsr &= ~FD_DSR_PWRDOWN; + fdctrl->dor |= FD_DOR_nRESET; + + FLOPPY_DPRINTF("main status register: 0x%02x\n", retval); + + return retval; +} + +/* Data select rate register : 0x04 (write) */ +static void fdctrl_write_rate(FDCtrl *fdctrl, uint32_t value) +{ + /* Reset mode */ + if (!(fdctrl->dor & FD_DOR_nRESET)) { + FLOPPY_DPRINTF("Floppy controller in RESET state !\n"); + return; + } + FLOPPY_DPRINTF("select rate register set to 0x%02x\n", value); + /* Reset: autoclear */ + if (value & FD_DSR_SWRESET) { + fdctrl->dor &= ~FD_DOR_nRESET; + fdctrl_reset(fdctrl, 1); + fdctrl->dor |= FD_DOR_nRESET; + } + if (value & FD_DSR_PWRDOWN) { + fdctrl_reset(fdctrl, 1); + } + fdctrl->dsr = value; +} + +/* Configuration control register: 0x07 (write) */ +static void fdctrl_write_ccr(FDCtrl *fdctrl, uint32_t value) +{ + /* Reset mode */ + if (!(fdctrl->dor & FD_DOR_nRESET)) { + FLOPPY_DPRINTF("Floppy controller in RESET state !\n"); + return; + } + FLOPPY_DPRINTF("configuration control register set to 0x%02x\n", value); + + /* Only the rate selection bits used in AT mode, and we + * store those in the DSR. + */ + fdctrl->dsr = (fdctrl->dsr & ~FD_DSR_DRATEMASK) | + (value & FD_DSR_DRATEMASK); +} + +static int fdctrl_media_changed(FDrive *drv) +{ + return drv->media_changed; +} + +/* Digital input register : 0x07 (read-only) */ +static uint32_t fdctrl_read_dir(FDCtrl *fdctrl) +{ + uint32_t retval = 0; + + if (fdctrl_media_changed(get_cur_drv(fdctrl))) { + retval |= FD_DIR_DSKCHG; + } + if (retval != 0) { + FLOPPY_DPRINTF("Floppy digital input register: 0x%02x\n", retval); + } + + return retval; +} + +/* Clear the FIFO and update the state for receiving the next command */ +static void fdctrl_to_command_phase(FDCtrl *fdctrl) +{ + fdctrl->phase = FD_PHASE_COMMAND; + fdctrl->data_dir = FD_DIR_WRITE; + fdctrl->data_pos = 0; + fdctrl->data_len = 1; /* Accept command byte, adjust for params later */ + fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO); + fdctrl->msr |= FD_MSR_RQM; +} + +/* Update the state to allow the guest to read out the command status. + * @fifo_len is the number of result bytes to be read out. */ +static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len) +{ + fdctrl->phase = FD_PHASE_RESULT; + fdctrl->data_dir = FD_DIR_READ; + fdctrl->data_len = fifo_len; + fdctrl->data_pos = 0; + fdctrl->msr |= FD_MSR_CMDBUSY | FD_MSR_RQM | FD_MSR_DIO; +} + +/* Set an error: unimplemented/unknown command */ +static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction) +{ + qemu_log_mask(LOG_UNIMP, "fdc: unimplemented command 0x%02x\n", + fdctrl->fifo[0]); + fdctrl->fifo[0] = FD_SR0_INVCMD; + fdctrl_to_result_phase(fdctrl, 1); +} + +/* Seek to next sector + * returns 0 when end of track reached (for DBL_SIDES on head 1) + * otherwise returns 1 + */ +static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv) +{ + FLOPPY_DPRINTF("seek to next sector (%d %02x %02x => %d)\n", + cur_drv->head, cur_drv->track, cur_drv->sect, + fd_sector(cur_drv)); + /* XXX: cur_drv->sect >= cur_drv->last_sect should be an + error in fact */ + uint8_t new_head = cur_drv->head; + uint8_t new_track = cur_drv->track; + uint8_t new_sect = cur_drv->sect; + + int ret = 1; + + if (new_sect >= cur_drv->last_sect || + new_sect == fdctrl->eot) { + new_sect = 1; + if (FD_MULTI_TRACK(fdctrl->data_state)) { + if (new_head == 0 && + (cur_drv->flags & FDISK_DBL_SIDES) != 0) { + new_head = 1; + } else { + new_head = 0; + new_track++; + fdctrl->status0 |= FD_SR0_SEEK; + if ((cur_drv->flags & FDISK_DBL_SIDES) == 0) { + ret = 0; + } + } + } else { + fdctrl->status0 |= FD_SR0_SEEK; + new_track++; + ret = 0; + } + if (ret == 1) { + FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n", + new_head, new_track, new_sect, fd_sector(cur_drv)); + } + } else { + new_sect++; + } + fd_seek(cur_drv, new_head, new_track, new_sect, 1); + return ret; +} + +/* Callback for transfer end (stop or abort) */ +static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0, + uint8_t status1, uint8_t status2) +{ + FDrive *cur_drv; + cur_drv = get_cur_drv(fdctrl); + + fdctrl->status0 &= ~(FD_SR0_DS0 | FD_SR0_DS1 | FD_SR0_HEAD); + fdctrl->status0 |= GET_CUR_DRV(fdctrl); + if (cur_drv->head) { + fdctrl->status0 |= FD_SR0_HEAD; + } + fdctrl->status0 |= status0; + + FLOPPY_DPRINTF("transfer status: %02x %02x %02x (%02x)\n", + status0, status1, status2, fdctrl->status0); + fdctrl->fifo[0] = fdctrl->status0; + fdctrl->fifo[1] = status1; + fdctrl->fifo[2] = status2; + fdctrl->fifo[3] = cur_drv->track; + fdctrl->fifo[4] = cur_drv->head; + fdctrl->fifo[5] = cur_drv->sect; + fdctrl->fifo[6] = FD_SECTOR_SC; + fdctrl->data_dir = FD_DIR_READ; + if (!(fdctrl->msr & FD_MSR_NONDMA)) { + DMA_release_DREQ(fdctrl->dma_chann); + } + fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO; + fdctrl->msr &= ~FD_MSR_NONDMA; + + fdctrl_to_result_phase(fdctrl, 7); + fdctrl_raise_irq(fdctrl); +} + +/* Prepare a data transfer (either DMA or FIFO) */ +static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + uint8_t kh, kt, ks; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + kt = fdctrl->fifo[2]; + kh = fdctrl->fifo[3]; + ks = fdctrl->fifo[4]; + FLOPPY_DPRINTF("Start transfer at %d %d %02x %02x (%d)\n", + GET_CUR_DRV(fdctrl), kh, kt, ks, + fd_sector_calc(kh, kt, ks, cur_drv->last_sect, + NUM_SIDES(cur_drv))); + switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & FD_CONFIG_EIS)) { + case 2: + /* sect too big */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 3: + /* track too big */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_EC, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 4: + /* No seek enabled */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 1: + fdctrl->status0 |= FD_SR0_SEEK; + break; + default: + break; + } + + /* Check the data rate. If the programmed data rate does not match + * the currently inserted medium, the operation has to fail. */ + if (fdctrl->check_media_rate && + (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) { + FLOPPY_DPRINTF("data rate mismatch (fdc=%d, media=%d)\n", + fdctrl->dsr & FD_DSR_DRATEMASK, cur_drv->media_rate); + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + } + + /* Set the FIFO state */ + fdctrl->data_dir = direction; + fdctrl->data_pos = 0; + assert(fdctrl->msr & FD_MSR_CMDBUSY); + if (fdctrl->fifo[0] & 0x80) + fdctrl->data_state |= FD_STATE_MULTI; + else + fdctrl->data_state &= ~FD_STATE_MULTI; + if (fdctrl->fifo[5] == 0) { + fdctrl->data_len = fdctrl->fifo[8]; + } else { + int tmp; + fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]); + tmp = (fdctrl->fifo[6] - ks + 1); + if (fdctrl->fifo[0] & 0x80) + tmp += fdctrl->fifo[6]; + fdctrl->data_len *= tmp; + } + fdctrl->eot = fdctrl->fifo[6]; + if (fdctrl->dor & FD_DOR_DMAEN) { + int dma_mode; + /* DMA transfer are enabled. Check if DMA channel is well programmed */ + dma_mode = DMA_get_channel_mode(fdctrl->dma_chann); + dma_mode = (dma_mode >> 2) & 3; + FLOPPY_DPRINTF("dma_mode=%d direction=%d (%d - %d)\n", + dma_mode, direction, + (128 << fdctrl->fifo[5]) * + (cur_drv->last_sect - ks + 1), fdctrl->data_len); + if (((direction == FD_DIR_SCANE || direction == FD_DIR_SCANL || + direction == FD_DIR_SCANH) && dma_mode == 0) || + (direction == FD_DIR_WRITE && dma_mode == 2) || + (direction == FD_DIR_READ && dma_mode == 1) || + (direction == FD_DIR_VERIFY)) { + /* No access is allowed until DMA transfer has completed */ + fdctrl->msr &= ~FD_MSR_RQM; + if (direction != FD_DIR_VERIFY) { + /* Now, we just have to wait for the DMA controller to + * recall us... + */ + DMA_hold_DREQ(fdctrl->dma_chann); + DMA_schedule(fdctrl->dma_chann); + } else { + /* Start transfer */ + fdctrl_transfer_handler(fdctrl, fdctrl->dma_chann, 0, + fdctrl->data_len); + } + return; + } else { + FLOPPY_DPRINTF("bad dma_mode=%d direction=%d\n", dma_mode, + direction); + } + } + FLOPPY_DPRINTF("start non-DMA transfer\n"); + fdctrl->msr |= FD_MSR_NONDMA | FD_MSR_RQM; + if (direction != FD_DIR_WRITE) + fdctrl->msr |= FD_MSR_DIO; + /* IO based transfer: calculate len */ + fdctrl_raise_irq(fdctrl); +} + +/* Prepare a transfer of deleted data */ +static void fdctrl_start_transfer_del(FDCtrl *fdctrl, int direction) +{ + qemu_log_mask(LOG_UNIMP, "fdctrl_start_transfer_del() unimplemented\n"); + + /* We don't handle deleted data, + * so we don't return *ANYTHING* + */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); +} + +/* handlers for DMA transfers */ +static int fdctrl_transfer_handler (void *opaque, int nchan, + int dma_pos, int dma_len) +{ + FDCtrl *fdctrl; + FDrive *cur_drv; + int len, start_pos, rel_pos; + uint8_t status0 = 0x00, status1 = 0x00, status2 = 0x00; + + fdctrl = opaque; + if (fdctrl->msr & FD_MSR_RQM) { + FLOPPY_DPRINTF("Not in DMA transfer mode !\n"); + return 0; + } + cur_drv = get_cur_drv(fdctrl); + if (fdctrl->data_dir == FD_DIR_SCANE || fdctrl->data_dir == FD_DIR_SCANL || + fdctrl->data_dir == FD_DIR_SCANH) + status2 = FD_SR2_SNS; + if (dma_len > fdctrl->data_len) + dma_len = fdctrl->data_len; + if (cur_drv->blk == NULL) { + if (fdctrl->data_dir == FD_DIR_WRITE) + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); + else + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); + len = 0; + goto transfer_error; + } + rel_pos = fdctrl->data_pos % FD_SECTOR_LEN; + for (start_pos = fdctrl->data_pos; fdctrl->data_pos < dma_len;) { + len = dma_len - fdctrl->data_pos; + if (len + rel_pos > FD_SECTOR_LEN) + len = FD_SECTOR_LEN - rel_pos; + FLOPPY_DPRINTF("copy %d bytes (%d %d %d) %d pos %d %02x " + "(%d-0x%08x 0x%08x)\n", len, dma_len, fdctrl->data_pos, + fdctrl->data_len, GET_CUR_DRV(fdctrl), cur_drv->head, + cur_drv->track, cur_drv->sect, fd_sector(cur_drv), + fd_sector(cur_drv) * FD_SECTOR_LEN); + if (fdctrl->data_dir != FD_DIR_WRITE || + len < FD_SECTOR_LEN || rel_pos != 0) { + /* READ & SCAN commands and realign to a sector for WRITE */ + if (blk_read(cur_drv->blk, fd_sector(cur_drv), + fdctrl->fifo, 1) < 0) { + FLOPPY_DPRINTF("Floppy: error getting sector %d\n", + fd_sector(cur_drv)); + /* Sure, image size is too small... */ + memset(fdctrl->fifo, 0, FD_SECTOR_LEN); + } + } + switch (fdctrl->data_dir) { + case FD_DIR_READ: + /* READ commands */ + DMA_write_memory (nchan, fdctrl->fifo + rel_pos, + fdctrl->data_pos, len); + break; + case FD_DIR_WRITE: + /* WRITE commands */ + if (cur_drv->ro) { + /* Handle readonly medium early, no need to do DMA, touch the + * LED or attempt any writes. A real floppy doesn't attempt + * to write to readonly media either. */ + fdctrl_stop_transfer(fdctrl, + FD_SR0_ABNTERM | FD_SR0_SEEK, FD_SR1_NW, + 0x00); + goto transfer_error; + } + + DMA_read_memory (nchan, fdctrl->fifo + rel_pos, + fdctrl->data_pos, len); + if (blk_write(cur_drv->blk, fd_sector(cur_drv), + fdctrl->fifo, 1) < 0) { + FLOPPY_DPRINTF("error writing sector %d\n", + fd_sector(cur_drv)); + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); + goto transfer_error; + } + break; + case FD_DIR_VERIFY: + /* VERIFY commands */ + break; + default: + /* SCAN commands */ + { + uint8_t tmpbuf[FD_SECTOR_LEN]; + int ret; + DMA_read_memory (nchan, tmpbuf, fdctrl->data_pos, len); + ret = memcmp(tmpbuf, fdctrl->fifo + rel_pos, len); + if (ret == 0) { + status2 = FD_SR2_SEH; + goto end_transfer; + } + if ((ret < 0 && fdctrl->data_dir == FD_DIR_SCANL) || + (ret > 0 && fdctrl->data_dir == FD_DIR_SCANH)) { + status2 = 0x00; + goto end_transfer; + } + } + break; + } + fdctrl->data_pos += len; + rel_pos = fdctrl->data_pos % FD_SECTOR_LEN; + if (rel_pos == 0) { + /* Seek to next sector */ + if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) + break; + } + } + end_transfer: + len = fdctrl->data_pos - start_pos; + FLOPPY_DPRINTF("end transfer %d %d %d\n", + fdctrl->data_pos, len, fdctrl->data_len); + if (fdctrl->data_dir == FD_DIR_SCANE || + fdctrl->data_dir == FD_DIR_SCANL || + fdctrl->data_dir == FD_DIR_SCANH) + status2 = FD_SR2_SEH; + fdctrl->data_len -= len; + fdctrl_stop_transfer(fdctrl, status0, status1, status2); + transfer_error: + + return len; +} + +/* Data register : 0x05 */ +static uint32_t fdctrl_read_data(FDCtrl *fdctrl) +{ + FDrive *cur_drv; + uint32_t retval = 0; + uint32_t pos; + + cur_drv = get_cur_drv(fdctrl); + fdctrl->dsr &= ~FD_DSR_PWRDOWN; + if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { + FLOPPY_DPRINTF("error: controller not ready for reading\n"); + return 0; + } + + /* If data_len spans multiple sectors, the current position in the FIFO + * wraps around while fdctrl->data_pos is the real position in the whole + * request. */ + pos = fdctrl->data_pos; + pos %= FD_SECTOR_LEN; + + switch (fdctrl->phase) { + case FD_PHASE_EXECUTION: + assert(fdctrl->msr & FD_MSR_NONDMA); + if (pos == 0) { + if (fdctrl->data_pos != 0) + if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) { + FLOPPY_DPRINTF("error seeking to next sector %d\n", + fd_sector(cur_drv)); + return 0; + } + if (blk_read(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) + < 0) { + FLOPPY_DPRINTF("error getting sector %d\n", + fd_sector(cur_drv)); + /* Sure, image size is too small... */ + memset(fdctrl->fifo, 0, FD_SECTOR_LEN); + } + } + + if (++fdctrl->data_pos == fdctrl->data_len) { + fdctrl->msr &= ~FD_MSR_RQM; + fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); + } + break; + + case FD_PHASE_RESULT: + assert(!(fdctrl->msr & FD_MSR_NONDMA)); + if (++fdctrl->data_pos == fdctrl->data_len) { + fdctrl->msr &= ~FD_MSR_RQM; + fdctrl_to_command_phase(fdctrl); + fdctrl_reset_irq(fdctrl); + } + break; + + case FD_PHASE_COMMAND: + default: + abort(); + } + + retval = fdctrl->fifo[pos]; + FLOPPY_DPRINTF("data register: 0x%02x\n", retval); + + return retval; +} + +static void fdctrl_format_sector(FDCtrl *fdctrl) +{ + FDrive *cur_drv; + uint8_t kh, kt, ks; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + kt = fdctrl->fifo[6]; + kh = fdctrl->fifo[7]; + ks = fdctrl->fifo[8]; + FLOPPY_DPRINTF("format sector at %d %d %02x %02x (%d)\n", + GET_CUR_DRV(fdctrl), kh, kt, ks, + fd_sector_calc(kh, kt, ks, cur_drv->last_sect, + NUM_SIDES(cur_drv))); + switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & FD_CONFIG_EIS)) { + case 2: + /* sect too big */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 3: + /* track too big */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_EC, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 4: + /* No seek enabled */ + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); + fdctrl->fifo[3] = kt; + fdctrl->fifo[4] = kh; + fdctrl->fifo[5] = ks; + return; + case 1: + fdctrl->status0 |= FD_SR0_SEEK; + break; + default: + break; + } + memset(fdctrl->fifo, 0, FD_SECTOR_LEN); + if (cur_drv->blk == NULL || + blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) < 0) { + FLOPPY_DPRINTF("error formatting sector %d\n", fd_sector(cur_drv)); + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); + } else { + if (cur_drv->sect == cur_drv->last_sect) { + fdctrl->data_state &= ~FD_STATE_FORMAT; + /* Last sector done */ + fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); + } else { + /* More to do */ + fdctrl->data_pos = 0; + fdctrl->data_len = 4; + } + } +} + +static void fdctrl_handle_lock(FDCtrl *fdctrl, int direction) +{ + fdctrl->lock = (fdctrl->fifo[0] & 0x80) ? 1 : 0; + fdctrl->fifo[0] = fdctrl->lock << 4; + fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + /* Drives position */ + fdctrl->fifo[0] = drv0(fdctrl)->track; + fdctrl->fifo[1] = drv1(fdctrl)->track; +#if MAX_FD == 4 + fdctrl->fifo[2] = drv2(fdctrl)->track; + fdctrl->fifo[3] = drv3(fdctrl)->track; +#else + fdctrl->fifo[2] = 0; + fdctrl->fifo[3] = 0; +#endif + /* timers */ + fdctrl->fifo[4] = fdctrl->timer0; + fdctrl->fifo[5] = (fdctrl->timer1 << 1) | (fdctrl->dor & FD_DOR_DMAEN ? 1 : 0); + fdctrl->fifo[6] = cur_drv->last_sect; + fdctrl->fifo[7] = (fdctrl->lock << 7) | + (cur_drv->perpendicular << 2); + fdctrl->fifo[8] = fdctrl->config; + fdctrl->fifo[9] = fdctrl->precomp_trk; + fdctrl_to_result_phase(fdctrl, 10); +} + +static void fdctrl_handle_version(FDCtrl *fdctrl, int direction) +{ + /* Controller's version */ + fdctrl->fifo[0] = fdctrl->version; + fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_partid(FDCtrl *fdctrl, int direction) +{ + fdctrl->fifo[0] = 0x41; /* Stepping 1 */ + fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + /* Drives position */ + drv0(fdctrl)->track = fdctrl->fifo[3]; + drv1(fdctrl)->track = fdctrl->fifo[4]; +#if MAX_FD == 4 + drv2(fdctrl)->track = fdctrl->fifo[5]; + drv3(fdctrl)->track = fdctrl->fifo[6]; +#endif + /* timers */ + fdctrl->timer0 = fdctrl->fifo[7]; + fdctrl->timer1 = fdctrl->fifo[8]; + cur_drv->last_sect = fdctrl->fifo[9]; + fdctrl->lock = fdctrl->fifo[10] >> 7; + cur_drv->perpendicular = (fdctrl->fifo[10] >> 2) & 0xF; + fdctrl->config = fdctrl->fifo[11]; + fdctrl->precomp_trk = fdctrl->fifo[12]; + fdctrl->pwrd = fdctrl->fifo[13]; + fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_save(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + fdctrl->fifo[0] = 0; + fdctrl->fifo[1] = 0; + /* Drives position */ + fdctrl->fifo[2] = drv0(fdctrl)->track; + fdctrl->fifo[3] = drv1(fdctrl)->track; +#if MAX_FD == 4 + fdctrl->fifo[4] = drv2(fdctrl)->track; + fdctrl->fifo[5] = drv3(fdctrl)->track; +#else + fdctrl->fifo[4] = 0; + fdctrl->fifo[5] = 0; +#endif + /* timers */ + fdctrl->fifo[6] = fdctrl->timer0; + fdctrl->fifo[7] = fdctrl->timer1; + fdctrl->fifo[8] = cur_drv->last_sect; + fdctrl->fifo[9] = (fdctrl->lock << 7) | + (cur_drv->perpendicular << 2); + fdctrl->fifo[10] = fdctrl->config; + fdctrl->fifo[11] = fdctrl->precomp_trk; + fdctrl->fifo[12] = fdctrl->pwrd; + fdctrl->fifo[13] = 0; + fdctrl->fifo[14] = 0; + fdctrl_to_result_phase(fdctrl, 15); +} + +static void fdctrl_handle_readid(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + cur_drv->head = (fdctrl->fifo[1] >> 2) & 1; + timer_mod(fdctrl->result_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() / 50)); +} + +static void fdctrl_handle_format_track(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + fdctrl->data_state |= FD_STATE_FORMAT; + if (fdctrl->fifo[0] & 0x80) + fdctrl->data_state |= FD_STATE_MULTI; + else + fdctrl->data_state &= ~FD_STATE_MULTI; + cur_drv->bps = + fdctrl->fifo[2] > 7 ? 16384 : 128 << fdctrl->fifo[2]; +#if 0 + cur_drv->last_sect = + cur_drv->flags & FDISK_DBL_SIDES ? fdctrl->fifo[3] : + fdctrl->fifo[3] / 2; +#else + cur_drv->last_sect = fdctrl->fifo[3]; +#endif + /* TODO: implement format using DMA expected by the Bochs BIOS + * and Linux fdformat (read 3 bytes per sector via DMA and fill + * the sector with the specified fill byte + */ + fdctrl->data_state &= ~FD_STATE_FORMAT; + fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); +} + +static void fdctrl_handle_specify(FDCtrl *fdctrl, int direction) +{ + fdctrl->timer0 = (fdctrl->fifo[1] >> 4) & 0xF; + fdctrl->timer1 = fdctrl->fifo[2] >> 1; + if (fdctrl->fifo[2] & 1) + fdctrl->dor &= ~FD_DOR_DMAEN; + else + fdctrl->dor |= FD_DOR_DMAEN; + /* No result back */ + fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + cur_drv->head = (fdctrl->fifo[1] >> 2) & 1; + /* 1 Byte status back */ + fdctrl->fifo[0] = (cur_drv->ro << 6) | + (cur_drv->track == 0 ? 0x10 : 0x00) | + (cur_drv->head << 2) | + GET_CUR_DRV(fdctrl) | + 0x28; + fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + fd_recalibrate(cur_drv); + fdctrl_to_command_phase(fdctrl); + /* Raise Interrupt */ + fdctrl->status0 |= FD_SR0_SEEK; + fdctrl_raise_irq(fdctrl); +} + +static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + if (fdctrl->reset_sensei > 0) { + fdctrl->fifo[0] = + FD_SR0_RDYCHG + FD_RESET_SENSEI_COUNT - fdctrl->reset_sensei; + fdctrl->reset_sensei--; + } else if (!(fdctrl->sra & FD_SRA_INTPEND)) { + fdctrl->fifo[0] = FD_SR0_INVCMD; + fdctrl_to_result_phase(fdctrl, 1); + return; + } else { + fdctrl->fifo[0] = + (fdctrl->status0 & ~(FD_SR0_HEAD | FD_SR0_DS1 | FD_SR0_DS0)) + | GET_CUR_DRV(fdctrl); + } + + fdctrl->fifo[1] = cur_drv->track; + fdctrl_to_result_phase(fdctrl, 2); + fdctrl_reset_irq(fdctrl); + fdctrl->status0 = FD_SR0_RDYCHG; +} + +static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + fdctrl_to_command_phase(fdctrl); + /* The seek command just sends step pulses to the drive and doesn't care if + * there is a medium inserted of if it's banging the head against the drive. + */ + fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1); + /* Raise Interrupt */ + fdctrl->status0 |= FD_SR0_SEEK; + fdctrl_raise_irq(fdctrl); +} + +static void fdctrl_handle_perpendicular_mode(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + + if (fdctrl->fifo[1] & 0x80) + cur_drv->perpendicular = fdctrl->fifo[1] & 0x7; + /* No result back */ + fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction) +{ + fdctrl->config = fdctrl->fifo[2]; + fdctrl->precomp_trk = fdctrl->fifo[3]; + /* No result back */ + fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction) +{ + fdctrl->pwrd = fdctrl->fifo[1]; + fdctrl->fifo[0] = fdctrl->fifo[1]; + fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_option(FDCtrl *fdctrl, int direction) +{ + /* No result back */ + fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv = get_cur_drv(fdctrl); + uint32_t pos; + + pos = fdctrl->data_pos - 1; + pos %= FD_SECTOR_LEN; + if (fdctrl->fifo[pos] & 0x80) { + /* Command parameters done */ + if (fdctrl->fifo[pos] & 0x40) { + fdctrl->fifo[0] = fdctrl->fifo[1]; + fdctrl->fifo[2] = 0; + fdctrl->fifo[3] = 0; + fdctrl_to_result_phase(fdctrl, 4); + } else { + fdctrl_to_command_phase(fdctrl); + } + } else if (fdctrl->data_len > 7) { + /* ERROR */ + fdctrl->fifo[0] = 0x80 | + (cur_drv->head << 2) | GET_CUR_DRV(fdctrl); + fdctrl_to_result_phase(fdctrl, 1); + } +} + +static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + if (fdctrl->fifo[2] + cur_drv->track >= cur_drv->max_track) { + fd_seek(cur_drv, cur_drv->head, cur_drv->max_track - 1, + cur_drv->sect, 1); + } else { + fd_seek(cur_drv, cur_drv->head, + cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1); + } + fdctrl_to_command_phase(fdctrl); + /* Raise Interrupt */ + fdctrl->status0 |= FD_SR0_SEEK; + fdctrl_raise_irq(fdctrl); +} + +static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction) +{ + FDrive *cur_drv; + + SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); + cur_drv = get_cur_drv(fdctrl); + if (fdctrl->fifo[2] > cur_drv->track) { + fd_seek(cur_drv, cur_drv->head, 0, cur_drv->sect, 1); + } else { + fd_seek(cur_drv, cur_drv->head, + cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1); + } + fdctrl_to_command_phase(fdctrl); + /* Raise Interrupt */ + fdctrl->status0 |= FD_SR0_SEEK; + fdctrl_raise_irq(fdctrl); +} + +/* + * Handlers for the execution phase of each command + */ +typedef struct FDCtrlCommand { + uint8_t value; + uint8_t mask; + const char* name; + int parameters; + void (*handler)(FDCtrl *fdctrl, int direction); + int direction; +} FDCtrlCommand; + +static const FDCtrlCommand handlers[] = { + { FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ }, + { FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE }, + { FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek }, + { FD_CMD_SENSE_INTERRUPT_STATUS, 0xff, "SENSE INTERRUPT STATUS", 0, fdctrl_handle_sense_interrupt_status }, + { FD_CMD_RECALIBRATE, 0xff, "RECALIBRATE", 1, fdctrl_handle_recalibrate }, + { FD_CMD_FORMAT_TRACK, 0xbf, "FORMAT TRACK", 5, fdctrl_handle_format_track }, + { FD_CMD_READ_TRACK, 0xbf, "READ TRACK", 8, fdctrl_start_transfer, FD_DIR_READ }, + { FD_CMD_RESTORE, 0xff, "RESTORE", 17, fdctrl_handle_restore }, /* part of READ DELETED DATA */ + { FD_CMD_SAVE, 0xff, "SAVE", 0, fdctrl_handle_save }, /* part of READ DELETED DATA */ + { FD_CMD_READ_DELETED, 0x1f, "READ DELETED DATA", 8, fdctrl_start_transfer_del, FD_DIR_READ }, + { FD_CMD_SCAN_EQUAL, 0x1f, "SCAN EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANE }, + { FD_CMD_VERIFY, 0x1f, "VERIFY", 8, fdctrl_start_transfer, FD_DIR_VERIFY }, + { FD_CMD_SCAN_LOW_OR_EQUAL, 0x1f, "SCAN LOW OR EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANL }, + { FD_CMD_SCAN_HIGH_OR_EQUAL, 0x1f, "SCAN HIGH OR EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANH }, + { FD_CMD_WRITE_DELETED, 0x3f, "WRITE DELETED DATA", 8, fdctrl_start_transfer_del, FD_DIR_WRITE }, + { FD_CMD_READ_ID, 0xbf, "READ ID", 1, fdctrl_handle_readid }, + { FD_CMD_SPECIFY, 0xff, "SPECIFY", 2, fdctrl_handle_specify }, + { FD_CMD_SENSE_DRIVE_STATUS, 0xff, "SENSE DRIVE STATUS", 1, fdctrl_handle_sense_drive_status }, + { FD_CMD_PERPENDICULAR_MODE, 0xff, "PERPENDICULAR MODE", 1, fdctrl_handle_perpendicular_mode }, + { FD_CMD_CONFIGURE, 0xff, "CONFIGURE", 3, fdctrl_handle_configure }, + { FD_CMD_POWERDOWN_MODE, 0xff, "POWERDOWN MODE", 2, fdctrl_handle_powerdown_mode }, + { FD_CMD_OPTION, 0xff, "OPTION", 1, fdctrl_handle_option }, + { FD_CMD_DRIVE_SPECIFICATION_COMMAND, 0xff, "DRIVE SPECIFICATION COMMAND", 5, fdctrl_handle_drive_specification_command }, + { FD_CMD_RELATIVE_SEEK_OUT, 0xff, "RELATIVE SEEK OUT", 2, fdctrl_handle_relative_seek_out }, + { FD_CMD_FORMAT_AND_WRITE, 0xff, "FORMAT AND WRITE", 10, fdctrl_unimplemented }, + { FD_CMD_RELATIVE_SEEK_IN, 0xff, "RELATIVE SEEK IN", 2, fdctrl_handle_relative_seek_in }, + { FD_CMD_LOCK, 0x7f, "LOCK", 0, fdctrl_handle_lock }, + { FD_CMD_DUMPREG, 0xff, "DUMPREG", 0, fdctrl_handle_dumpreg }, + { FD_CMD_VERSION, 0xff, "VERSION", 0, fdctrl_handle_version }, + { FD_CMD_PART_ID, 0xff, "PART ID", 0, fdctrl_handle_partid }, + { FD_CMD_WRITE, 0x1f, "WRITE (BeOS)", 8, fdctrl_start_transfer, FD_DIR_WRITE }, /* not in specification ; BeOS 4.5 bug */ + { 0, 0, "unknown", 0, fdctrl_unimplemented }, /* default handler */ +}; +/* Associate command to an index in the 'handlers' array */ +static uint8_t command_to_handler[256]; + +static const FDCtrlCommand *get_command(uint8_t cmd) +{ + int idx; + + idx = command_to_handler[cmd]; + FLOPPY_DPRINTF("%s command\n", handlers[idx].name); + return &handlers[idx]; +} + +static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) +{ + FDrive *cur_drv; + const FDCtrlCommand *cmd; + uint32_t pos; + + /* Reset mode */ + if (!(fdctrl->dor & FD_DOR_nRESET)) { + FLOPPY_DPRINTF("Floppy controller in RESET state !\n"); + return; + } + if (!(fdctrl->msr & FD_MSR_RQM) || (fdctrl->msr & FD_MSR_DIO)) { + FLOPPY_DPRINTF("error: controller not ready for writing\n"); + return; + } + fdctrl->dsr &= ~FD_DSR_PWRDOWN; + + FLOPPY_DPRINTF("%s: %02x\n", __func__, value); + + /* If data_len spans multiple sectors, the current position in the FIFO + * wraps around while fdctrl->data_pos is the real position in the whole + * request. */ + pos = fdctrl->data_pos++; + pos %= FD_SECTOR_LEN; + fdctrl->fifo[pos] = value; + + if (fdctrl->data_pos == fdctrl->data_len) { + fdctrl->msr &= ~FD_MSR_RQM; + } + + switch (fdctrl->phase) { + case FD_PHASE_EXECUTION: + /* For DMA requests, RQM should be cleared during execution phase, so + * we would have errored out above. */ + assert(fdctrl->msr & FD_MSR_NONDMA); + + /* FIFO data write */ + if (pos == FD_SECTOR_LEN - 1 || + fdctrl->data_pos == fdctrl->data_len) { + cur_drv = get_cur_drv(fdctrl); + if (blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) + < 0) { + FLOPPY_DPRINTF("error writing sector %d\n", + fd_sector(cur_drv)); + break; + } + if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) { + FLOPPY_DPRINTF("error seeking to next sector %d\n", + fd_sector(cur_drv)); + break; + } + } + + /* Switch to result phase when done with the transfer */ + if (fdctrl->data_pos == fdctrl->data_len) { + fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); + } + break; + + case FD_PHASE_COMMAND: + assert(!(fdctrl->msr & FD_MSR_NONDMA)); + assert(fdctrl->data_pos < FD_SECTOR_LEN); + + if (pos == 0) { + /* The first byte specifies the command. Now we start reading + * as many parameters as this command requires. */ + cmd = get_command(value); + fdctrl->data_len = cmd->parameters + 1; + if (cmd->parameters) { + fdctrl->msr |= FD_MSR_RQM; + } + fdctrl->msr |= FD_MSR_CMDBUSY; + } + + if (fdctrl->data_pos == fdctrl->data_len) { + /* We have all parameters now, execute the command */ + fdctrl->phase = FD_PHASE_EXECUTION; + + if (fdctrl->data_state & FD_STATE_FORMAT) { + fdctrl_format_sector(fdctrl); + break; + } + + cmd = get_command(fdctrl->fifo[0]); + FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name); + cmd->handler(fdctrl, cmd->direction); + } + break; + + case FD_PHASE_RESULT: + default: + abort(); + } +} + +static void fdctrl_result_timer(void *opaque) +{ + FDCtrl *fdctrl = opaque; + FDrive *cur_drv = get_cur_drv(fdctrl); + + /* Pretend we are spinning. + * This is needed for Coherent, which uses READ ID to check for + * sector interleaving. + */ + if (cur_drv->last_sect != 0) { + cur_drv->sect = (cur_drv->sect % cur_drv->last_sect) + 1; + } + /* READ_ID can't automatically succeed! */ + if (fdctrl->check_media_rate && + (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) { + FLOPPY_DPRINTF("read id rate mismatch (fdc=%d, media=%d)\n", + fdctrl->dsr & FD_DSR_DRATEMASK, cur_drv->media_rate); + fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); + } else { + fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); + } +} + +static void fdctrl_change_cb(void *opaque, bool load) +{ + FDrive *drive = opaque; + + drive->media_changed = 1; + fd_revalidate(drive); +} + +static const BlockDevOps fdctrl_block_ops = { + .change_media_cb = fdctrl_change_cb, +}; + +/* Init functions */ +static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp) +{ + unsigned int i; + FDrive *drive; + + for (i = 0; i < MAX_FD; i++) { + drive = &fdctrl->drives[i]; + drive->fdctrl = fdctrl; + + if (drive->blk) { + if (blk_get_on_error(drive->blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { + error_setg(errp, "fdc doesn't support drive option werror"); + return; + } + if (blk_get_on_error(drive->blk, 1) != BLOCKDEV_ON_ERROR_REPORT) { + error_setg(errp, "fdc doesn't support drive option rerror"); + return; + } + } + + fd_init(drive); + fdctrl_change_cb(drive, 0); + if (drive->blk) { + blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive); + } + } +} + +ISADevice *fdctrl_init_isa(ISABus *bus, DriveInfo **fds) +{ + DeviceState *dev; + ISADevice *isadev; + + isadev = isa_try_create(bus, TYPE_ISA_FDC); + if (!isadev) { + return NULL; + } + dev = DEVICE(isadev); + + if (fds[0]) { + qdev_prop_set_drive_nofail(dev, "driveA", blk_by_legacy_dinfo(fds[0])); + } + if (fds[1]) { + qdev_prop_set_drive_nofail(dev, "driveB", blk_by_legacy_dinfo(fds[1])); + } + qdev_init_nofail(dev); + + return isadev; +} + +void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, + hwaddr mmio_base, DriveInfo **fds) +{ + FDCtrl *fdctrl; + DeviceState *dev; + SysBusDevice *sbd; + FDCtrlSysBus *sys; + + dev = qdev_create(NULL, "sysbus-fdc"); + sys = SYSBUS_FDC(dev); + fdctrl = &sys->state; + fdctrl->dma_chann = dma_chann; /* FIXME */ + if (fds[0]) { + qdev_prop_set_drive_nofail(dev, "driveA", blk_by_legacy_dinfo(fds[0])); + } + if (fds[1]) { + qdev_prop_set_drive_nofail(dev, "driveB", blk_by_legacy_dinfo(fds[1])); + } + qdev_init_nofail(dev); + sbd = SYS_BUS_DEVICE(dev); + sysbus_connect_irq(sbd, 0, irq); + sysbus_mmio_map(sbd, 0, mmio_base); +} + +void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, + DriveInfo **fds, qemu_irq *fdc_tc) +{ + DeviceState *dev; + FDCtrlSysBus *sys; + + dev = qdev_create(NULL, "SUNW,fdtwo"); + if (fds[0]) { + qdev_prop_set_drive_nofail(dev, "drive", blk_by_legacy_dinfo(fds[0])); + } + qdev_init_nofail(dev); + sys = SYSBUS_FDC(dev); + sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); + sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); + *fdc_tc = qdev_get_gpio_in(dev, 0); +} + +static void fdctrl_realize_common(FDCtrl *fdctrl, Error **errp) +{ + int i, j; + static int command_tables_inited = 0; + + /* Fill 'command_to_handler' lookup table */ + if (!command_tables_inited) { + command_tables_inited = 1; + for (i = ARRAY_SIZE(handlers) - 1; i >= 0; i--) { + for (j = 0; j < sizeof(command_to_handler); j++) { + if ((j & handlers[i].mask) == handlers[i].value) { + command_to_handler[j] = i; + } + } + } + } + + FLOPPY_DPRINTF("init controller\n"); + fdctrl->fifo = qemu_memalign(512, FD_SECTOR_LEN); + fdctrl->fifo_size = 512; + fdctrl->result_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + fdctrl_result_timer, fdctrl); + + fdctrl->version = 0x90; /* Intel 82078 controller */ + fdctrl->config = FD_CONFIG_EIS | FD_CONFIG_EFIFO; /* Implicit seek, polling & FIFO enabled */ + fdctrl->num_floppies = MAX_FD; + + if (fdctrl->dma_chann != -1) { + DMA_register_channel(fdctrl->dma_chann, &fdctrl_transfer_handler, fdctrl); + } + fdctrl_connect_drives(fdctrl, errp); +} + +static const MemoryRegionPortio fdc_portio_list[] = { + { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, + { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, + PORTIO_END_OF_LIST(), +}; + +static void isabus_fdc_realize(DeviceState *dev, Error **errp) +{ + ISADevice *isadev = ISA_DEVICE(dev); + FDCtrlISABus *isa = ISA_FDC(dev); + FDCtrl *fdctrl = &isa->state; + Error *err = NULL; + + isa_register_portio_list(isadev, isa->iobase, fdc_portio_list, fdctrl, + "fdc"); + + isa_init_irq(isadev, &fdctrl->irq, isa->irq); + fdctrl->dma_chann = isa->dma; + + qdev_set_legacy_instance_id(dev, isa->iobase, 2); + fdctrl_realize_common(fdctrl, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } +} + +static void sysbus_fdc_initfn(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + FDCtrlSysBus *sys = SYSBUS_FDC(obj); + FDCtrl *fdctrl = &sys->state; + + fdctrl->dma_chann = -1; + + memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_ops, fdctrl, + "fdc", 0x08); + sysbus_init_mmio(sbd, &fdctrl->iomem); +} + +static void sun4m_fdc_initfn(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + FDCtrlSysBus *sys = SYSBUS_FDC(obj); + FDCtrl *fdctrl = &sys->state; + + memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_strict_ops, + fdctrl, "fdctrl", 0x08); + sysbus_init_mmio(sbd, &fdctrl->iomem); +} + +static void sysbus_fdc_common_initfn(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + FDCtrlSysBus *sys = SYSBUS_FDC(obj); + FDCtrl *fdctrl = &sys->state; + + qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ + + sysbus_init_irq(sbd, &fdctrl->irq); + qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); +} + +static void sysbus_fdc_common_realize(DeviceState *dev, Error **errp) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(dev); + FDCtrl *fdctrl = &sys->state; + + fdctrl_realize_common(fdctrl, errp); +} + +FDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) +{ + FDCtrlISABus *isa = ISA_FDC(fdc); + + return isa->state.drives[i].drive; +} + +static const VMStateDescription vmstate_isa_fdc ={ + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static Property isa_fdc_properties[] = { + DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), + DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), + DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), + DEFINE_PROP_DRIVE("driveA", FDCtrlISABus, state.drives[0].blk), + DEFINE_PROP_DRIVE("driveB", FDCtrlISABus, state.drives[1].blk), + DEFINE_PROP_BIT("check_media_rate", FDCtrlISABus, state.check_media_rate, + 0, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void isabus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = isabus_fdc_realize; + dc->fw_name = "fdc"; + dc->reset = fdctrl_external_reset_isa; + dc->vmsd = &vmstate_isa_fdc; + dc->props = isa_fdc_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static void isabus_fdc_instance_init(Object *obj) +{ + FDCtrlISABus *isa = ISA_FDC(obj); + + device_add_bootindex_property(obj, &isa->bootindexA, + "bootindexA", "/floppy@0", + DEVICE(obj), NULL); + device_add_bootindex_property(obj, &isa->bootindexB, + "bootindexB", "/floppy@1", + DEVICE(obj), NULL); +} + +static const TypeInfo isa_fdc_info = { + .name = TYPE_ISA_FDC, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(FDCtrlISABus), + .class_init = isabus_fdc_class_init, + .instance_init = isabus_fdc_instance_init, +}; + +static const VMStateDescription vmstate_sysbus_fdc ={ + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static Property sysbus_fdc_properties[] = { + DEFINE_PROP_DRIVE("driveA", FDCtrlSysBus, state.drives[0].blk), + DEFINE_PROP_DRIVE("driveB", FDCtrlSysBus, state.drives[1].blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sysbus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = sysbus_fdc_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sysbus_fdc_info = { + .name = "sysbus-fdc", + .parent = TYPE_SYSBUS_FDC, + .instance_init = sysbus_fdc_initfn, + .class_init = sysbus_fdc_class_init, +}; + +static Property sun4m_fdc_properties[] = { + DEFINE_PROP_DRIVE("drive", FDCtrlSysBus, state.drives[0].blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sun4m_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = sun4m_fdc_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sun4m_fdc_info = { + .name = "SUNW,fdtwo", + .parent = TYPE_SYSBUS_FDC, + .instance_init = sun4m_fdc_initfn, + .class_init = sun4m_fdc_class_init, +}; + +static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = sysbus_fdc_common_realize; + dc->reset = fdctrl_external_reset_sysbus; + dc->vmsd = &vmstate_sysbus_fdc; +} + +static const TypeInfo sysbus_fdc_type_info = { + .name = TYPE_SYSBUS_FDC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(FDCtrlSysBus), + .instance_init = sysbus_fdc_common_initfn, + .abstract = true, + .class_init = sysbus_fdc_common_class_init, +}; + +static void fdc_register_types(void) +{ + type_register_static(&isa_fdc_info); + type_register_static(&sysbus_fdc_type_info); + type_register_static(&sysbus_fdc_info); + type_register_static(&sun4m_fdc_info); +} + +type_init(fdc_register_types) diff --git a/qemu/hw/block/hd-geometry.c b/qemu/hw/block/hd-geometry.c new file mode 100644 index 000000000..b187878fa --- /dev/null +++ b/qemu/hw/block/hd-geometry.c @@ -0,0 +1,165 @@ +/* + * Hard disk geometry utilities + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "trace.h" + +struct partition { + uint8_t boot_ind; /* 0x80 - active */ + uint8_t head; /* starting head */ + uint8_t sector; /* starting sector */ + uint8_t cyl; /* starting cylinder */ + uint8_t sys_ind; /* What partition type */ + uint8_t end_head; /* end head */ + uint8_t end_sector; /* end sector */ + uint8_t end_cyl; /* end cylinder */ + uint32_t start_sect; /* starting sector counting from 0 */ + uint32_t nr_sects; /* nr of sectors in partition */ +} QEMU_PACKED; + +/* try to guess the disk logical geometry from the MSDOS partition table. + Return 0 if OK, -1 if could not guess */ +static int guess_disk_lchs(BlockBackend *blk, + int *pcylinders, int *pheads, int *psectors) +{ + uint8_t buf[BDRV_SECTOR_SIZE]; + int i, heads, sectors, cylinders; + struct partition *p; + uint32_t nr_sects; + uint64_t nb_sectors; + + blk_get_geometry(blk, &nb_sectors); + + /** + * The function will be invoked during startup not only in sync I/O mode, + * but also in async I/O mode. So the I/O throttling function has to + * be disabled temporarily here, not permanently. + */ + if (blk_read_unthrottled(blk, 0, buf, 1) < 0) { + return -1; + } + /* test msdos magic */ + if (buf[510] != 0x55 || buf[511] != 0xaa) { + return -1; + } + for (i = 0; i < 4; i++) { + p = ((struct partition *)(buf + 0x1be)) + i; + nr_sects = le32_to_cpu(p->nr_sects); + if (nr_sects && p->end_head) { + /* We make the assumption that the partition terminates on + a cylinder boundary */ + heads = p->end_head + 1; + sectors = p->end_sector & 63; + if (sectors == 0) { + continue; + } + cylinders = nb_sectors / (heads * sectors); + if (cylinders < 1 || cylinders > 16383) { + continue; + } + *pheads = heads; + *psectors = sectors; + *pcylinders = cylinders; + trace_hd_geometry_lchs_guess(blk, cylinders, heads, sectors); + return 0; + } + } + return -1; +} + +static void guess_chs_for_size(BlockBackend *blk, + uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs) +{ + uint64_t nb_sectors; + int cylinders; + + blk_get_geometry(blk, &nb_sectors); + + cylinders = nb_sectors / (16 * 63); + if (cylinders > 16383) { + cylinders = 16383; + } else if (cylinders < 2) { + cylinders = 2; + } + *pcyls = cylinders; + *pheads = 16; + *psecs = 63; +} + +void hd_geometry_guess(BlockBackend *blk, + uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs, + int *ptrans) +{ + int cylinders, heads, secs, translation; + HDGeometry geo; + + /* Try to probe the backing device geometry, otherwise fallback + to the old logic. (as of 12/2014 probing only succeeds on DASDs) */ + if (blk_probe_geometry(blk, &geo) == 0) { + *pcyls = geo.cylinders; + *psecs = geo.sectors; + *pheads = geo.heads; + translation = BIOS_ATA_TRANSLATION_NONE; + } else if (guess_disk_lchs(blk, &cylinders, &heads, &secs) < 0) { + /* no LCHS guess: use a standard physical disk geometry */ + guess_chs_for_size(blk, pcyls, pheads, psecs); + translation = hd_bios_chs_auto_trans(*pcyls, *pheads, *psecs); + } else if (heads > 16) { + /* LCHS guess with heads > 16 means that a BIOS LBA + translation was active, so a standard physical disk + geometry is OK */ + guess_chs_for_size(blk, pcyls, pheads, psecs); + translation = *pcyls * *pheads <= 131072 + ? BIOS_ATA_TRANSLATION_LARGE + : BIOS_ATA_TRANSLATION_LBA; + } else { + /* LCHS guess with heads <= 16: use as physical geometry */ + *pcyls = cylinders; + *pheads = heads; + *psecs = secs; + /* disable any translation to be in sync with + the logical geometry */ + translation = BIOS_ATA_TRANSLATION_NONE; + } + if (ptrans) { + *ptrans = translation; + } + trace_hd_geometry_guess(blk, *pcyls, *pheads, *psecs, translation); +} + +int hd_bios_chs_auto_trans(uint32_t cyls, uint32_t heads, uint32_t secs) +{ + return cyls <= 1024 && heads <= 16 && secs <= 63 + ? BIOS_ATA_TRANSLATION_NONE + : BIOS_ATA_TRANSLATION_LBA; +} diff --git a/qemu/hw/block/m25p80.c b/qemu/hw/block/m25p80.c new file mode 100644 index 000000000..efc43dde6 --- /dev/null +++ b/qemu/hw/block/m25p80.c @@ -0,0 +1,711 @@ +/* + * ST M25P80 emulator. Emulate all SPI flash devices based on the m25p80 command + * set. Known devices table current as of Jun/2012 and taken from linux. + * See drivers/mtd/devices/m25p80.c. + * + * Copyright (C) 2011 Edgar E. Iglesias <edgar.iglesias@gmail.com> + * Copyright (C) 2012 Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com> + * Copyright (C) 2012 PetaLogix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) a later version of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/hw.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/ssi.h" + +#ifndef M25P80_ERR_DEBUG +#define M25P80_ERR_DEBUG 0 +#endif + +#define DB_PRINT_L(level, ...) do { \ + if (M25P80_ERR_DEBUG > (level)) { \ + fprintf(stderr, ": %s: ", __func__); \ + fprintf(stderr, ## __VA_ARGS__); \ + } \ +} while (0); + +/* Fields for FlashPartInfo->flags */ + +/* erase capabilities */ +#define ER_4K 1 +#define ER_32K 2 +/* set to allow the page program command to write 0s back to 1. Useful for + * modelling EEPROM with SPI flash command set + */ +#define WR_1 0x100 + +typedef struct FlashPartInfo { + const char *part_name; + /* jedec code. (jedec >> 16) & 0xff is the 1st byte, >> 8 the 2nd etc */ + uint32_t jedec; + /* extended jedec code */ + uint16_t ext_jedec; + /* there is confusion between manufacturers as to what a sector is. In this + * device model, a "sector" is the size that is erased by the ERASE_SECTOR + * command (opcode 0xd8). + */ + uint32_t sector_size; + uint32_t n_sectors; + uint32_t page_size; + uint8_t flags; +} FlashPartInfo; + +/* adapted from linux */ + +#define INFO(_part_name, _jedec, _ext_jedec, _sector_size, _n_sectors, _flags)\ + .part_name = (_part_name),\ + .jedec = (_jedec),\ + .ext_jedec = (_ext_jedec),\ + .sector_size = (_sector_size),\ + .n_sectors = (_n_sectors),\ + .page_size = 256,\ + .flags = (_flags),\ + +#define JEDEC_NUMONYX 0x20 +#define JEDEC_WINBOND 0xEF +#define JEDEC_SPANSION 0x01 + +static const FlashPartInfo known_devices[] = { + /* Atmel -- some are (confusingly) marketed as "DataFlash" */ + { INFO("at25fs010", 0x1f6601, 0, 32 << 10, 4, ER_4K) }, + { INFO("at25fs040", 0x1f6604, 0, 64 << 10, 8, ER_4K) }, + + { INFO("at25df041a", 0x1f4401, 0, 64 << 10, 8, ER_4K) }, + { INFO("at25df321a", 0x1f4701, 0, 64 << 10, 64, ER_4K) }, + { INFO("at25df641", 0x1f4800, 0, 64 << 10, 128, ER_4K) }, + + { INFO("at26f004", 0x1f0400, 0, 64 << 10, 8, ER_4K) }, + { INFO("at26df081a", 0x1f4501, 0, 64 << 10, 16, ER_4K) }, + { INFO("at26df161a", 0x1f4601, 0, 64 << 10, 32, ER_4K) }, + { INFO("at26df321", 0x1f4700, 0, 64 << 10, 64, ER_4K) }, + + { INFO("at45db081d", 0x1f2500, 0, 64 << 10, 16, ER_4K) }, + + /* EON -- en25xxx */ + { INFO("en25f32", 0x1c3116, 0, 64 << 10, 64, ER_4K) }, + { INFO("en25p32", 0x1c2016, 0, 64 << 10, 64, 0) }, + { INFO("en25q32b", 0x1c3016, 0, 64 << 10, 64, 0) }, + { INFO("en25p64", 0x1c2017, 0, 64 << 10, 128, 0) }, + { INFO("en25q64", 0x1c3017, 0, 64 << 10, 128, ER_4K) }, + + /* GigaDevice */ + { INFO("gd25q32", 0xc84016, 0, 64 << 10, 64, ER_4K) }, + { INFO("gd25q64", 0xc84017, 0, 64 << 10, 128, ER_4K) }, + + /* Intel/Numonyx -- xxxs33b */ + { INFO("160s33b", 0x898911, 0, 64 << 10, 32, 0) }, + { INFO("320s33b", 0x898912, 0, 64 << 10, 64, 0) }, + { INFO("640s33b", 0x898913, 0, 64 << 10, 128, 0) }, + { INFO("n25q064", 0x20ba17, 0, 64 << 10, 128, 0) }, + + /* Macronix */ + { INFO("mx25l2005a", 0xc22012, 0, 64 << 10, 4, ER_4K) }, + { INFO("mx25l4005a", 0xc22013, 0, 64 << 10, 8, ER_4K) }, + { INFO("mx25l8005", 0xc22014, 0, 64 << 10, 16, 0) }, + { INFO("mx25l1606e", 0xc22015, 0, 64 << 10, 32, ER_4K) }, + { INFO("mx25l3205d", 0xc22016, 0, 64 << 10, 64, 0) }, + { INFO("mx25l6405d", 0xc22017, 0, 64 << 10, 128, 0) }, + { INFO("mx25l12805d", 0xc22018, 0, 64 << 10, 256, 0) }, + { INFO("mx25l12855e", 0xc22618, 0, 64 << 10, 256, 0) }, + { INFO("mx25l25635e", 0xc22019, 0, 64 << 10, 512, 0) }, + { INFO("mx25l25655e", 0xc22619, 0, 64 << 10, 512, 0) }, + + /* Micron */ + { INFO("n25q032a11", 0x20bb16, 0, 64 << 10, 64, ER_4K) }, + { INFO("n25q032a13", 0x20ba16, 0, 64 << 10, 64, ER_4K) }, + { INFO("n25q064a11", 0x20bb17, 0, 64 << 10, 128, ER_4K) }, + { INFO("n25q064a13", 0x20ba17, 0, 64 << 10, 128, ER_4K) }, + { INFO("n25q128a11", 0x20bb18, 0, 64 << 10, 256, ER_4K) }, + { INFO("n25q128a13", 0x20ba18, 0, 64 << 10, 256, ER_4K) }, + { INFO("n25q256a11", 0x20bb19, 0, 64 << 10, 512, ER_4K) }, + { INFO("n25q256a13", 0x20ba19, 0, 64 << 10, 512, ER_4K) }, + + /* Spansion -- single (large) sector size only, at least + * for the chips listed here (without boot sectors). + */ + { INFO("s25sl032p", 0x010215, 0x4d00, 64 << 10, 64, ER_4K) }, + { INFO("s25sl064p", 0x010216, 0x4d00, 64 << 10, 128, ER_4K) }, + { INFO("s25fl256s0", 0x010219, 0x4d00, 256 << 10, 128, 0) }, + { INFO("s25fl256s1", 0x010219, 0x4d01, 64 << 10, 512, 0) }, + { INFO("s25fl512s", 0x010220, 0x4d00, 256 << 10, 256, 0) }, + { INFO("s70fl01gs", 0x010221, 0x4d00, 256 << 10, 256, 0) }, + { INFO("s25sl12800", 0x012018, 0x0300, 256 << 10, 64, 0) }, + { INFO("s25sl12801", 0x012018, 0x0301, 64 << 10, 256, 0) }, + { INFO("s25fl129p0", 0x012018, 0x4d00, 256 << 10, 64, 0) }, + { INFO("s25fl129p1", 0x012018, 0x4d01, 64 << 10, 256, 0) }, + { INFO("s25sl004a", 0x010212, 0, 64 << 10, 8, 0) }, + { INFO("s25sl008a", 0x010213, 0, 64 << 10, 16, 0) }, + { INFO("s25sl016a", 0x010214, 0, 64 << 10, 32, 0) }, + { INFO("s25sl032a", 0x010215, 0, 64 << 10, 64, 0) }, + { INFO("s25sl064a", 0x010216, 0, 64 << 10, 128, 0) }, + { INFO("s25fl016k", 0xef4015, 0, 64 << 10, 32, ER_4K | ER_32K) }, + { INFO("s25fl064k", 0xef4017, 0, 64 << 10, 128, ER_4K | ER_32K) }, + + /* SST -- large erase sizes are "overlays", "sectors" are 4<< 10 */ + { INFO("sst25vf040b", 0xbf258d, 0, 64 << 10, 8, ER_4K) }, + { INFO("sst25vf080b", 0xbf258e, 0, 64 << 10, 16, ER_4K) }, + { INFO("sst25vf016b", 0xbf2541, 0, 64 << 10, 32, ER_4K) }, + { INFO("sst25vf032b", 0xbf254a, 0, 64 << 10, 64, ER_4K) }, + { INFO("sst25wf512", 0xbf2501, 0, 64 << 10, 1, ER_4K) }, + { INFO("sst25wf010", 0xbf2502, 0, 64 << 10, 2, ER_4K) }, + { INFO("sst25wf020", 0xbf2503, 0, 64 << 10, 4, ER_4K) }, + { INFO("sst25wf040", 0xbf2504, 0, 64 << 10, 8, ER_4K) }, + + /* ST Microelectronics -- newer production may have feature updates */ + { INFO("m25p05", 0x202010, 0, 32 << 10, 2, 0) }, + { INFO("m25p10", 0x202011, 0, 32 << 10, 4, 0) }, + { INFO("m25p20", 0x202012, 0, 64 << 10, 4, 0) }, + { INFO("m25p40", 0x202013, 0, 64 << 10, 8, 0) }, + { INFO("m25p80", 0x202014, 0, 64 << 10, 16, 0) }, + { INFO("m25p16", 0x202015, 0, 64 << 10, 32, 0) }, + { INFO("m25p32", 0x202016, 0, 64 << 10, 64, 0) }, + { INFO("m25p64", 0x202017, 0, 64 << 10, 128, 0) }, + { INFO("m25p128", 0x202018, 0, 256 << 10, 64, 0) }, + { INFO("n25q032", 0x20ba16, 0, 64 << 10, 64, 0) }, + + { INFO("m45pe10", 0x204011, 0, 64 << 10, 2, 0) }, + { INFO("m45pe80", 0x204014, 0, 64 << 10, 16, 0) }, + { INFO("m45pe16", 0x204015, 0, 64 << 10, 32, 0) }, + + { INFO("m25pe20", 0x208012, 0, 64 << 10, 4, 0) }, + { INFO("m25pe80", 0x208014, 0, 64 << 10, 16, 0) }, + { INFO("m25pe16", 0x208015, 0, 64 << 10, 32, ER_4K) }, + + { INFO("m25px32", 0x207116, 0, 64 << 10, 64, ER_4K) }, + { INFO("m25px32-s0", 0x207316, 0, 64 << 10, 64, ER_4K) }, + { INFO("m25px32-s1", 0x206316, 0, 64 << 10, 64, ER_4K) }, + { INFO("m25px64", 0x207117, 0, 64 << 10, 128, 0) }, + + /* Winbond -- w25x "blocks" are 64k, "sectors" are 4KiB */ + { INFO("w25x10", 0xef3011, 0, 64 << 10, 2, ER_4K) }, + { INFO("w25x20", 0xef3012, 0, 64 << 10, 4, ER_4K) }, + { INFO("w25x40", 0xef3013, 0, 64 << 10, 8, ER_4K) }, + { INFO("w25x80", 0xef3014, 0, 64 << 10, 16, ER_4K) }, + { INFO("w25x16", 0xef3015, 0, 64 << 10, 32, ER_4K) }, + { INFO("w25x32", 0xef3016, 0, 64 << 10, 64, ER_4K) }, + { INFO("w25q32", 0xef4016, 0, 64 << 10, 64, ER_4K) }, + { INFO("w25q32dw", 0xef6016, 0, 64 << 10, 64, ER_4K) }, + { INFO("w25x64", 0xef3017, 0, 64 << 10, 128, ER_4K) }, + { INFO("w25q64", 0xef4017, 0, 64 << 10, 128, ER_4K) }, + { INFO("w25q80", 0xef5014, 0, 64 << 10, 16, ER_4K) }, + { INFO("w25q80bl", 0xef4014, 0, 64 << 10, 16, ER_4K) }, + { INFO("w25q256", 0xef4019, 0, 64 << 10, 512, ER_4K) }, + + /* Numonyx -- n25q128 */ + { INFO("n25q128", 0x20ba18, 0, 64 << 10, 256, 0) }, +}; + +typedef enum { + NOP = 0, + WRSR = 0x1, + WRDI = 0x4, + RDSR = 0x5, + WREN = 0x6, + JEDEC_READ = 0x9f, + BULK_ERASE = 0xc7, + + READ = 0x3, + FAST_READ = 0xb, + DOR = 0x3b, + QOR = 0x6b, + DIOR = 0xbb, + QIOR = 0xeb, + + PP = 0x2, + DPP = 0xa2, + QPP = 0x32, + + ERASE_4K = 0x20, + ERASE_32K = 0x52, + ERASE_SECTOR = 0xd8, +} FlashCMD; + +typedef enum { + STATE_IDLE, + STATE_PAGE_PROGRAM, + STATE_READ, + STATE_COLLECTING_DATA, + STATE_READING_DATA, +} CMDState; + +typedef struct Flash { + SSISlave parent_obj; + + uint32_t r; + + BlockBackend *blk; + + uint8_t *storage; + uint32_t size; + int page_size; + + uint8_t state; + uint8_t data[16]; + uint32_t len; + uint32_t pos; + uint8_t needed_bytes; + uint8_t cmd_in_progress; + uint64_t cur_addr; + bool write_enable; + + int64_t dirty_page; + + const FlashPartInfo *pi; + +} Flash; + +typedef struct M25P80Class { + SSISlaveClass parent_class; + FlashPartInfo *pi; +} M25P80Class; + +#define TYPE_M25P80 "m25p80-generic" +#define M25P80(obj) \ + OBJECT_CHECK(Flash, (obj), TYPE_M25P80) +#define M25P80_CLASS(klass) \ + OBJECT_CLASS_CHECK(M25P80Class, (klass), TYPE_M25P80) +#define M25P80_GET_CLASS(obj) \ + OBJECT_GET_CLASS(M25P80Class, (obj), TYPE_M25P80) + +static void blk_sync_complete(void *opaque, int ret) +{ + /* do nothing. Masters do not directly interact with the backing store, + * only the working copy so no mutexing required. + */ +} + +static void flash_sync_page(Flash *s, int page) +{ + int blk_sector, nb_sectors; + QEMUIOVector iov; + + if (!s->blk || blk_is_read_only(s->blk)) { + return; + } + + blk_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE; + nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE); + qemu_iovec_init(&iov, 1); + qemu_iovec_add(&iov, s->storage + blk_sector * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); + blk_aio_writev(s->blk, blk_sector, &iov, nb_sectors, blk_sync_complete, + NULL); +} + +static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) +{ + int64_t start, end, nb_sectors; + QEMUIOVector iov; + + if (!s->blk || blk_is_read_only(s->blk)) { + return; + } + + assert(!(len % BDRV_SECTOR_SIZE)); + start = off / BDRV_SECTOR_SIZE; + end = (off + len) / BDRV_SECTOR_SIZE; + nb_sectors = end - start; + qemu_iovec_init(&iov, 1); + qemu_iovec_add(&iov, s->storage + (start * BDRV_SECTOR_SIZE), + nb_sectors * BDRV_SECTOR_SIZE); + blk_aio_writev(s->blk, start, &iov, nb_sectors, blk_sync_complete, NULL); +} + +static void flash_erase(Flash *s, int offset, FlashCMD cmd) +{ + uint32_t len; + uint8_t capa_to_assert = 0; + + switch (cmd) { + case ERASE_4K: + len = 4 << 10; + capa_to_assert = ER_4K; + break; + case ERASE_32K: + len = 32 << 10; + capa_to_assert = ER_32K; + break; + case ERASE_SECTOR: + len = s->pi->sector_size; + break; + case BULK_ERASE: + len = s->size; + break; + default: + abort(); + } + + DB_PRINT_L(0, "offset = %#x, len = %d\n", offset, len); + if ((s->pi->flags & capa_to_assert) != capa_to_assert) { + qemu_log_mask(LOG_GUEST_ERROR, "M25P80: %d erase size not supported by" + " device\n", len); + } + + if (!s->write_enable) { + qemu_log_mask(LOG_GUEST_ERROR, "M25P80: erase with write protect!\n"); + return; + } + memset(s->storage + offset, 0xff, len); + flash_sync_area(s, offset, len); +} + +static inline void flash_sync_dirty(Flash *s, int64_t newpage) +{ + if (s->dirty_page >= 0 && s->dirty_page != newpage) { + flash_sync_page(s, s->dirty_page); + s->dirty_page = newpage; + } +} + +static inline +void flash_write8(Flash *s, uint64_t addr, uint8_t data) +{ + int64_t page = addr / s->pi->page_size; + uint8_t prev = s->storage[s->cur_addr]; + + if (!s->write_enable) { + qemu_log_mask(LOG_GUEST_ERROR, "M25P80: write with write protect!\n"); + } + + if ((prev ^ data) & data) { + DB_PRINT_L(1, "programming zero to one! addr=%" PRIx64 " %" PRIx8 + " -> %" PRIx8 "\n", addr, prev, data); + } + + if (s->pi->flags & WR_1) { + s->storage[s->cur_addr] = data; + } else { + s->storage[s->cur_addr] &= data; + } + + flash_sync_dirty(s, page); + s->dirty_page = page; +} + +static void complete_collecting_data(Flash *s) +{ + s->cur_addr = s->data[0] << 16; + s->cur_addr |= s->data[1] << 8; + s->cur_addr |= s->data[2]; + + s->state = STATE_IDLE; + + switch (s->cmd_in_progress) { + case DPP: + case QPP: + case PP: + s->state = STATE_PAGE_PROGRAM; + break; + case READ: + case FAST_READ: + case DOR: + case QOR: + case DIOR: + case QIOR: + s->state = STATE_READ; + break; + case ERASE_4K: + case ERASE_32K: + case ERASE_SECTOR: + flash_erase(s, s->cur_addr, s->cmd_in_progress); + break; + case WRSR: + if (s->write_enable) { + s->write_enable = false; + } + break; + default: + break; + } +} + +static void decode_new_cmd(Flash *s, uint32_t value) +{ + s->cmd_in_progress = value; + DB_PRINT_L(0, "decoded new command:%x\n", value); + + switch (value) { + + case ERASE_4K: + case ERASE_32K: + case ERASE_SECTOR: + case READ: + case DPP: + case QPP: + case PP: + s->needed_bytes = 3; + s->pos = 0; + s->len = 0; + s->state = STATE_COLLECTING_DATA; + break; + + case FAST_READ: + case DOR: + case QOR: + s->needed_bytes = 4; + s->pos = 0; + s->len = 0; + s->state = STATE_COLLECTING_DATA; + break; + + case DIOR: + switch ((s->pi->jedec >> 16) & 0xFF) { + case JEDEC_WINBOND: + case JEDEC_SPANSION: + s->needed_bytes = 4; + break; + case JEDEC_NUMONYX: + default: + s->needed_bytes = 5; + } + s->pos = 0; + s->len = 0; + s->state = STATE_COLLECTING_DATA; + break; + + case QIOR: + switch ((s->pi->jedec >> 16) & 0xFF) { + case JEDEC_WINBOND: + case JEDEC_SPANSION: + s->needed_bytes = 6; + break; + case JEDEC_NUMONYX: + default: + s->needed_bytes = 8; + } + s->pos = 0; + s->len = 0; + s->state = STATE_COLLECTING_DATA; + break; + + case WRSR: + if (s->write_enable) { + s->needed_bytes = 1; + s->pos = 0; + s->len = 0; + s->state = STATE_COLLECTING_DATA; + } + break; + + case WRDI: + s->write_enable = false; + break; + case WREN: + s->write_enable = true; + break; + + case RDSR: + s->data[0] = (!!s->write_enable) << 1; + s->pos = 0; + s->len = 1; + s->state = STATE_READING_DATA; + break; + + case JEDEC_READ: + DB_PRINT_L(0, "populated jedec code\n"); + s->data[0] = (s->pi->jedec >> 16) & 0xff; + s->data[1] = (s->pi->jedec >> 8) & 0xff; + s->data[2] = s->pi->jedec & 0xff; + if (s->pi->ext_jedec) { + s->data[3] = (s->pi->ext_jedec >> 8) & 0xff; + s->data[4] = s->pi->ext_jedec & 0xff; + s->len = 5; + } else { + s->len = 3; + } + s->pos = 0; + s->state = STATE_READING_DATA; + break; + + case BULK_ERASE: + if (s->write_enable) { + DB_PRINT_L(0, "chip erase\n"); + flash_erase(s, 0, BULK_ERASE); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "M25P80: chip erase with write " + "protect!\n"); + } + break; + case NOP: + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Unknown cmd %x\n", value); + break; + } +} + +static int m25p80_cs(SSISlave *ss, bool select) +{ + Flash *s = M25P80(ss); + + if (select) { + s->len = 0; + s->pos = 0; + s->state = STATE_IDLE; + flash_sync_dirty(s, -1); + } + + DB_PRINT_L(0, "%sselect\n", select ? "de" : ""); + + return 0; +} + +static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx) +{ + Flash *s = M25P80(ss); + uint32_t r = 0; + + switch (s->state) { + + case STATE_PAGE_PROGRAM: + DB_PRINT_L(1, "page program cur_addr=%#" PRIx64 " data=%" PRIx8 "\n", + s->cur_addr, (uint8_t)tx); + flash_write8(s, s->cur_addr, (uint8_t)tx); + s->cur_addr++; + break; + + case STATE_READ: + r = s->storage[s->cur_addr]; + DB_PRINT_L(1, "READ 0x%" PRIx64 "=%" PRIx8 "\n", s->cur_addr, + (uint8_t)r); + s->cur_addr = (s->cur_addr + 1) % s->size; + break; + + case STATE_COLLECTING_DATA: + s->data[s->len] = (uint8_t)tx; + s->len++; + + if (s->len == s->needed_bytes) { + complete_collecting_data(s); + } + break; + + case STATE_READING_DATA: + r = s->data[s->pos]; + s->pos++; + if (s->pos == s->len) { + s->pos = 0; + s->state = STATE_IDLE; + } + break; + + default: + case STATE_IDLE: + decode_new_cmd(s, (uint8_t)tx); + break; + } + + return r; +} + +static int m25p80_init(SSISlave *ss) +{ + DriveInfo *dinfo; + Flash *s = M25P80(ss); + M25P80Class *mc = M25P80_GET_CLASS(s); + + s->pi = mc->pi; + + s->size = s->pi->sector_size * s->pi->n_sectors; + s->dirty_page = -1; + + /* FIXME use a qdev drive property instead of drive_get_next() */ + dinfo = drive_get_next(IF_MTD); + + if (dinfo) { + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); + s->blk = blk_by_legacy_dinfo(dinfo); + blk_attach_dev_nofail(s->blk, s); + + s->storage = blk_blockalign(s->blk, s->size); + + /* FIXME: Move to late init */ + if (blk_read(s->blk, 0, s->storage, + DIV_ROUND_UP(s->size, BDRV_SECTOR_SIZE))) { + fprintf(stderr, "Failed to initialize SPI flash!\n"); + return 1; + } + } else { + DB_PRINT_L(0, "No BDRV - binding to RAM\n"); + s->storage = blk_blockalign(NULL, s->size); + memset(s->storage, 0xFF, s->size); + } + + return 0; +} + +static void m25p80_pre_save(void *opaque) +{ + flash_sync_dirty((Flash *)opaque, -1); +} + +static const VMStateDescription vmstate_m25p80 = { + .name = "xilinx_spi", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = m25p80_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT8(state, Flash), + VMSTATE_UINT8_ARRAY(data, Flash, 16), + VMSTATE_UINT32(len, Flash), + VMSTATE_UINT32(pos, Flash), + VMSTATE_UINT8(needed_bytes, Flash), + VMSTATE_UINT8(cmd_in_progress, Flash), + VMSTATE_UINT64(cur_addr, Flash), + VMSTATE_BOOL(write_enable, Flash), + VMSTATE_END_OF_LIST() + } +}; + +static void m25p80_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SSISlaveClass *k = SSI_SLAVE_CLASS(klass); + M25P80Class *mc = M25P80_CLASS(klass); + + k->init = m25p80_init; + k->transfer = m25p80_transfer8; + k->set_cs = m25p80_cs; + k->cs_polarity = SSI_CS_LOW; + dc->vmsd = &vmstate_m25p80; + mc->pi = data; +} + +static const TypeInfo m25p80_info = { + .name = TYPE_M25P80, + .parent = TYPE_SSI_SLAVE, + .instance_size = sizeof(Flash), + .class_size = sizeof(M25P80Class), + .abstract = true, +}; + +static void m25p80_register_types(void) +{ + int i; + + type_register_static(&m25p80_info); + for (i = 0; i < ARRAY_SIZE(known_devices); ++i) { + TypeInfo ti = { + .name = known_devices[i].part_name, + .parent = TYPE_M25P80, + .class_init = m25p80_class_init, + .class_data = (void *)&known_devices[i], + }; + type_register(&ti); + } +} + +type_init(m25p80_register_types) diff --git a/qemu/hw/block/nand.c b/qemu/hw/block/nand.c new file mode 100644 index 000000000..61d2cec03 --- /dev/null +++ b/qemu/hw/block/nand.c @@ -0,0 +1,799 @@ +/* + * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash + * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from + * Samsung Electronic. + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski <balrog@zabor.org> + * + * Support for additional features based on "MT29F2G16ABCWP 2Gx16" + * datasheet from Micron Technology and "NAND02G-B2C" datasheet + * from ST Microelectronics. + * + * This code is licensed under the GNU GPL v2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef NAND_IO + +# include "hw/hw.h" +# include "hw/block/flash.h" +#include "sysemu/block-backend.h" +#include "hw/qdev.h" +#include "qemu/error-report.h" + +# define NAND_CMD_READ0 0x00 +# define NAND_CMD_READ1 0x01 +# define NAND_CMD_READ2 0x50 +# define NAND_CMD_LPREAD2 0x30 +# define NAND_CMD_NOSERIALREAD2 0x35 +# define NAND_CMD_RANDOMREAD1 0x05 +# define NAND_CMD_RANDOMREAD2 0xe0 +# define NAND_CMD_READID 0x90 +# define NAND_CMD_RESET 0xff +# define NAND_CMD_PAGEPROGRAM1 0x80 +# define NAND_CMD_PAGEPROGRAM2 0x10 +# define NAND_CMD_CACHEPROGRAM2 0x15 +# define NAND_CMD_BLOCKERASE1 0x60 +# define NAND_CMD_BLOCKERASE2 0xd0 +# define NAND_CMD_READSTATUS 0x70 +# define NAND_CMD_COPYBACKPRG1 0x85 + +# define NAND_IOSTATUS_ERROR (1 << 0) +# define NAND_IOSTATUS_PLANE0 (1 << 1) +# define NAND_IOSTATUS_PLANE1 (1 << 2) +# define NAND_IOSTATUS_PLANE2 (1 << 3) +# define NAND_IOSTATUS_PLANE3 (1 << 4) +# define NAND_IOSTATUS_READY (1 << 6) +# define NAND_IOSTATUS_UNPROTCT (1 << 7) + +# define MAX_PAGE 0x800 +# define MAX_OOB 0x40 + +typedef struct NANDFlashState NANDFlashState; +struct NANDFlashState { + DeviceState parent_obj; + + uint8_t manf_id, chip_id; + uint8_t buswidth; /* in BYTES */ + int size, pages; + int page_shift, oob_shift, erase_shift, addr_shift; + uint8_t *storage; + BlockBackend *blk; + int mem_oob; + + uint8_t cle, ale, ce, wp, gnd; + + uint8_t io[MAX_PAGE + MAX_OOB + 0x400]; + uint8_t *ioaddr; + int iolen; + + uint32_t cmd; + uint64_t addr; + int addrlen; + int status; + int offset; + + void (*blk_write)(NANDFlashState *s); + void (*blk_erase)(NANDFlashState *s); + void (*blk_load)(NANDFlashState *s, uint64_t addr, int offset); + + uint32_t ioaddr_vmstate; +}; + +#define TYPE_NAND "nand" + +#define NAND(obj) \ + OBJECT_CHECK(NANDFlashState, (obj), TYPE_NAND) + +static void mem_and(uint8_t *dest, const uint8_t *src, size_t n) +{ + /* Like memcpy() but we logical-AND the data into the destination */ + int i; + for (i = 0; i < n; i++) { + dest[i] &= src[i]; + } +} + +# define NAND_NO_AUTOINCR 0x00000001 +# define NAND_BUSWIDTH_16 0x00000002 +# define NAND_NO_PADDING 0x00000004 +# define NAND_CACHEPRG 0x00000008 +# define NAND_COPYBACK 0x00000010 +# define NAND_IS_AND 0x00000020 +# define NAND_4PAGE_ARRAY 0x00000040 +# define NAND_NO_READRDY 0x00000100 +# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK) + +# define NAND_IO + +# define PAGE(addr) ((addr) >> ADDR_SHIFT) +# define PAGE_START(page) (PAGE(page) * (PAGE_SIZE + OOB_SIZE)) +# define PAGE_MASK ((1 << ADDR_SHIFT) - 1) +# define OOB_SHIFT (PAGE_SHIFT - 5) +# define OOB_SIZE (1 << OOB_SHIFT) +# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT)) +# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8)) + +# define PAGE_SIZE 256 +# define PAGE_SHIFT 8 +# define PAGE_SECTORS 1 +# define ADDR_SHIFT 8 +# include "nand.c" +# define PAGE_SIZE 512 +# define PAGE_SHIFT 9 +# define PAGE_SECTORS 1 +# define ADDR_SHIFT 8 +# include "nand.c" +# define PAGE_SIZE 2048 +# define PAGE_SHIFT 11 +# define PAGE_SECTORS 4 +# define ADDR_SHIFT 16 +# include "nand.c" + +/* Information based on Linux drivers/mtd/nand/nand_ids.c */ +static const struct { + int size; + int width; + int page_shift; + int erase_shift; + uint32_t options; +} nand_flash_ids[0x100] = { + [0 ... 0xff] = { 0 }, + + [0x6e] = { 1, 8, 8, 4, 0 }, + [0x64] = { 2, 8, 8, 4, 0 }, + [0x6b] = { 4, 8, 9, 4, 0 }, + [0xe8] = { 1, 8, 8, 4, 0 }, + [0xec] = { 1, 8, 8, 4, 0 }, + [0xea] = { 2, 8, 8, 4, 0 }, + [0xd5] = { 4, 8, 9, 4, 0 }, + [0xe3] = { 4, 8, 9, 4, 0 }, + [0xe5] = { 4, 8, 9, 4, 0 }, + [0xd6] = { 8, 8, 9, 4, 0 }, + + [0x39] = { 8, 8, 9, 4, 0 }, + [0xe6] = { 8, 8, 9, 4, 0 }, + [0x49] = { 8, 16, 9, 4, NAND_BUSWIDTH_16 }, + [0x59] = { 8, 16, 9, 4, NAND_BUSWIDTH_16 }, + + [0x33] = { 16, 8, 9, 5, 0 }, + [0x73] = { 16, 8, 9, 5, 0 }, + [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, + + [0x35] = { 32, 8, 9, 5, 0 }, + [0x75] = { 32, 8, 9, 5, 0 }, + [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, + + [0x36] = { 64, 8, 9, 5, 0 }, + [0x76] = { 64, 8, 9, 5, 0 }, + [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, + + [0x78] = { 128, 8, 9, 5, 0 }, + [0x39] = { 128, 8, 9, 5, 0 }, + [0x79] = { 128, 8, 9, 5, 0 }, + [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, + [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, + + [0x71] = { 256, 8, 9, 5, 0 }, + + /* + * These are the new chips with large page size. The pagesize and the + * erasesize is determined from the extended id bytes + */ +# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR) +# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16) + + /* 512 Megabit */ + [0xa2] = { 64, 8, 0, 0, LP_OPTIONS }, + [0xf2] = { 64, 8, 0, 0, LP_OPTIONS }, + [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 }, + [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 }, + + /* 1 Gigabit */ + [0xa1] = { 128, 8, 0, 0, LP_OPTIONS }, + [0xf1] = { 128, 8, 0, 0, LP_OPTIONS }, + [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 }, + [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 }, + + /* 2 Gigabit */ + [0xaa] = { 256, 8, 0, 0, LP_OPTIONS }, + [0xda] = { 256, 8, 0, 0, LP_OPTIONS }, + [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 }, + [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 }, + + /* 4 Gigabit */ + [0xac] = { 512, 8, 0, 0, LP_OPTIONS }, + [0xdc] = { 512, 8, 0, 0, LP_OPTIONS }, + [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 }, + [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 }, + + /* 8 Gigabit */ + [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS }, + [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS }, + [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, + [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, + + /* 16 Gigabit */ + [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS }, + [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS }, + [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, + [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, +}; + +static void nand_reset(DeviceState *dev) +{ + NANDFlashState *s = NAND(dev); + s->cmd = NAND_CMD_READ0; + s->addr = 0; + s->addrlen = 0; + s->iolen = 0; + s->offset = 0; + s->status &= NAND_IOSTATUS_UNPROTCT; + s->status |= NAND_IOSTATUS_READY; +} + +static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value) +{ + s->ioaddr[s->iolen++] = value; + for (value = s->buswidth; --value;) { + s->ioaddr[s->iolen++] = 0; + } +} + +static void nand_command(NANDFlashState *s) +{ + unsigned int offset; + switch (s->cmd) { + case NAND_CMD_READ0: + s->iolen = 0; + break; + + case NAND_CMD_READID: + s->ioaddr = s->io; + s->iolen = 0; + nand_pushio_byte(s, s->manf_id); + nand_pushio_byte(s, s->chip_id); + nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */ + if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { + /* Page Size, Block Size, Spare Size; bit 6 indicates + * 8 vs 16 bit width NAND. + */ + nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15); + } else { + nand_pushio_byte(s, 0xc0); /* Multi-plane */ + } + break; + + case NAND_CMD_RANDOMREAD2: + case NAND_CMD_NOSERIALREAD2: + if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP)) + break; + offset = s->addr & ((1 << s->addr_shift) - 1); + s->blk_load(s, s->addr, offset); + if (s->gnd) + s->iolen = (1 << s->page_shift) - offset; + else + s->iolen = (1 << s->page_shift) + (1 << s->oob_shift) - offset; + break; + + case NAND_CMD_RESET: + nand_reset(DEVICE(s)); + break; + + case NAND_CMD_PAGEPROGRAM1: + s->ioaddr = s->io; + s->iolen = 0; + break; + + case NAND_CMD_PAGEPROGRAM2: + if (s->wp) { + s->blk_write(s); + } + break; + + case NAND_CMD_BLOCKERASE1: + break; + + case NAND_CMD_BLOCKERASE2: + s->addr &= (1ull << s->addrlen * 8) - 1; + s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ? + 16 : 8; + + if (s->wp) { + s->blk_erase(s); + } + break; + + case NAND_CMD_READSTATUS: + s->ioaddr = s->io; + s->iolen = 0; + nand_pushio_byte(s, s->status); + break; + + default: + printf("%s: Unknown NAND command 0x%02x\n", __FUNCTION__, s->cmd); + } +} + +static void nand_pre_save(void *opaque) +{ + NANDFlashState *s = NAND(opaque); + + s->ioaddr_vmstate = s->ioaddr - s->io; +} + +static int nand_post_load(void *opaque, int version_id) +{ + NANDFlashState *s = NAND(opaque); + + if (s->ioaddr_vmstate > sizeof(s->io)) { + return -EINVAL; + } + s->ioaddr = s->io + s->ioaddr_vmstate; + + return 0; +} + +static const VMStateDescription vmstate_nand = { + .name = "nand", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = nand_pre_save, + .post_load = nand_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(cle, NANDFlashState), + VMSTATE_UINT8(ale, NANDFlashState), + VMSTATE_UINT8(ce, NANDFlashState), + VMSTATE_UINT8(wp, NANDFlashState), + VMSTATE_UINT8(gnd, NANDFlashState), + VMSTATE_BUFFER(io, NANDFlashState), + VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState), + VMSTATE_INT32(iolen, NANDFlashState), + VMSTATE_UINT32(cmd, NANDFlashState), + VMSTATE_UINT64(addr, NANDFlashState), + VMSTATE_INT32(addrlen, NANDFlashState), + VMSTATE_INT32(status, NANDFlashState), + VMSTATE_INT32(offset, NANDFlashState), + /* XXX: do we want to save s->storage too? */ + VMSTATE_END_OF_LIST() + } +}; + +static void nand_realize(DeviceState *dev, Error **errp) +{ + int pagesize; + NANDFlashState *s = NAND(dev); + + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; + s->size = nand_flash_ids[s->chip_id].size << 20; + if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { + s->page_shift = 11; + s->erase_shift = 6; + } else { + s->page_shift = nand_flash_ids[s->chip_id].page_shift; + s->erase_shift = nand_flash_ids[s->chip_id].erase_shift; + } + + switch (1 << s->page_shift) { + case 256: + nand_init_256(s); + break; + case 512: + nand_init_512(s); + break; + case 2048: + nand_init_2048(s); + break; + default: + error_setg(errp, "Unsupported NAND block size %#x", + 1 << s->page_shift); + return; + } + + pagesize = 1 << s->oob_shift; + s->mem_oob = 1; + if (s->blk) { + if (blk_is_read_only(s->blk)) { + error_setg(errp, "Can't use a read-only drive"); + return; + } + if (blk_getlength(s->blk) >= + (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { + pagesize = 0; + s->mem_oob = 0; + } + } else { + pagesize += 1 << s->page_shift; + } + if (pagesize) { + s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize), + 0xff, s->pages * pagesize); + } + /* Give s->ioaddr a sane value in case we save state before it is used. */ + s->ioaddr = s->io; +} + +static Property nand_properties[] = { + DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0), + DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0), + DEFINE_PROP_DRIVE("drive", NANDFlashState, blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nand_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = nand_realize; + dc->reset = nand_reset; + dc->vmsd = &vmstate_nand; + dc->props = nand_properties; +} + +static const TypeInfo nand_info = { + .name = TYPE_NAND, + .parent = TYPE_DEVICE, + .instance_size = sizeof(NANDFlashState), + .class_init = nand_class_init, +}; + +static void nand_register_types(void) +{ + type_register_static(&nand_info); +} + +/* + * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip + * outputs are R/B and eight I/O pins. + * + * CE, WP and R/B are active low. + */ +void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, + uint8_t ce, uint8_t wp, uint8_t gnd) +{ + NANDFlashState *s = NAND(dev); + + s->cle = cle; + s->ale = ale; + s->ce = ce; + s->wp = wp; + s->gnd = gnd; + if (wp) { + s->status |= NAND_IOSTATUS_UNPROTCT; + } else { + s->status &= ~NAND_IOSTATUS_UNPROTCT; + } +} + +void nand_getpins(DeviceState *dev, int *rb) +{ + *rb = 1; +} + +void nand_setio(DeviceState *dev, uint32_t value) +{ + int i; + NANDFlashState *s = NAND(dev); + + if (!s->ce && s->cle) { + if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { + if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2) + return; + if (value == NAND_CMD_RANDOMREAD1) { + s->addr &= ~((1 << s->addr_shift) - 1); + s->addrlen = 0; + return; + } + } + if (value == NAND_CMD_READ0) { + s->offset = 0; + } else if (value == NAND_CMD_READ1) { + s->offset = 0x100; + value = NAND_CMD_READ0; + } else if (value == NAND_CMD_READ2) { + s->offset = 1 << s->page_shift; + value = NAND_CMD_READ0; + } + + s->cmd = value; + + if (s->cmd == NAND_CMD_READSTATUS || + s->cmd == NAND_CMD_PAGEPROGRAM2 || + s->cmd == NAND_CMD_BLOCKERASE1 || + s->cmd == NAND_CMD_BLOCKERASE2 || + s->cmd == NAND_CMD_NOSERIALREAD2 || + s->cmd == NAND_CMD_RANDOMREAD2 || + s->cmd == NAND_CMD_RESET) { + nand_command(s); + } + + if (s->cmd != NAND_CMD_RANDOMREAD2) { + s->addrlen = 0; + } + } + + if (s->ale) { + unsigned int shift = s->addrlen * 8; + unsigned int mask = ~(0xff << shift); + unsigned int v = value << shift; + + s->addr = (s->addr & mask) | v; + s->addrlen ++; + + switch (s->addrlen) { + case 1: + if (s->cmd == NAND_CMD_READID) { + nand_command(s); + } + break; + case 2: /* fix cache address as a byte address */ + s->addr <<= (s->buswidth - 1); + break; + case 3: + if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && + (s->cmd == NAND_CMD_READ0 || + s->cmd == NAND_CMD_PAGEPROGRAM1)) { + nand_command(s); + } + break; + case 4: + if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && + nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */ + (s->cmd == NAND_CMD_READ0 || + s->cmd == NAND_CMD_PAGEPROGRAM1)) { + nand_command(s); + } + break; + case 5: + if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && + nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */ + (s->cmd == NAND_CMD_READ0 || + s->cmd == NAND_CMD_PAGEPROGRAM1)) { + nand_command(s); + } + break; + default: + break; + } + } + + if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) { + if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) { + for (i = s->buswidth; i--; value >>= 8) { + s->io[s->iolen ++] = (uint8_t) (value & 0xff); + } + } + } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) { + if ((s->addr & ((1 << s->addr_shift) - 1)) < + (1 << s->page_shift) + (1 << s->oob_shift)) { + for (i = s->buswidth; i--; s->addr++, value >>= 8) { + s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] = + (uint8_t) (value & 0xff); + } + } + } +} + +uint32_t nand_getio(DeviceState *dev) +{ + int offset; + uint32_t x = 0; + NANDFlashState *s = NAND(dev); + + /* Allow sequential reading */ + if (!s->iolen && s->cmd == NAND_CMD_READ0) { + offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset; + s->offset = 0; + + s->blk_load(s, s->addr, offset); + if (s->gnd) + s->iolen = (1 << s->page_shift) - offset; + else + s->iolen = (1 << s->page_shift) + (1 << s->oob_shift) - offset; + } + + if (s->ce || s->iolen <= 0) { + return 0; + } + + for (offset = s->buswidth; offset--;) { + x |= s->ioaddr[offset] << (offset << 3); + } + /* after receiving READ STATUS command all subsequent reads will + * return the status register value until another command is issued + */ + if (s->cmd != NAND_CMD_READSTATUS) { + s->addr += s->buswidth; + s->ioaddr += s->buswidth; + s->iolen -= s->buswidth; + } + return x; +} + +uint32_t nand_getbuswidth(DeviceState *dev) +{ + NANDFlashState *s = (NANDFlashState *) dev; + return s->buswidth << 3; +} + +DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id) +{ + DeviceState *dev; + + if (nand_flash_ids[chip_id].size == 0) { + hw_error("%s: Unsupported NAND chip ID.\n", __FUNCTION__); + } + dev = DEVICE(object_new(TYPE_NAND)); + qdev_prop_set_uint8(dev, "manufacturer_id", manf_id); + qdev_prop_set_uint8(dev, "chip_id", chip_id); + if (blk) { + qdev_prop_set_drive_nofail(dev, "drive", blk); + } + + qdev_init_nofail(dev); + return dev; +} + +type_init(nand_register_types) + +#else + +/* Program a single page */ +static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s) +{ + uint64_t off, page, sector, soff; + uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200]; + if (PAGE(s->addr) >= s->pages) + return; + + if (!s->blk) { + mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) + + s->offset, s->io, s->iolen); + } else if (s->mem_oob) { + sector = SECTOR(s->addr); + off = (s->addr & PAGE_MASK) + s->offset; + soff = SECTOR_OFFSET(s->addr); + if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); + return; + } + + mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, PAGE_SIZE - off)); + if (off + s->iolen > PAGE_SIZE) { + page = PAGE(s->addr); + mem_and(s->storage + (page << OOB_SHIFT), s->io + PAGE_SIZE - off, + MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE)); + } + + if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); + } + } else { + off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset; + sector = off >> 9; + soff = off & 0x1ff; + if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); + return; + } + + mem_and(iobuf + soff, s->io, s->iolen); + + if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); + } + } + s->offset = 0; +} + +/* Erase a single block */ +static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s) +{ + uint64_t i, page, addr; + uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, }; + addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1); + + if (PAGE(addr) >= s->pages) { + return; + } + + if (!s->blk) { + memset(s->storage + PAGE_START(addr), + 0xff, (PAGE_SIZE + OOB_SIZE) << s->erase_shift); + } else if (s->mem_oob) { + memset(s->storage + (PAGE(addr) << OOB_SHIFT), + 0xff, OOB_SIZE << s->erase_shift); + i = SECTOR(addr); + page = SECTOR(addr + (ADDR_SHIFT + s->erase_shift)); + for (; i < page; i ++) + if (blk_write(s->blk, i, iobuf, 1) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", __func__, i); + } + } else { + addr = PAGE_START(addr); + page = addr >> 9; + if (blk_read(s->blk, page, iobuf, 1) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", __func__, page); + } + memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1); + if (blk_write(s->blk, page, iobuf, 1) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", __func__, page); + } + + memset(iobuf, 0xff, 0x200); + i = (addr & ~0x1ff) + 0x200; + for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200; + i < addr; i += 0x200) { + if (blk_write(s->blk, i >> 9, iobuf, 1) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", + __func__, i >> 9); + } + } + + page = i >> 9; + if (blk_read(s->blk, page, iobuf, 1) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", __func__, page); + } + memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1); + if (blk_write(s->blk, page, iobuf, 1) < 0) { + printf("%s: write error in sector %" PRIu64 "\n", __func__, page); + } + } +} + +static void glue(nand_blk_load_, PAGE_SIZE)(NANDFlashState *s, + uint64_t addr, int offset) +{ + if (PAGE(addr) >= s->pages) { + return; + } + + if (s->blk) { + if (s->mem_oob) { + if (blk_read(s->blk, SECTOR(addr), s->io, PAGE_SECTORS) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", + __func__, SECTOR(addr)); + } + memcpy(s->io + SECTOR_OFFSET(s->addr) + PAGE_SIZE, + s->storage + (PAGE(s->addr) << OOB_SHIFT), + OOB_SIZE); + s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset; + } else { + if (blk_read(s->blk, PAGE_START(addr) >> 9, + s->io, (PAGE_SECTORS + 2)) < 0) { + printf("%s: read error in sector %" PRIu64 "\n", + __func__, PAGE_START(addr) >> 9); + } + s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset; + } + } else { + memcpy(s->io, s->storage + PAGE_START(s->addr) + + offset, PAGE_SIZE + OOB_SIZE - offset); + s->ioaddr = s->io; + } +} + +static void glue(nand_init_, PAGE_SIZE)(NANDFlashState *s) +{ + s->oob_shift = PAGE_SHIFT - 5; + s->pages = s->size >> PAGE_SHIFT; + s->addr_shift = ADDR_SHIFT; + + s->blk_erase = glue(nand_blk_erase_, PAGE_SIZE); + s->blk_write = glue(nand_blk_write_, PAGE_SIZE); + s->blk_load = glue(nand_blk_load_, PAGE_SIZE); +} + +# undef PAGE_SIZE +# undef PAGE_SHIFT +# undef PAGE_SECTORS +# undef ADDR_SHIFT +#endif /* NAND_IO */ diff --git a/qemu/hw/block/nvme.c b/qemu/hw/block/nvme.c new file mode 100644 index 000000000..40d488032 --- /dev/null +++ b/qemu/hw/block/nvme.c @@ -0,0 +1,967 @@ +/* + * QEMU NVM Express Controller + * + * Copyright (c) 2012, Intel Corporation + * + * Written by Keith Busch <keith.busch@intel.com> + * + * This code is licensed under the GNU GPL v2 or later. + */ + +/** + * Reference Specs: http://www.nvmexpress.org, 1.1, 1.0e + * + * http://www.nvmexpress.org/resources/ + */ + +/** + * Usage: add options: + * -drive file=<file>,if=none,id=<drive_id> + * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]> + */ + +#include <hw/block/block.h> +#include <hw/hw.h> +#include <hw/pci/msix.h> +#include <hw/pci/pci.h> +#include "sysemu/sysemu.h" +#include "qapi/visitor.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" + +static void nvme_process_sq(void *opaque); + +static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) +{ + return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; +} + +static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) +{ + return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1; +} + +static void nvme_inc_cq_tail(NvmeCQueue *cq) +{ + cq->tail++; + if (cq->tail >= cq->size) { + cq->tail = 0; + cq->phase = !cq->phase; + } +} + +static void nvme_inc_sq_head(NvmeSQueue *sq) +{ + sq->head = (sq->head + 1) % sq->size; +} + +static uint8_t nvme_cq_full(NvmeCQueue *cq) +{ + return (cq->tail + 1) % cq->size == cq->head; +} + +static uint8_t nvme_sq_empty(NvmeSQueue *sq) +{ + return sq->head == sq->tail; +} + +static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) +{ + if (cq->irq_enabled) { + if (msix_enabled(&(n->parent_obj))) { + msix_notify(&(n->parent_obj), cq->vector); + } else { + pci_irq_pulse(&n->parent_obj); + } + } +} + +static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, + uint32_t len, NvmeCtrl *n) +{ + hwaddr trans_len = n->page_size - (prp1 % n->page_size); + trans_len = MIN(len, trans_len); + int num_prps = (len >> n->page_bits) + 1; + + if (!prp1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); + qemu_sglist_add(qsg, prp1, trans_len); + len -= trans_len; + if (len) { + if (!prp2) { + goto unmap; + } + if (len > n->page_size) { + uint64_t prp_list[n->max_prp_ents]; + uint32_t nents, prp_trans; + int i = 0; + + nents = (len + n->page_size - 1) >> n->page_bits; + prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); + pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans); + while (len != 0) { + uint64_t prp_ent = le64_to_cpu(prp_list[i]); + + if (i == n->max_prp_ents - 1 && len > n->page_size) { + if (!prp_ent || prp_ent & (n->page_size - 1)) { + goto unmap; + } + + i = 0; + nents = (len + n->page_size - 1) >> n->page_bits; + prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); + pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list, + prp_trans); + prp_ent = le64_to_cpu(prp_list[i]); + } + + if (!prp_ent || prp_ent & (n->page_size - 1)) { + goto unmap; + } + + trans_len = MIN(len, n->page_size); + qemu_sglist_add(qsg, prp_ent, trans_len); + len -= trans_len; + i++; + } + } else { + if (prp2 & (n->page_size - 1)) { + goto unmap; + } + qemu_sglist_add(qsg, prp2, len); + } + } + return NVME_SUCCESS; + + unmap: + qemu_sglist_destroy(qsg); + return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + uint64_t prp1, uint64_t prp2) +{ + QEMUSGList qsg; + + if (nvme_map_prp(&qsg, prp1, prp2, len, n)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (dma_buf_read(ptr, len, &qsg)) { + qemu_sglist_destroy(&qsg); + return NVME_INVALID_FIELD | NVME_DNR; + } + qemu_sglist_destroy(&qsg); + return NVME_SUCCESS; +} + +static void nvme_post_cqes(void *opaque) +{ + NvmeCQueue *cq = opaque; + NvmeCtrl *n = cq->ctrl; + NvmeRequest *req, *next; + + QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { + NvmeSQueue *sq; + hwaddr addr; + + if (nvme_cq_full(cq)) { + break; + } + + QTAILQ_REMOVE(&cq->req_list, req, entry); + sq = req->sq; + req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); + req->cqe.sq_id = cpu_to_le16(sq->sqid); + req->cqe.sq_head = cpu_to_le16(sq->head); + addr = cq->dma_addr + cq->tail * n->cqe_size; + nvme_inc_cq_tail(cq); + pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, + sizeof(req->cqe)); + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); + } + nvme_isr_notify(n, cq); +} + +static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) +{ + assert(cq->cqid == req->sq->cqid); + QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); + QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + NvmeCQueue *cq = n->cq[sq->cqid]; + + block_acct_done(blk_get_stats(n->conf.blk), &req->acct); + if (!ret) { + req->status = NVME_SUCCESS; + } else { + req->status = NVME_INTERNAL_DEV_ERROR; + } + if (req->has_sg) { + qemu_sglist_destroy(&req->qsg); + } + nvme_enqueue_req_completion(cq, req); +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + req->has_sg = false; + block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + BLOCK_ACCT_FLUSH); + req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)cmd; + uint32_t nlb = le32_to_cpu(rw->nlb) + 1; + uint64_t slba = le64_to_cpu(rw->slba); + uint64_t prp1 = le64_to_cpu(rw->prp1); + uint64_t prp2 = le64_to_cpu(rw->prp2); + + uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; + uint64_t data_size = (uint64_t)nlb << data_shift; + uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); + int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; + + if ((slba + nlb) > ns->id_ns.nsze) { + return NVME_LBA_RANGE | NVME_DNR; + } + if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + assert((nlb << data_shift) == req->qsg.size); + + req->has_sg = true; + dma_acct_start(n->conf.blk, &req->acct, &req->qsg, + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + req->aiocb = is_write ? + dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) : + dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req); + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t nsid = le32_to_cpu(cmd->nsid); + + if (nsid == 0 || nsid > n->num_namespaces) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = &n->namespaces[nsid - 1]; + switch (cmd->opcode) { + case NVME_CMD_FLUSH: + return nvme_flush(n, ns, cmd, req); + case NVME_CMD_WRITE: + case NVME_CMD_READ: + return nvme_rw(n, ns, cmd, req); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + +static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) +{ + n->sq[sq->sqid] = NULL; + timer_del(sq->timer); + timer_free(sq->timer); + g_free(sq->io_req); + if (sq->sqid) { + g_free(sq); + } +} + +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; + NvmeRequest *req, *next; + NvmeSQueue *sq; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (!qid || nvme_check_sqid(n, qid)) { + return NVME_INVALID_QID | NVME_DNR; + } + + sq = n->sq[qid]; + while (!QTAILQ_EMPTY(&sq->out_req_list)) { + req = QTAILQ_FIRST(&sq->out_req_list); + assert(req->aiocb); + blk_aio_cancel(req->aiocb); + } + if (!nvme_check_cqid(n, sq->cqid)) { + cq = n->cq[sq->cqid]; + QTAILQ_REMOVE(&cq->sq_list, sq, entry); + + nvme_post_cqes(cq); + QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { + if (req->sq == sq) { + QTAILQ_REMOVE(&cq->req_list, req, entry); + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); + } + } + } + + nvme_free_sq(sq, n); + return NVME_SUCCESS; +} + +static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t sqid, uint16_t cqid, uint16_t size) +{ + int i; + NvmeCQueue *cq; + + sq->ctrl = n; + sq->dma_addr = dma_addr; + sq->sqid = sqid; + sq->size = size; + sq->cqid = cqid; + sq->head = sq->tail = 0; + sq->io_req = g_new(NvmeRequest, sq->size); + + QTAILQ_INIT(&sq->req_list); + QTAILQ_INIT(&sq->out_req_list); + for (i = 0; i < sq->size; i++) { + sq->io_req[i].sq = sq; + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + + assert(n->cq[cqid]); + cq = n->cq[cqid]; + QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); + n->sq[sqid] = sq; +} + +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) +{ + NvmeSQueue *sq; + NvmeCreateSq *c = (NvmeCreateSq *)cmd; + + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t sqid = le16_to_cpu(c->sqid); + uint16_t qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->sq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + if (!cqid || nvme_check_cqid(n, cqid)) { + return NVME_INVALID_CQID | NVME_DNR; + } + if (!sqid || (sqid && !nvme_check_sqid(n, sqid))) { + return NVME_INVALID_QID | NVME_DNR; + } + if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (!prp1 || prp1 & (n->page_size - 1)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (!(NVME_SQ_FLAGS_PC(qflags))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + sq = g_malloc0(sizeof(*sq)); + nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); + return NVME_SUCCESS; +} + +static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) +{ + n->cq[cq->cqid] = NULL; + timer_del(cq->timer); + timer_free(cq->timer); + msix_vector_unuse(&n->parent_obj, cq->vector); + if (cq->cqid) { + g_free(cq); + } +} + +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (!qid || nvme_check_cqid(n, qid)) { + return NVME_INVALID_CQID | NVME_DNR; + } + + cq = n->cq[qid]; + if (!QTAILQ_EMPTY(&cq->sq_list)) { + return NVME_INVALID_QUEUE_DEL; + } + nvme_free_cq(cq, n); + return NVME_SUCCESS; +} + +static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) +{ + cq->ctrl = n; + cq->cqid = cqid; + cq->size = size; + cq->dma_addr = dma_addr; + cq->phase = 1; + cq->irq_enabled = irq_enabled; + cq->vector = vector; + cq->head = cq->tail = 0; + QTAILQ_INIT(&cq->req_list); + QTAILQ_INIT(&cq->sq_list); + msix_vector_use(&n->parent_obj, cq->vector); + n->cq[cqid] = cq; + cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); +} + +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) +{ + NvmeCQueue *cq; + NvmeCreateCq *c = (NvmeCreateCq *)cmd; + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t vector = le16_to_cpu(c->irq_vector); + uint16_t qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->cq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + if (!cqid || (cqid && !nvme_check_cqid(n, cqid))) { + return NVME_INVALID_CQID | NVME_DNR; + } + if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (!prp1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (vector > n->num_queues) { + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (!(NVME_CQ_FLAGS_PC(qflags))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + cq = g_malloc0(sizeof(*cq)); + nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, + NVME_CQ_FLAGS_IEN(qflags)); + return NVME_SUCCESS; +} + +static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)cmd; + uint32_t cns = le32_to_cpu(c->cns); + uint32_t nsid = le32_to_cpu(c->nsid); + uint64_t prp1 = le64_to_cpu(c->prp1); + uint64_t prp2 = le64_to_cpu(c->prp2); + + if (cns) { + return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), + prp1, prp2); + } + if (nsid == 0 || nsid > n->num_namespaces) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = &n->namespaces[nsid - 1]; + return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), + prp1, prp2); +} + +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t result; + + switch (dw10) { + case NVME_VOLATILE_WRITE_CACHE: + result = blk_enable_write_cache(n->conf.blk); + break; + case NVME_NUMBER_OF_QUEUES: + result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); + break; + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + req->cqe.result = result; + return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + + switch (dw10) { + case NVME_VOLATILE_WRITE_CACHE: + blk_set_enable_write_cache(n->conf.blk, dw11 & 1); + break; + case NVME_NUMBER_OF_QUEUES: + req->cqe.result = + cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); + break; + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + return NVME_SUCCESS; +} + +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + switch (cmd->opcode) { + case NVME_ADM_CMD_DELETE_SQ: + return nvme_del_sq(n, cmd); + case NVME_ADM_CMD_CREATE_SQ: + return nvme_create_sq(n, cmd); + case NVME_ADM_CMD_DELETE_CQ: + return nvme_del_cq(n, cmd); + case NVME_ADM_CMD_CREATE_CQ: + return nvme_create_cq(n, cmd); + case NVME_ADM_CMD_IDENTIFY: + return nvme_identify(n, cmd); + case NVME_ADM_CMD_SET_FEATURES: + return nvme_set_feature(n, cmd, req); + case NVME_ADM_CMD_GET_FEATURES: + return nvme_get_feature(n, cmd, req); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + +static void nvme_process_sq(void *opaque) +{ + NvmeSQueue *sq = opaque; + NvmeCtrl *n = sq->ctrl; + NvmeCQueue *cq = n->cq[sq->cqid]; + + uint16_t status; + hwaddr addr; + NvmeCmd cmd; + NvmeRequest *req; + + while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { + addr = sq->dma_addr + sq->head * n->sqe_size; + pci_dma_read(&n->parent_obj, addr, (void *)&cmd, sizeof(cmd)); + nvme_inc_sq_head(sq); + + req = QTAILQ_FIRST(&sq->req_list); + QTAILQ_REMOVE(&sq->req_list, req, entry); + QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); + memset(&req->cqe, 0, sizeof(req->cqe)); + req->cqe.cid = cmd.cid; + + status = sq->sqid ? nvme_io_cmd(n, &cmd, req) : + nvme_admin_cmd(n, &cmd, req); + if (status != NVME_NO_COMPLETE) { + req->status = status; + nvme_enqueue_req_completion(cq, req); + } + } +} + +static void nvme_clear_ctrl(NvmeCtrl *n) +{ + int i; + + for (i = 0; i < n->num_queues; i++) { + if (n->sq[i] != NULL) { + nvme_free_sq(n->sq[i], n); + } + } + for (i = 0; i < n->num_queues; i++) { + if (n->cq[i] != NULL) { + nvme_free_cq(n->cq[i], n); + } + } + + blk_flush(n->conf.blk); + n->bar.cc = 0; +} + +static int nvme_start_ctrl(NvmeCtrl *n) +{ + uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; + uint32_t page_size = 1 << page_bits; + + if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || + n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || + NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || + NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || + NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || + NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || + NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || + NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || + !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { + return -1; + } + + n->page_bits = page_bits; + n->page_size = page_size; + n->max_prp_ents = n->page_size / sizeof(uint64_t); + n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc); + n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc); + nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0, + NVME_AQA_ACQS(n->bar.aqa) + 1, 1); + nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, + NVME_AQA_ASQS(n->bar.aqa) + 1); + + return 0; +} + +static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, + unsigned size) +{ + switch (offset) { + case 0xc: + n->bar.intms |= data & 0xffffffff; + n->bar.intmc = n->bar.intms; + break; + case 0x10: + n->bar.intms &= ~(data & 0xffffffff); + n->bar.intmc = n->bar.intms; + break; + case 0x14: + /* Windows first sends data, then sends enable bit */ + if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && + !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) + { + n->bar.cc = data; + } + + if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { + n->bar.cc = data; + if (nvme_start_ctrl(n)) { + n->bar.csts = NVME_CSTS_FAILED; + } else { + n->bar.csts = NVME_CSTS_READY; + } + } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { + nvme_clear_ctrl(n); + n->bar.csts &= ~NVME_CSTS_READY; + } + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { + nvme_clear_ctrl(n); + n->bar.cc = data; + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; + n->bar.cc = data; + } + break; + case 0x24: + n->bar.aqa = data & 0xffffffff; + break; + case 0x28: + n->bar.asq = data; + break; + case 0x2c: + n->bar.asq |= data << 32; + break; + case 0x30: + n->bar.acq = data; + break; + case 0x34: + n->bar.acq |= data << 32; + break; + default: + break; + } +} + +static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + uint8_t *ptr = (uint8_t *)&n->bar; + uint64_t val = 0; + + if (addr < sizeof(n->bar)) { + memcpy(&val, ptr + addr, size); + } + return val; +} + +static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) +{ + uint32_t qid; + + if (addr & ((1 << 2) - 1)) { + return; + } + + if (((addr - 0x1000) >> 2) & 1) { + uint16_t new_head = val & 0xffff; + int start_sqs; + NvmeCQueue *cq; + + qid = (addr - (0x1000 + (1 << 2))) >> 3; + if (nvme_check_cqid(n, qid)) { + return; + } + + cq = n->cq[qid]; + if (new_head >= cq->size) { + return; + } + + start_sqs = nvme_cq_full(cq) ? 1 : 0; + cq->head = new_head; + if (start_sqs) { + NvmeSQueue *sq; + QTAILQ_FOREACH(sq, &cq->sq_list, entry) { + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + + if (cq->tail != cq->head) { + nvme_isr_notify(n, cq); + } + } else { + uint16_t new_tail = val & 0xffff; + NvmeSQueue *sq; + + qid = (addr - 0x1000) >> 3; + if (nvme_check_sqid(n, qid)) { + return; + } + + sq = n->sq[qid]; + if (new_tail >= sq->size) { + return; + } + + sq->tail = new_tail; + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + if (addr < sizeof(n->bar)) { + nvme_write_bar(n, addr, data, size); + } else if (addr >= 0x1000) { + nvme_process_db(n, addr, data); + } +} + +static const MemoryRegionOps nvme_mmio_ops = { + .read = nvme_mmio_read, + .write = nvme_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 2, + .max_access_size = 8, + }, +}; + +static int nvme_init(PCIDevice *pci_dev) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeIdCtrl *id = &n->id_ctrl; + + int i; + int64_t bs_size; + uint8_t *pci_conf; + + if (!n->conf.blk) { + return -1; + } + + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + return -1; + } + + blkconf_serial(&n->conf, &n->serial); + if (!n->serial) { + return -1; + } + blkconf_blocksizes(&n->conf); + + pci_conf = pci_dev->config; + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_dev->config, 0x2); + pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(&n->parent_obj, 0x80); + + n->num_namespaces = 1; + n->num_queues = 64; + n->reg_size = 1 << qemu_fls(0x1004 + 2 * (n->num_queues + 1) * 4); + n->ns_size = bs_size / (uint64_t)n->num_namespaces; + + n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); + n->sq = g_new0(NvmeSQueue *, n->num_queues); + n->cq = g_new0(NvmeCQueue *, n->num_queues); + + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, + "nvme", n->reg_size); + pci_register_bar(&n->parent_obj, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, + &n->iomem); + msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4); + + id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); + id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); + strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); + strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' '); + id->rab = 6; + id->ieee[0] = 0x00; + id->ieee[1] = 0x02; + id->ieee[2] = 0xb3; + id->oacs = cpu_to_le16(0); + id->frmw = 7 << 1; + id->lpa = 1 << 0; + id->sqes = (0x6 << 4) | 0x6; + id->cqes = (0x4 << 4) | 0x4; + id->nn = cpu_to_le32(n->num_namespaces); + id->psd[0].mp = cpu_to_le16(0x9c4); + id->psd[0].enlat = cpu_to_le32(0x10); + id->psd[0].exlat = cpu_to_le32(0x4); + if (blk_enable_write_cache(n->conf.blk)) { + id->vwc = 1; + } + + n->bar.cap = 0; + NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); + NVME_CAP_SET_CQR(n->bar.cap, 1); + NVME_CAP_SET_AMS(n->bar.cap, 1); + NVME_CAP_SET_TO(n->bar.cap, 0xf); + NVME_CAP_SET_CSS(n->bar.cap, 1); + NVME_CAP_SET_MPSMAX(n->bar.cap, 4); + + n->bar.vs = 0x00010100; + n->bar.intmc = n->bar.intms = 0; + + for (i = 0; i < n->num_namespaces; i++) { + NvmeNamespace *ns = &n->namespaces[i]; + NvmeIdNs *id_ns = &ns->id_ns; + id_ns->nsfeat = 0; + id_ns->nlbaf = 0; + id_ns->flbas = 0; + id_ns->mc = 0; + id_ns->dpc = 0; + id_ns->dps = 0; + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + id_ns->ncap = id_ns->nuse = id_ns->nsze = + cpu_to_le64(n->ns_size >> + id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); + } + return 0; +} + +static void nvme_exit(PCIDevice *pci_dev) +{ + NvmeCtrl *n = NVME(pci_dev); + + nvme_clear_ctrl(n); + g_free(n->namespaces); + g_free(n->cq); + g_free(n->sq); + msix_uninit_exclusive_bar(pci_dev); +} + +static Property nvme_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), + DEFINE_PROP_STRING("serial", NvmeCtrl, serial), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription nvme_vmstate = { + .name = "nvme", + .unmigratable = 1, +}; + +static void nvme_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + + pc->init = nvme_init; + pc->exit = nvme_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->vendor_id = PCI_VENDOR_ID_INTEL; + pc->device_id = 0x5845; + pc->revision = 1; + pc->is_express = 1; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "Non-Volatile Memory Express"; + dc->props = nvme_props; + dc->vmsd = &nvme_vmstate; +} + +static void nvme_get_bootindex(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NvmeCtrl *s = NVME(obj); + + visit_type_int32(v, &s->conf.bootindex, name, errp); +} + +static void nvme_set_bootindex(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NvmeCtrl *s = NVME(obj); + int32_t boot_index; + Error *local_err = NULL; + + visit_type_int32(v, &boot_index, name, &local_err); + if (local_err) { + goto out; + } + /* check whether bootindex is present in fw_boot_order list */ + check_boot_index(boot_index, &local_err); + if (local_err) { + goto out; + } + /* change bootindex to a new one */ + s->conf.bootindex = boot_index; + +out: + if (local_err) { + error_propagate(errp, local_err); + } +} + +static void nvme_instance_init(Object *obj) +{ + object_property_add(obj, "bootindex", "int32", + nvme_get_bootindex, + nvme_set_bootindex, NULL, NULL, NULL); + object_property_set_int(obj, -1, "bootindex", NULL); +} + +static const TypeInfo nvme_info = { + .name = "nvme", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(NvmeCtrl), + .class_init = nvme_class_init, + .instance_init = nvme_instance_init, +}; + +static void nvme_register_types(void) +{ + type_register_static(&nvme_info); +} + +type_init(nvme_register_types) diff --git a/qemu/hw/block/nvme.h b/qemu/hw/block/nvme.h new file mode 100644 index 000000000..bf3a3ccac --- /dev/null +++ b/qemu/hw/block/nvme.h @@ -0,0 +1,712 @@ +#ifndef HW_NVME_H +#define HW_NVME_H + +typedef struct NvmeBar { + uint64_t cap; + uint32_t vs; + uint32_t intms; + uint32_t intmc; + uint32_t cc; + uint32_t rsvd1; + uint32_t csts; + uint32_t nssrc; + uint32_t aqa; + uint64_t asq; + uint64_t acq; +} NvmeBar; + +enum NvmeCapShift { + CAP_MQES_SHIFT = 0, + CAP_CQR_SHIFT = 16, + CAP_AMS_SHIFT = 17, + CAP_TO_SHIFT = 24, + CAP_DSTRD_SHIFT = 32, + CAP_NSSRS_SHIFT = 33, + CAP_CSS_SHIFT = 37, + CAP_MPSMIN_SHIFT = 48, + CAP_MPSMAX_SHIFT = 52, +}; + +enum NvmeCapMask { + CAP_MQES_MASK = 0xffff, + CAP_CQR_MASK = 0x1, + CAP_AMS_MASK = 0x3, + CAP_TO_MASK = 0xff, + CAP_DSTRD_MASK = 0xf, + CAP_NSSRS_MASK = 0x1, + CAP_CSS_MASK = 0xff, + CAP_MPSMIN_MASK = 0xf, + CAP_MPSMAX_MASK = 0xf, +}; + +#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) +#define NVME_CAP_CQR(cap) (((cap) >> CAP_CQR_SHIFT) & CAP_CQR_MASK) +#define NVME_CAP_AMS(cap) (((cap) >> CAP_AMS_SHIFT) & CAP_AMS_MASK) +#define NVME_CAP_TO(cap) (((cap) >> CAP_TO_SHIFT) & CAP_TO_MASK) +#define NVME_CAP_DSTRD(cap) (((cap) >> CAP_DSTRD_SHIFT) & CAP_DSTRD_MASK) +#define NVME_CAP_NSSRS(cap) (((cap) >> CAP_NSSRS_SHIFT) & CAP_NSSRS_MASK) +#define NVME_CAP_CSS(cap) (((cap) >> CAP_CSS_SHIFT) & CAP_CSS_MASK) +#define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK) +#define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK) + +#define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \ + << CAP_MQES_SHIFT) +#define NVME_CAP_SET_CQR(cap, val) (cap |= (uint64_t)(val & CAP_CQR_MASK) \ + << CAP_CQR_SHIFT) +#define NVME_CAP_SET_AMS(cap, val) (cap |= (uint64_t)(val & CAP_AMS_MASK) \ + << CAP_AMS_SHIFT) +#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK) \ + << CAP_TO_SHIFT) +#define NVME_CAP_SET_DSTRD(cap, val) (cap |= (uint64_t)(val & CAP_DSTRD_MASK) \ + << CAP_DSTRD_SHIFT) +#define NVME_CAP_SET_NSSRS(cap, val) (cap |= (uint64_t)(val & CAP_NSSRS_MASK) \ + << CAP_NSSRS_SHIFT) +#define NVME_CAP_SET_CSS(cap, val) (cap |= (uint64_t)(val & CAP_CSS_MASK) \ + << CAP_CSS_SHIFT) +#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\ + << CAP_MPSMIN_SHIFT) +#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ + << CAP_MPSMAX_SHIFT) + +enum NvmeCcShift { + CC_EN_SHIFT = 0, + CC_CSS_SHIFT = 4, + CC_MPS_SHIFT = 7, + CC_AMS_SHIFT = 11, + CC_SHN_SHIFT = 14, + CC_IOSQES_SHIFT = 16, + CC_IOCQES_SHIFT = 20, +}; + +enum NvmeCcMask { + CC_EN_MASK = 0x1, + CC_CSS_MASK = 0x7, + CC_MPS_MASK = 0xf, + CC_AMS_MASK = 0x7, + CC_SHN_MASK = 0x3, + CC_IOSQES_MASK = 0xf, + CC_IOCQES_MASK = 0xf, +}; + +#define NVME_CC_EN(cc) ((cc >> CC_EN_SHIFT) & CC_EN_MASK) +#define NVME_CC_CSS(cc) ((cc >> CC_CSS_SHIFT) & CC_CSS_MASK) +#define NVME_CC_MPS(cc) ((cc >> CC_MPS_SHIFT) & CC_MPS_MASK) +#define NVME_CC_AMS(cc) ((cc >> CC_AMS_SHIFT) & CC_AMS_MASK) +#define NVME_CC_SHN(cc) ((cc >> CC_SHN_SHIFT) & CC_SHN_MASK) +#define NVME_CC_IOSQES(cc) ((cc >> CC_IOSQES_SHIFT) & CC_IOSQES_MASK) +#define NVME_CC_IOCQES(cc) ((cc >> CC_IOCQES_SHIFT) & CC_IOCQES_MASK) + +enum NvmeCstsShift { + CSTS_RDY_SHIFT = 0, + CSTS_CFS_SHIFT = 1, + CSTS_SHST_SHIFT = 2, + CSTS_NSSRO_SHIFT = 4, +}; + +enum NvmeCstsMask { + CSTS_RDY_MASK = 0x1, + CSTS_CFS_MASK = 0x1, + CSTS_SHST_MASK = 0x3, + CSTS_NSSRO_MASK = 0x1, +}; + +enum NvmeCsts { + NVME_CSTS_READY = 1 << CSTS_RDY_SHIFT, + NVME_CSTS_FAILED = 1 << CSTS_CFS_SHIFT, + NVME_CSTS_SHST_NORMAL = 0 << CSTS_SHST_SHIFT, + NVME_CSTS_SHST_PROGRESS = 1 << CSTS_SHST_SHIFT, + NVME_CSTS_SHST_COMPLETE = 2 << CSTS_SHST_SHIFT, + NVME_CSTS_NSSRO = 1 << CSTS_NSSRO_SHIFT, +}; + +#define NVME_CSTS_RDY(csts) ((csts >> CSTS_RDY_SHIFT) & CSTS_RDY_MASK) +#define NVME_CSTS_CFS(csts) ((csts >> CSTS_CFS_SHIFT) & CSTS_CFS_MASK) +#define NVME_CSTS_SHST(csts) ((csts >> CSTS_SHST_SHIFT) & CSTS_SHST_MASK) +#define NVME_CSTS_NSSRO(csts) ((csts >> CSTS_NSSRO_SHIFT) & CSTS_NSSRO_MASK) + +enum NvmeAqaShift { + AQA_ASQS_SHIFT = 0, + AQA_ACQS_SHIFT = 16, +}; + +enum NvmeAqaMask { + AQA_ASQS_MASK = 0xfff, + AQA_ACQS_MASK = 0xfff, +}; + +#define NVME_AQA_ASQS(aqa) ((aqa >> AQA_ASQS_SHIFT) & AQA_ASQS_MASK) +#define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK) + +typedef struct NvmeCmd { + uint8_t opcode; + uint8_t fuse; + uint16_t cid; + uint32_t nsid; + uint64_t res1; + uint64_t mptr; + uint64_t prp1; + uint64_t prp2; + uint32_t cdw10; + uint32_t cdw11; + uint32_t cdw12; + uint32_t cdw13; + uint32_t cdw14; + uint32_t cdw15; +} NvmeCmd; + +enum NvmeAdminCommands { + NVME_ADM_CMD_DELETE_SQ = 0x00, + NVME_ADM_CMD_CREATE_SQ = 0x01, + NVME_ADM_CMD_GET_LOG_PAGE = 0x02, + NVME_ADM_CMD_DELETE_CQ = 0x04, + NVME_ADM_CMD_CREATE_CQ = 0x05, + NVME_ADM_CMD_IDENTIFY = 0x06, + NVME_ADM_CMD_ABORT = 0x08, + NVME_ADM_CMD_SET_FEATURES = 0x09, + NVME_ADM_CMD_GET_FEATURES = 0x0a, + NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c, + NVME_ADM_CMD_ACTIVATE_FW = 0x10, + NVME_ADM_CMD_DOWNLOAD_FW = 0x11, + NVME_ADM_CMD_FORMAT_NVM = 0x80, + NVME_ADM_CMD_SECURITY_SEND = 0x81, + NVME_ADM_CMD_SECURITY_RECV = 0x82, +}; + +enum NvmeIoCommands { + NVME_CMD_FLUSH = 0x00, + NVME_CMD_WRITE = 0x01, + NVME_CMD_READ = 0x02, + NVME_CMD_WRITE_UNCOR = 0x04, + NVME_CMD_COMPARE = 0x05, + NVME_CMD_DSM = 0x09, +}; + +typedef struct NvmeDeleteQ { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[9]; + uint16_t qid; + uint16_t rsvd10; + uint32_t rsvd11[5]; +} NvmeDeleteQ; + +typedef struct NvmeCreateCq { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[5]; + uint64_t prp1; + uint64_t rsvd8; + uint16_t cqid; + uint16_t qsize; + uint16_t cq_flags; + uint16_t irq_vector; + uint32_t rsvd12[4]; +} NvmeCreateCq; + +#define NVME_CQ_FLAGS_PC(cq_flags) (cq_flags & 0x1) +#define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1) + +typedef struct NvmeCreateSq { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[5]; + uint64_t prp1; + uint64_t rsvd8; + uint16_t sqid; + uint16_t qsize; + uint16_t sq_flags; + uint16_t cqid; + uint32_t rsvd12[4]; +} NvmeCreateSq; + +#define NVME_SQ_FLAGS_PC(sq_flags) (sq_flags & 0x1) +#define NVME_SQ_FLAGS_QPRIO(sq_flags) ((sq_flags >> 1) & 0x3) + +enum NvmeQueueFlags { + NVME_Q_PC = 1, + NVME_Q_PRIO_URGENT = 0, + NVME_Q_PRIO_HIGH = 1, + NVME_Q_PRIO_NORMAL = 2, + NVME_Q_PRIO_LOW = 3, +}; + +typedef struct NvmeIdentify { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t cns; + uint32_t rsvd11[5]; +} NvmeIdentify; + +typedef struct NvmeRwCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2; + uint64_t mptr; + uint64_t prp1; + uint64_t prp2; + uint64_t slba; + uint16_t nlb; + uint16_t control; + uint32_t dsmgmt; + uint32_t reftag; + uint16_t apptag; + uint16_t appmask; +} NvmeRwCmd; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRACT = 1 << 13, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, +}; + +typedef struct NvmeDsmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t nr; + uint32_t attributes; + uint32_t rsvd12[4]; +} NvmeDsmCmd; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +typedef struct NvmeDsmRange { + uint32_t cattr; + uint32_t nlb; + uint64_t slba; +} NvmeDsmRange; + +enum NvmeAsyncEventRequest { + NVME_AER_TYPE_ERROR = 0, + NVME_AER_TYPE_SMART = 1, + NVME_AER_TYPE_IO_SPECIFIC = 6, + NVME_AER_TYPE_VENDOR_SPECIFIC = 7, + NVME_AER_INFO_ERR_INVALID_SQ = 0, + NVME_AER_INFO_ERR_INVALID_DB = 1, + NVME_AER_INFO_ERR_DIAG_FAIL = 2, + NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3, + NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4, + NVME_AER_INFO_ERR_FW_IMG_LOAD_ERR = 5, + NVME_AER_INFO_SMART_RELIABILITY = 0, + NVME_AER_INFO_SMART_TEMP_THRESH = 1, + NVME_AER_INFO_SMART_SPARE_THRESH = 2, +}; + +typedef struct NvmeAerResult { + uint8_t event_type; + uint8_t event_info; + uint8_t log_page; + uint8_t resv; +} NvmeAerResult; + +typedef struct NvmeCqe { + uint32_t result; + uint32_t rsvd; + uint16_t sq_head; + uint16_t sq_id; + uint16_t cid; + uint16_t status; +} NvmeCqe; + +enum NvmeStatusCodes { + NVME_SUCCESS = 0x0000, + NVME_INVALID_OPCODE = 0x0001, + NVME_INVALID_FIELD = 0x0002, + NVME_CID_CONFLICT = 0x0003, + NVME_DATA_TRAS_ERROR = 0x0004, + NVME_POWER_LOSS_ABORT = 0x0005, + NVME_INTERNAL_DEV_ERROR = 0x0006, + NVME_CMD_ABORT_REQ = 0x0007, + NVME_CMD_ABORT_SQ_DEL = 0x0008, + NVME_CMD_ABORT_FAILED_FUSE = 0x0009, + NVME_CMD_ABORT_MISSING_FUSE = 0x000a, + NVME_INVALID_NSID = 0x000b, + NVME_CMD_SEQ_ERROR = 0x000c, + NVME_LBA_RANGE = 0x0080, + NVME_CAP_EXCEEDED = 0x0081, + NVME_NS_NOT_READY = 0x0082, + NVME_NS_RESV_CONFLICT = 0x0083, + NVME_INVALID_CQID = 0x0100, + NVME_INVALID_QID = 0x0101, + NVME_MAX_QSIZE_EXCEEDED = 0x0102, + NVME_ACL_EXCEEDED = 0x0103, + NVME_RESERVED = 0x0104, + NVME_AER_LIMIT_EXCEEDED = 0x0105, + NVME_INVALID_FW_SLOT = 0x0106, + NVME_INVALID_FW_IMAGE = 0x0107, + NVME_INVALID_IRQ_VECTOR = 0x0108, + NVME_INVALID_LOG_ID = 0x0109, + NVME_INVALID_FORMAT = 0x010a, + NVME_FW_REQ_RESET = 0x010b, + NVME_INVALID_QUEUE_DEL = 0x010c, + NVME_FID_NOT_SAVEABLE = 0x010d, + NVME_FID_NOT_NSID_SPEC = 0x010f, + NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, + NVME_CONFLICTING_ATTRS = 0x0180, + NVME_INVALID_PROT_INFO = 0x0181, + NVME_WRITE_TO_RO = 0x0182, + NVME_WRITE_FAULT = 0x0280, + NVME_UNRECOVERED_READ = 0x0281, + NVME_E2E_GUARD_ERROR = 0x0282, + NVME_E2E_APP_ERROR = 0x0283, + NVME_E2E_REF_ERROR = 0x0284, + NVME_CMP_FAILURE = 0x0285, + NVME_ACCESS_DENIED = 0x0286, + NVME_MORE = 0x2000, + NVME_DNR = 0x4000, + NVME_NO_COMPLETE = 0xffff, +}; + +typedef struct NvmeFwSlotInfoLog { + uint8_t afi; + uint8_t reserved1[7]; + uint8_t frs1[8]; + uint8_t frs2[8]; + uint8_t frs3[8]; + uint8_t frs4[8]; + uint8_t frs5[8]; + uint8_t frs6[8]; + uint8_t frs7[8]; + uint8_t reserved2[448]; +} NvmeFwSlotInfoLog; + +typedef struct NvmeErrorLog { + uint64_t error_count; + uint16_t sqid; + uint16_t cid; + uint16_t status_field; + uint16_t param_error_location; + uint64_t lba; + uint32_t nsid; + uint8_t vs; + uint8_t resv[35]; +} NvmeErrorLog; + +typedef struct NvmeSmartLog { + uint8_t critical_warning; + uint8_t temperature[2]; + uint8_t available_spare; + uint8_t available_spare_threshold; + uint8_t percentage_used; + uint8_t reserved1[26]; + uint64_t data_units_read[2]; + uint64_t data_units_written[2]; + uint64_t host_read_commands[2]; + uint64_t host_write_commands[2]; + uint64_t controller_busy_time[2]; + uint64_t power_cycles[2]; + uint64_t power_on_hours[2]; + uint64_t unsafe_shutdowns[2]; + uint64_t media_errors[2]; + uint64_t number_of_error_log_entries[2]; + uint8_t reserved2[320]; +} NvmeSmartLog; + +enum NvmeSmartWarn { + NVME_SMART_SPARE = 1 << 0, + NVME_SMART_TEMPERATURE = 1 << 1, + NVME_SMART_RELIABILITY = 1 << 2, + NVME_SMART_MEDIA_READ_ONLY = 1 << 3, + NVME_SMART_FAILED_VOLATILE_MEDIA = 1 << 4, +}; + +enum LogIdentifier { + NVME_LOG_ERROR_INFO = 0x01, + NVME_LOG_SMART_INFO = 0x02, + NVME_LOG_FW_SLOT_INFO = 0x03, +}; + +typedef struct NvmePSD { + uint16_t mp; + uint16_t reserved; + uint32_t enlat; + uint32_t exlat; + uint8_t rrt; + uint8_t rrl; + uint8_t rwt; + uint8_t rwl; + uint8_t resv[16]; +} NvmePSD; + +typedef struct NvmeIdCtrl { + uint16_t vid; + uint16_t ssvid; + uint8_t sn[20]; + uint8_t mn[40]; + uint8_t fr[8]; + uint8_t rab; + uint8_t ieee[3]; + uint8_t cmic; + uint8_t mdts; + uint8_t rsvd255[178]; + uint16_t oacs; + uint8_t acl; + uint8_t aerl; + uint8_t frmw; + uint8_t lpa; + uint8_t elpe; + uint8_t npss; + uint8_t rsvd511[248]; + uint8_t sqes; + uint8_t cqes; + uint16_t rsvd515; + uint32_t nn; + uint16_t oncs; + uint16_t fuses; + uint8_t fna; + uint8_t vwc; + uint16_t awun; + uint16_t awupf; + uint8_t rsvd703[174]; + uint8_t rsvd2047[1344]; + NvmePSD psd[32]; + uint8_t vs[1024]; +} NvmeIdCtrl; + +enum NvmeIdCtrlOacs { + NVME_OACS_SECURITY = 1 << 0, + NVME_OACS_FORMAT = 1 << 1, + NVME_OACS_FW = 1 << 2, +}; + +enum NvmeIdCtrlOncs { + NVME_ONCS_COMPARE = 1 << 0, + NVME_ONCS_WRITE_UNCORR = 1 << 1, + NVME_ONCS_DSM = 1 << 2, + NVME_ONCS_WRITE_ZEROS = 1 << 3, + NVME_ONCS_FEATURES = 1 << 4, + NVME_ONCS_RESRVATIONS = 1 << 5, +}; + +#define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) +#define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) +#define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) +#define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf) + +typedef struct NvmeFeatureVal { + uint32_t arbitration; + uint32_t power_mgmt; + uint32_t temp_thresh; + uint32_t err_rec; + uint32_t volatile_wc; + uint32_t num_queues; + uint32_t int_coalescing; + uint32_t *int_vector_config; + uint32_t write_atomicity; + uint32_t async_config; + uint32_t sw_prog_marker; +} NvmeFeatureVal; + +#define NVME_ARB_AB(arb) (arb & 0x7) +#define NVME_ARB_LPW(arb) ((arb >> 8) & 0xff) +#define NVME_ARB_MPW(arb) ((arb >> 16) & 0xff) +#define NVME_ARB_HPW(arb) ((arb >> 24) & 0xff) + +#define NVME_INTC_THR(intc) (intc & 0xff) +#define NVME_INTC_TIME(intc) ((intc >> 8) & 0xff) + +enum NvmeFeatureIds { + NVME_ARBITRATION = 0x1, + NVME_POWER_MANAGEMENT = 0x2, + NVME_LBA_RANGE_TYPE = 0x3, + NVME_TEMPERATURE_THRESHOLD = 0x4, + NVME_ERROR_RECOVERY = 0x5, + NVME_VOLATILE_WRITE_CACHE = 0x6, + NVME_NUMBER_OF_QUEUES = 0x7, + NVME_INTERRUPT_COALESCING = 0x8, + NVME_INTERRUPT_VECTOR_CONF = 0x9, + NVME_WRITE_ATOMICITY = 0xa, + NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, + NVME_SOFTWARE_PROGRESS_MARKER = 0x80 +}; + +typedef struct NvmeRangeType { + uint8_t type; + uint8_t attributes; + uint8_t rsvd2[14]; + uint64_t slba; + uint64_t nlb; + uint8_t guid[16]; + uint8_t rsvd48[16]; +} NvmeRangeType; + +typedef struct NvmeLBAF { + uint16_t ms; + uint8_t ds; + uint8_t rp; +} NvmeLBAF; + +typedef struct NvmeIdNs { + uint64_t nsze; + uint64_t ncap; + uint64_t nuse; + uint8_t nsfeat; + uint8_t nlbaf; + uint8_t flbas; + uint8_t mc; + uint8_t dpc; + uint8_t dps; + uint8_t res30[98]; + NvmeLBAF lbaf[16]; + uint8_t res192[192]; + uint8_t vs[3712]; +} NvmeIdNs; + +#define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1)) +#define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1) +#define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf)) +#define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1) +#define NVME_ID_NS_MC_EXTENDED(mc) ((mc & 0x1)) +#define NVME_ID_NS_DPC_LAST_EIGHT(dpc) ((dpc >> 4) & 0x1) +#define NVME_ID_NS_DPC_FIRST_EIGHT(dpc) ((dpc >> 3) & 0x1) +#define NVME_ID_NS_DPC_TYPE_3(dpc) ((dpc >> 2) & 0x1) +#define NVME_ID_NS_DPC_TYPE_2(dpc) ((dpc >> 1) & 0x1) +#define NVME_ID_NS_DPC_TYPE_1(dpc) ((dpc & 0x1)) +#define NVME_ID_NS_DPC_TYPE_MASK 0x7 + +enum NvmeIdNsDps { + DPS_TYPE_NONE = 0, + DPS_TYPE_1 = 1, + DPS_TYPE_2 = 2, + DPS_TYPE_3 = 3, + DPS_TYPE_MASK = 0x7, + DPS_FIRST_EIGHT = 8, +}; + +static inline void _nvme_check_size(void) +{ + QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4); + QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCreateSq) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); +} + +typedef struct NvmeAsyncEvent { + QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; + NvmeAerResult result; +} NvmeAsyncEvent; + +typedef struct NvmeRequest { + struct NvmeSQueue *sq; + BlockAIOCB *aiocb; + uint16_t status; + bool has_sg; + NvmeCqe cqe; + BlockAcctCookie acct; + QEMUSGList qsg; + QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeSQueue { + struct NvmeCtrl *ctrl; + uint16_t sqid; + uint16_t cqid; + uint32_t head; + uint32_t tail; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + NvmeRequest *io_req; + QTAILQ_HEAD(sq_req_list, NvmeRequest) req_list; + QTAILQ_HEAD(out_req_list, NvmeRequest) out_req_list; + QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { + struct NvmeCtrl *ctrl; + uint8_t phase; + uint16_t cqid; + uint16_t irq_enabled; + uint32_t head; + uint32_t tail; + uint32_t vector; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + QTAILQ_HEAD(sq_list, NvmeSQueue) sq_list; + QTAILQ_HEAD(cq_req_list, NvmeRequest) req_list; +} NvmeCQueue; + +typedef struct NvmeNamespace { + NvmeIdNs id_ns; +} NvmeNamespace; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ + OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeCtrl { + PCIDevice parent_obj; + MemoryRegion iomem; + NvmeBar bar; + BlockConf conf; + + uint32_t page_size; + uint16_t page_bits; + uint16_t max_prp_ents; + uint16_t cqe_size; + uint16_t sqe_size; + uint32_t reg_size; + uint32_t num_namespaces; + uint32_t num_queues; + uint32_t max_q_ents; + uint64_t ns_size; + + char *serial; + NvmeNamespace *namespaces; + NvmeSQueue **sq; + NvmeCQueue **cq; + NvmeSQueue admin_sq; + NvmeCQueue admin_cq; + NvmeIdCtrl id_ctrl; +} NvmeCtrl; + +#endif /* HW_NVME_H */ diff --git a/qemu/hw/block/onenand.c b/qemu/hw/block/onenand.c new file mode 100644 index 000000000..1b2c89375 --- /dev/null +++ b/qemu/hw/block/onenand.c @@ -0,0 +1,848 @@ +/* + * OneNAND flash memories emulation. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski <andrew@openedhand.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu-common.h" +#include "hw/hw.h" +#include "hw/block/flash.h" +#include "hw/irq.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "hw/sysbus.h" +#include "qemu/error-report.h" + +/* 11 for 2kB-page OneNAND ("2nd generation") and 10 for 1kB-page chips */ +#define PAGE_SHIFT 11 + +/* Fixed */ +#define BLOCK_SHIFT (PAGE_SHIFT + 6) + +#define TYPE_ONE_NAND "onenand" +#define ONE_NAND(obj) OBJECT_CHECK(OneNANDState, (obj), TYPE_ONE_NAND) + +typedef struct OneNANDState { + SysBusDevice parent_obj; + + struct { + uint16_t man; + uint16_t dev; + uint16_t ver; + } id; + int shift; + hwaddr base; + qemu_irq intr; + qemu_irq rdy; + BlockBackend *blk; + BlockBackend *blk_cur; + uint8_t *image; + uint8_t *otp; + uint8_t *current; + MemoryRegion ram; + MemoryRegion mapped_ram; + uint8_t current_direction; + uint8_t *boot[2]; + uint8_t *data[2][2]; + MemoryRegion iomem; + MemoryRegion container; + int cycle; + int otpmode; + + uint16_t addr[8]; + uint16_t unladdr[8]; + int bufaddr; + int count; + uint16_t command; + uint16_t config[2]; + uint16_t status; + uint16_t intstatus; + uint16_t wpstatus; + + ECCState ecc; + + int density_mask; + int secs; + int secs_cur; + int blocks; + uint8_t *blockwp; +} OneNANDState; + +enum { + ONEN_BUF_BLOCK = 0, + ONEN_BUF_BLOCK2 = 1, + ONEN_BUF_DEST_BLOCK = 2, + ONEN_BUF_DEST_PAGE = 3, + ONEN_BUF_PAGE = 7, +}; + +enum { + ONEN_ERR_CMD = 1 << 10, + ONEN_ERR_ERASE = 1 << 11, + ONEN_ERR_PROG = 1 << 12, + ONEN_ERR_LOAD = 1 << 13, +}; + +enum { + ONEN_INT_RESET = 1 << 4, + ONEN_INT_ERASE = 1 << 5, + ONEN_INT_PROG = 1 << 6, + ONEN_INT_LOAD = 1 << 7, + ONEN_INT = 1 << 15, +}; + +enum { + ONEN_LOCK_LOCKTIGHTEN = 1 << 0, + ONEN_LOCK_LOCKED = 1 << 1, + ONEN_LOCK_UNLOCKED = 1 << 2, +}; + +static void onenand_mem_setup(OneNANDState *s) +{ + /* XXX: We should use IO_MEM_ROMD but we broke it earlier... + * Both 0x0000 ... 0x01ff and 0x8000 ... 0x800f can be used to + * write boot commands. Also take note of the BWPS bit. */ + memory_region_init(&s->container, OBJECT(s), "onenand", + 0x10000 << s->shift); + memory_region_add_subregion(&s->container, 0, &s->iomem); + memory_region_init_alias(&s->mapped_ram, OBJECT(s), "onenand-mapped-ram", + &s->ram, 0x0200 << s->shift, + 0xbe00 << s->shift); + memory_region_add_subregion_overlap(&s->container, + 0x0200 << s->shift, + &s->mapped_ram, + 1); +} + +static void onenand_intr_update(OneNANDState *s) +{ + qemu_set_irq(s->intr, ((s->intstatus >> 15) ^ (~s->config[0] >> 6)) & 1); +} + +static void onenand_pre_save(void *opaque) +{ + OneNANDState *s = opaque; + if (s->current == s->otp) { + s->current_direction = 1; + } else if (s->current == s->image) { + s->current_direction = 2; + } else { + s->current_direction = 0; + } +} + +static int onenand_post_load(void *opaque, int version_id) +{ + OneNANDState *s = opaque; + switch (s->current_direction) { + case 0: + break; + case 1: + s->current = s->otp; + break; + case 2: + s->current = s->image; + break; + default: + return -1; + } + onenand_intr_update(s); + return 0; +} + +static const VMStateDescription vmstate_onenand = { + .name = "onenand", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = onenand_pre_save, + .post_load = onenand_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(current_direction, OneNANDState), + VMSTATE_INT32(cycle, OneNANDState), + VMSTATE_INT32(otpmode, OneNANDState), + VMSTATE_UINT16_ARRAY(addr, OneNANDState, 8), + VMSTATE_UINT16_ARRAY(unladdr, OneNANDState, 8), + VMSTATE_INT32(bufaddr, OneNANDState), + VMSTATE_INT32(count, OneNANDState), + VMSTATE_UINT16(command, OneNANDState), + VMSTATE_UINT16_ARRAY(config, OneNANDState, 2), + VMSTATE_UINT16(status, OneNANDState), + VMSTATE_UINT16(intstatus, OneNANDState), + VMSTATE_UINT16(wpstatus, OneNANDState), + VMSTATE_INT32(secs_cur, OneNANDState), + VMSTATE_PARTIAL_VBUFFER(blockwp, OneNANDState, blocks), + VMSTATE_UINT8(ecc.cp, OneNANDState), + VMSTATE_UINT16_ARRAY(ecc.lp, OneNANDState, 2), + VMSTATE_UINT16(ecc.count, OneNANDState), + VMSTATE_BUFFER_POINTER_UNSAFE(otp, OneNANDState, 0, + ((64 + 2) << PAGE_SHIFT)), + VMSTATE_END_OF_LIST() + } +}; + +/* Hot reset (Reset OneNAND command) or warm reset (RP pin low) */ +static void onenand_reset(OneNANDState *s, int cold) +{ + memset(&s->addr, 0, sizeof(s->addr)); + s->command = 0; + s->count = 1; + s->bufaddr = 0; + s->config[0] = 0x40c0; + s->config[1] = 0x0000; + onenand_intr_update(s); + qemu_irq_raise(s->rdy); + s->status = 0x0000; + s->intstatus = cold ? 0x8080 : 0x8010; + s->unladdr[0] = 0; + s->unladdr[1] = 0; + s->wpstatus = 0x0002; + s->cycle = 0; + s->otpmode = 0; + s->blk_cur = s->blk; + s->current = s->image; + s->secs_cur = s->secs; + + if (cold) { + /* Lock the whole flash */ + memset(s->blockwp, ONEN_LOCK_LOCKED, s->blocks); + + if (s->blk_cur && blk_read(s->blk_cur, 0, s->boot[0], 8) < 0) { + hw_error("%s: Loading the BootRAM failed.\n", __func__); + } + } +} + +static void onenand_system_reset(DeviceState *dev) +{ + OneNANDState *s = ONE_NAND(dev); + + onenand_reset(s, 1); +} + +static inline int onenand_load_main(OneNANDState *s, int sec, int secn, + void *dest) +{ + if (s->blk_cur) { + return blk_read(s->blk_cur, sec, dest, secn) < 0; + } else if (sec + secn > s->secs_cur) { + return 1; + } + + memcpy(dest, s->current + (sec << 9), secn << 9); + + return 0; +} + +static inline int onenand_prog_main(OneNANDState *s, int sec, int secn, + void *src) +{ + int result = 0; + + if (secn > 0) { + uint32_t size = (uint32_t)secn * 512; + const uint8_t *sp = (const uint8_t *)src; + uint8_t *dp = 0; + if (s->blk_cur) { + dp = g_malloc(size); + if (!dp || blk_read(s->blk_cur, sec, dp, secn) < 0) { + result = 1; + } + } else { + if (sec + secn > s->secs_cur) { + result = 1; + } else { + dp = (uint8_t *)s->current + (sec << 9); + } + } + if (!result) { + uint32_t i; + for (i = 0; i < size; i++) { + dp[i] &= sp[i]; + } + if (s->blk_cur) { + result = blk_write(s->blk_cur, sec, dp, secn) < 0; + } + } + if (dp && s->blk_cur) { + g_free(dp); + } + } + + return result; +} + +static inline int onenand_load_spare(OneNANDState *s, int sec, int secn, + void *dest) +{ + uint8_t buf[512]; + + if (s->blk_cur) { + if (blk_read(s->blk_cur, s->secs_cur + (sec >> 5), buf, 1) < 0) { + return 1; + } + memcpy(dest, buf + ((sec & 31) << 4), secn << 4); + } else if (sec + secn > s->secs_cur) { + return 1; + } else { + memcpy(dest, s->current + (s->secs_cur << 9) + (sec << 4), secn << 4); + } + + return 0; +} + +static inline int onenand_prog_spare(OneNANDState *s, int sec, int secn, + void *src) +{ + int result = 0; + if (secn > 0) { + const uint8_t *sp = (const uint8_t *)src; + uint8_t *dp = 0, *dpp = 0; + if (s->blk_cur) { + dp = g_malloc(512); + if (!dp + || blk_read(s->blk_cur, s->secs_cur + (sec >> 5), dp, 1) < 0) { + result = 1; + } else { + dpp = dp + ((sec & 31) << 4); + } + } else { + if (sec + secn > s->secs_cur) { + result = 1; + } else { + dpp = s->current + (s->secs_cur << 9) + (sec << 4); + } + } + if (!result) { + uint32_t i; + for (i = 0; i < (secn << 4); i++) { + dpp[i] &= sp[i]; + } + if (s->blk_cur) { + result = blk_write(s->blk_cur, s->secs_cur + (sec >> 5), + dp, 1) < 0; + } + } + g_free(dp); + } + return result; +} + +static inline int onenand_erase(OneNANDState *s, int sec, int num) +{ + uint8_t *blankbuf, *tmpbuf; + + blankbuf = g_malloc(512); + tmpbuf = g_malloc(512); + memset(blankbuf, 0xff, 512); + for (; num > 0; num--, sec++) { + if (s->blk_cur) { + int erasesec = s->secs_cur + (sec >> 5); + if (blk_write(s->blk_cur, sec, blankbuf, 1) < 0) { + goto fail; + } + if (blk_read(s->blk_cur, erasesec, tmpbuf, 1) < 0) { + goto fail; + } + memcpy(tmpbuf + ((sec & 31) << 4), blankbuf, 1 << 4); + if (blk_write(s->blk_cur, erasesec, tmpbuf, 1) < 0) { + goto fail; + } + } else { + if (sec + 1 > s->secs_cur) { + goto fail; + } + memcpy(s->current + (sec << 9), blankbuf, 512); + memcpy(s->current + (s->secs_cur << 9) + (sec << 4), + blankbuf, 1 << 4); + } + } + + g_free(tmpbuf); + g_free(blankbuf); + return 0; + +fail: + g_free(tmpbuf); + g_free(blankbuf); + return 1; +} + +static void onenand_command(OneNANDState *s) +{ + int b; + int sec; + void *buf; +#define SETADDR(block, page) \ + sec = (s->addr[page] & 3) + \ + ((((s->addr[page] >> 2) & 0x3f) + \ + (((s->addr[block] & 0xfff) | \ + (s->addr[block] >> 15 ? \ + s->density_mask : 0)) << 6)) << (PAGE_SHIFT - 9)); +#define SETBUF_M() \ + buf = (s->bufaddr & 8) ? \ + s->data[(s->bufaddr >> 2) & 1][0] : s->boot[0]; \ + buf += (s->bufaddr & 3) << 9; +#define SETBUF_S() \ + buf = (s->bufaddr & 8) ? \ + s->data[(s->bufaddr >> 2) & 1][1] : s->boot[1]; \ + buf += (s->bufaddr & 3) << 4; + + switch (s->command) { + case 0x00: /* Load single/multiple sector data unit into buffer */ + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + + SETBUF_M() + if (onenand_load_main(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD; + +#if 0 + SETBUF_S() + if (onenand_load_spare(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD; +#endif + + /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages) + * or if (s->bufaddr & 1) + s->count was > 2 (1k-pages) + * then we need two split the read/write into two chunks. + */ + s->intstatus |= ONEN_INT | ONEN_INT_LOAD; + break; + case 0x13: /* Load single/multiple spare sector into buffer */ + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + + SETBUF_S() + if (onenand_load_spare(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD; + + /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages) + * or if (s->bufaddr & 1) + s->count was > 2 (1k-pages) + * then we need two split the read/write into two chunks. + */ + s->intstatus |= ONEN_INT | ONEN_INT_LOAD; + break; + case 0x80: /* Program single/multiple sector data unit from buffer */ + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + + SETBUF_M() + if (onenand_prog_main(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG; + +#if 0 + SETBUF_S() + if (onenand_prog_spare(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG; +#endif + + /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages) + * or if (s->bufaddr & 1) + s->count was > 2 (1k-pages) + * then we need two split the read/write into two chunks. + */ + s->intstatus |= ONEN_INT | ONEN_INT_PROG; + break; + case 0x1a: /* Program single/multiple spare area sector from buffer */ + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + + SETBUF_S() + if (onenand_prog_spare(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG; + + /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages) + * or if (s->bufaddr & 1) + s->count was > 2 (1k-pages) + * then we need two split the read/write into two chunks. + */ + s->intstatus |= ONEN_INT | ONEN_INT_PROG; + break; + case 0x1b: /* Copy-back program */ + SETBUF_S() + + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + if (onenand_load_main(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG; + + SETADDR(ONEN_BUF_DEST_BLOCK, ONEN_BUF_DEST_PAGE) + if (onenand_prog_main(s, sec, s->count, buf)) + s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG; + + /* TODO: spare areas */ + + s->intstatus |= ONEN_INT | ONEN_INT_PROG; + break; + + case 0x23: /* Unlock NAND array block(s) */ + s->intstatus |= ONEN_INT; + + /* XXX the previous (?) area should be locked automatically */ + for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { + if (b >= s->blocks) { + s->status |= ONEN_ERR_CMD; + break; + } + if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) + break; + + s->wpstatus = s->blockwp[b] = ONEN_LOCK_UNLOCKED; + } + break; + case 0x27: /* Unlock All NAND array blocks */ + s->intstatus |= ONEN_INT; + + for (b = 0; b < s->blocks; b ++) { + if (b >= s->blocks) { + s->status |= ONEN_ERR_CMD; + break; + } + if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) + break; + + s->wpstatus = s->blockwp[b] = ONEN_LOCK_UNLOCKED; + } + break; + + case 0x2a: /* Lock NAND array block(s) */ + s->intstatus |= ONEN_INT; + + for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { + if (b >= s->blocks) { + s->status |= ONEN_ERR_CMD; + break; + } + if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) + break; + + s->wpstatus = s->blockwp[b] = ONEN_LOCK_LOCKED; + } + break; + case 0x2c: /* Lock-tight NAND array block(s) */ + s->intstatus |= ONEN_INT; + + for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { + if (b >= s->blocks) { + s->status |= ONEN_ERR_CMD; + break; + } + if (s->blockwp[b] == ONEN_LOCK_UNLOCKED) + continue; + + s->wpstatus = s->blockwp[b] = ONEN_LOCK_LOCKTIGHTEN; + } + break; + + case 0x71: /* Erase-Verify-Read */ + s->intstatus |= ONEN_INT; + break; + case 0x95: /* Multi-block erase */ + qemu_irq_pulse(s->intr); + /* Fall through. */ + case 0x94: /* Block erase */ + sec = ((s->addr[ONEN_BUF_BLOCK] & 0xfff) | + (s->addr[ONEN_BUF_BLOCK] >> 15 ? s->density_mask : 0)) + << (BLOCK_SHIFT - 9); + if (onenand_erase(s, sec, 1 << (BLOCK_SHIFT - 9))) + s->status |= ONEN_ERR_CMD | ONEN_ERR_ERASE; + + s->intstatus |= ONEN_INT | ONEN_INT_ERASE; + break; + case 0xb0: /* Erase suspend */ + break; + case 0x30: /* Erase resume */ + s->intstatus |= ONEN_INT | ONEN_INT_ERASE; + break; + + case 0xf0: /* Reset NAND Flash core */ + onenand_reset(s, 0); + break; + case 0xf3: /* Reset OneNAND */ + onenand_reset(s, 0); + break; + + case 0x65: /* OTP Access */ + s->intstatus |= ONEN_INT; + s->blk_cur = NULL; + s->current = s->otp; + s->secs_cur = 1 << (BLOCK_SHIFT - 9); + s->addr[ONEN_BUF_BLOCK] = 0; + s->otpmode = 1; + break; + + default: + s->status |= ONEN_ERR_CMD; + s->intstatus |= ONEN_INT; + fprintf(stderr, "%s: unknown OneNAND command %x\n", + __func__, s->command); + } + + onenand_intr_update(s); +} + +static uint64_t onenand_read(void *opaque, hwaddr addr, + unsigned size) +{ + OneNANDState *s = (OneNANDState *) opaque; + int offset = addr >> s->shift; + + switch (offset) { + case 0x0000 ... 0xc000: + return lduw_le_p(s->boot[0] + addr); + + case 0xf000: /* Manufacturer ID */ + return s->id.man; + case 0xf001: /* Device ID */ + return s->id.dev; + case 0xf002: /* Version ID */ + return s->id.ver; + /* TODO: get the following values from a real chip! */ + case 0xf003: /* Data Buffer size */ + return 1 << PAGE_SHIFT; + case 0xf004: /* Boot Buffer size */ + return 0x200; + case 0xf005: /* Amount of buffers */ + return 1 | (2 << 8); + case 0xf006: /* Technology */ + return 0; + + case 0xf100 ... 0xf107: /* Start addresses */ + return s->addr[offset - 0xf100]; + + case 0xf200: /* Start buffer */ + return (s->bufaddr << 8) | ((s->count - 1) & (1 << (PAGE_SHIFT - 10))); + + case 0xf220: /* Command */ + return s->command; + case 0xf221: /* System Configuration 1 */ + return s->config[0] & 0xffe0; + case 0xf222: /* System Configuration 2 */ + return s->config[1]; + + case 0xf240: /* Controller Status */ + return s->status; + case 0xf241: /* Interrupt */ + return s->intstatus; + case 0xf24c: /* Unlock Start Block Address */ + return s->unladdr[0]; + case 0xf24d: /* Unlock End Block Address */ + return s->unladdr[1]; + case 0xf24e: /* Write Protection Status */ + return s->wpstatus; + + case 0xff00: /* ECC Status */ + return 0x00; + case 0xff01: /* ECC Result of main area data */ + case 0xff02: /* ECC Result of spare area data */ + case 0xff03: /* ECC Result of main area data */ + case 0xff04: /* ECC Result of spare area data */ + hw_error("%s: imeplement ECC\n", __FUNCTION__); + return 0x0000; + } + + fprintf(stderr, "%s: unknown OneNAND register %x\n", + __FUNCTION__, offset); + return 0; +} + +static void onenand_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + OneNANDState *s = (OneNANDState *) opaque; + int offset = addr >> s->shift; + int sec; + + switch (offset) { + case 0x0000 ... 0x01ff: + case 0x8000 ... 0x800f: + if (s->cycle) { + s->cycle = 0; + + if (value == 0x0000) { + SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE) + onenand_load_main(s, sec, + 1 << (PAGE_SHIFT - 9), s->data[0][0]); + s->addr[ONEN_BUF_PAGE] += 4; + s->addr[ONEN_BUF_PAGE] &= 0xff; + } + break; + } + + switch (value) { + case 0x00f0: /* Reset OneNAND */ + onenand_reset(s, 0); + break; + + case 0x00e0: /* Load Data into Buffer */ + s->cycle = 1; + break; + + case 0x0090: /* Read Identification Data */ + memset(s->boot[0], 0, 3 << s->shift); + s->boot[0][0 << s->shift] = s->id.man & 0xff; + s->boot[0][1 << s->shift] = s->id.dev & 0xff; + s->boot[0][2 << s->shift] = s->wpstatus & 0xff; + break; + + default: + fprintf(stderr, "%s: unknown OneNAND boot command %"PRIx64"\n", + __FUNCTION__, value); + } + break; + + case 0xf100 ... 0xf107: /* Start addresses */ + s->addr[offset - 0xf100] = value; + break; + + case 0xf200: /* Start buffer */ + s->bufaddr = (value >> 8) & 0xf; + if (PAGE_SHIFT == 11) + s->count = (value & 3) ?: 4; + else if (PAGE_SHIFT == 10) + s->count = (value & 1) ?: 2; + break; + + case 0xf220: /* Command */ + if (s->intstatus & (1 << 15)) + break; + s->command = value; + onenand_command(s); + break; + case 0xf221: /* System Configuration 1 */ + s->config[0] = value; + onenand_intr_update(s); + qemu_set_irq(s->rdy, (s->config[0] >> 7) & 1); + break; + case 0xf222: /* System Configuration 2 */ + s->config[1] = value; + break; + + case 0xf241: /* Interrupt */ + s->intstatus &= value; + if ((1 << 15) & ~s->intstatus) + s->status &= ~(ONEN_ERR_CMD | ONEN_ERR_ERASE | + ONEN_ERR_PROG | ONEN_ERR_LOAD); + onenand_intr_update(s); + break; + case 0xf24c: /* Unlock Start Block Address */ + s->unladdr[0] = value & (s->blocks - 1); + /* For some reason we have to set the end address to by default + * be same as start because the software forgets to write anything + * in there. */ + s->unladdr[1] = value & (s->blocks - 1); + break; + case 0xf24d: /* Unlock End Block Address */ + s->unladdr[1] = value & (s->blocks - 1); + break; + + default: + fprintf(stderr, "%s: unknown OneNAND register %x\n", + __FUNCTION__, offset); + } +} + +static const MemoryRegionOps onenand_ops = { + .read = onenand_read, + .write = onenand_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static int onenand_initfn(SysBusDevice *sbd) +{ + DeviceState *dev = DEVICE(sbd); + OneNANDState *s = ONE_NAND(dev); + uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7)); + void *ram; + + s->base = (hwaddr)-1; + s->rdy = NULL; + s->blocks = size >> BLOCK_SHIFT; + s->secs = size >> 9; + s->blockwp = g_malloc(s->blocks); + s->density_mask = (s->id.dev & 0x08) + ? (1 << (6 + ((s->id.dev >> 4) & 7))) : 0; + memory_region_init_io(&s->iomem, OBJECT(s), &onenand_ops, s, "onenand", + 0x10000 << s->shift); + if (!s->blk) { + s->image = memset(g_malloc(size + (size >> 5)), + 0xff, size + (size >> 5)); + } else { + if (blk_is_read_only(s->blk)) { + error_report("Can't use a read-only drive"); + return -1; + } + s->blk_cur = s->blk; + } + s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), + 0xff, (64 + 2) << PAGE_SHIFT); + memory_region_init_ram(&s->ram, OBJECT(s), "onenand.ram", + 0xc000 << s->shift, &error_abort); + vmstate_register_ram_global(&s->ram); + ram = memory_region_get_ram_ptr(&s->ram); + s->boot[0] = ram + (0x0000 << s->shift); + s->boot[1] = ram + (0x8000 << s->shift); + s->data[0][0] = ram + ((0x0200 + (0 << (PAGE_SHIFT - 1))) << s->shift); + s->data[0][1] = ram + ((0x8010 + (0 << (PAGE_SHIFT - 6))) << s->shift); + s->data[1][0] = ram + ((0x0200 + (1 << (PAGE_SHIFT - 1))) << s->shift); + s->data[1][1] = ram + ((0x8010 + (1 << (PAGE_SHIFT - 6))) << s->shift); + onenand_mem_setup(s); + sysbus_init_irq(sbd, &s->intr); + sysbus_init_mmio(sbd, &s->container); + vmstate_register(dev, + ((s->shift & 0x7f) << 24) + | ((s->id.man & 0xff) << 16) + | ((s->id.dev & 0xff) << 8) + | (s->id.ver & 0xff), + &vmstate_onenand, s); + return 0; +} + +static Property onenand_properties[] = { + DEFINE_PROP_UINT16("manufacturer_id", OneNANDState, id.man, 0), + DEFINE_PROP_UINT16("device_id", OneNANDState, id.dev, 0), + DEFINE_PROP_UINT16("version_id", OneNANDState, id.ver, 0), + DEFINE_PROP_INT32("shift", OneNANDState, shift, 0), + DEFINE_PROP_DRIVE("drive", OneNANDState, blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void onenand_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = onenand_initfn; + dc->reset = onenand_system_reset; + dc->props = onenand_properties; +} + +static const TypeInfo onenand_info = { + .name = TYPE_ONE_NAND, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(OneNANDState), + .class_init = onenand_class_init, +}; + +static void onenand_register_types(void) +{ + type_register_static(&onenand_info); +} + +void *onenand_raw_otp(DeviceState *onenand_device) +{ + OneNANDState *s = ONE_NAND(onenand_device); + + return s->otp; +} + +type_init(onenand_register_types) diff --git a/qemu/hw/block/pflash_cfi01.c b/qemu/hw/block/pflash_cfi01.c new file mode 100644 index 000000000..2ba6c7729 --- /dev/null +++ b/qemu/hw/block/pflash_cfi01.c @@ -0,0 +1,954 @@ +/* + * CFI parallel flash with Intel command set emulation + * + * Copyright (c) 2006 Thorsten Zitterell + * Copyright (c) 2005 Jocelyn Mayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * For now, this code can emulate flashes of 1, 2 or 4 bytes width. + * Supported commands/modes are: + * - flash read + * - flash write + * - flash ID read + * - sector erase + * - CFI queries + * + * It does not support timings + * It does not support flash interleaving + * It does not implement software data protection as found in many real chips + * It does not implement erase suspend/resume commands + * It does not implement multiple sectors erase + * + * It does not implement much more ... + */ + +#include "hw/hw.h" +#include "hw/block/flash.h" +#include "sysemu/block-backend.h" +#include "qemu/timer.h" +#include "qemu/bitops.h" +#include "exec/address-spaces.h" +#include "qemu/host-utils.h" +#include "hw/sysbus.h" + +#define PFLASH_BUG(fmt, ...) \ +do { \ + fprintf(stderr, "PFLASH: Possible BUG - " fmt, ## __VA_ARGS__); \ + exit(1); \ +} while(0) + +/* #define PFLASH_DEBUG */ +#ifdef PFLASH_DEBUG +#define DPRINTF(fmt, ...) \ +do { \ + fprintf(stderr, "PFLASH: " fmt , ## __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#define TYPE_CFI_PFLASH01 "cfi.pflash01" +#define CFI_PFLASH01(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH01) + +#define PFLASH_BE 0 +#define PFLASH_SECURE 1 + +struct pflash_t { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + BlockBackend *blk; + uint32_t nb_blocs; + uint64_t sector_len; + uint8_t bank_width; + uint8_t device_width; /* If 0, device width not specified. */ + uint8_t max_device_width; /* max device width in bytes */ + uint32_t features; + uint8_t wcycle; /* if 0, the flash is read normally */ + int ro; + uint8_t cmd; + uint8_t status; + uint16_t ident0; + uint16_t ident1; + uint16_t ident2; + uint16_t ident3; + uint8_t cfi_len; + uint8_t cfi_table[0x52]; + uint64_t counter; + unsigned int writeblock_size; + QEMUTimer *timer; + MemoryRegion mem; + char *name; + void *storage; +}; + +static int pflash_post_load(void *opaque, int version_id); + +static const VMStateDescription vmstate_pflash = { + .name = "pflash_cfi01", + .version_id = 1, + .minimum_version_id = 1, + .post_load = pflash_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(wcycle, pflash_t), + VMSTATE_UINT8(cmd, pflash_t), + VMSTATE_UINT8(status, pflash_t), + VMSTATE_UINT64(counter, pflash_t), + VMSTATE_END_OF_LIST() + } +}; + +static void pflash_timer (void *opaque) +{ + pflash_t *pfl = opaque; + + DPRINTF("%s: command %02x done\n", __func__, pfl->cmd); + /* Reset flash */ + pfl->status ^= 0x80; + memory_region_rom_device_set_romd(&pfl->mem, true); + pfl->wcycle = 0; + pfl->cmd = 0; +} + +/* Perform a CFI query based on the bank width of the flash. + * If this code is called we know we have a device_width set for + * this flash. + */ +static uint32_t pflash_cfi_query(pflash_t *pfl, hwaddr offset) +{ + int i; + uint32_t resp = 0; + hwaddr boff; + + /* Adjust incoming offset to match expected device-width + * addressing. CFI query addresses are always specified in terms of + * the maximum supported width of the device. This means that x8 + * devices and x8/x16 devices in x8 mode behave differently. For + * devices that are not used at their max width, we will be + * provided with addresses that use higher address bits than + * expected (based on the max width), so we will shift them lower + * so that they will match the addresses used when + * device_width==max_device_width. + */ + boff = offset >> (ctz32(pfl->bank_width) + + ctz32(pfl->max_device_width) - ctz32(pfl->device_width)); + + if (boff > pfl->cfi_len) { + return 0; + } + /* Now we will construct the CFI response generated by a single + * device, then replicate that for all devices that make up the + * bus. For wide parts used in x8 mode, CFI query responses + * are different than native byte-wide parts. + */ + resp = pfl->cfi_table[boff]; + if (pfl->device_width != pfl->max_device_width) { + /* The only case currently supported is x8 mode for a + * wider part. + */ + if (pfl->device_width != 1 || pfl->bank_width > 4) { + DPRINTF("%s: Unsupported device configuration: " + "device_width=%d, max_device_width=%d\n", + __func__, pfl->device_width, + pfl->max_device_width); + return 0; + } + /* CFI query data is repeated, rather than zero padded for + * wide devices used in x8 mode. + */ + for (i = 1; i < pfl->max_device_width; i++) { + resp = deposit32(resp, 8 * i, 8, pfl->cfi_table[boff]); + } + } + /* Replicate responses for each device in bank. */ + if (pfl->device_width < pfl->bank_width) { + for (i = pfl->device_width; + i < pfl->bank_width; i += pfl->device_width) { + resp = deposit32(resp, 8 * i, 8 * pfl->device_width, resp); + } + } + + return resp; +} + + + +/* Perform a device id query based on the bank width of the flash. */ +static uint32_t pflash_devid_query(pflash_t *pfl, hwaddr offset) +{ + int i; + uint32_t resp; + hwaddr boff; + + /* Adjust incoming offset to match expected device-width + * addressing. Device ID read addresses are always specified in + * terms of the maximum supported width of the device. This means + * that x8 devices and x8/x16 devices in x8 mode behave + * differently. For devices that are not used at their max width, + * we will be provided with addresses that use higher address bits + * than expected (based on the max width), so we will shift them + * lower so that they will match the addresses used when + * device_width==max_device_width. + */ + boff = offset >> (ctz32(pfl->bank_width) + + ctz32(pfl->max_device_width) - ctz32(pfl->device_width)); + + /* Mask off upper bits which may be used in to query block + * or sector lock status at other addresses. + * Offsets 2/3 are block lock status, is not emulated. + */ + switch (boff & 0xFF) { + case 0: + resp = pfl->ident0; + DPRINTF("%s: Manufacturer Code %04x\n", __func__, resp); + break; + case 1: + resp = pfl->ident1; + DPRINTF("%s: Device ID Code %04x\n", __func__, resp); + break; + default: + DPRINTF("%s: Read Device Information offset=%x\n", __func__, + (unsigned)offset); + return 0; + break; + } + /* Replicate responses for each device in bank. */ + if (pfl->device_width < pfl->bank_width) { + for (i = pfl->device_width; + i < pfl->bank_width; i += pfl->device_width) { + resp = deposit32(resp, 8 * i, 8 * pfl->device_width, resp); + } + } + + return resp; +} + +static uint32_t pflash_data_read(pflash_t *pfl, hwaddr offset, + int width, int be) +{ + uint8_t *p; + uint32_t ret; + + p = pfl->storage; + switch (width) { + case 1: + ret = p[offset]; + DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n", + __func__, offset, ret); + break; + case 2: + if (be) { + ret = p[offset] << 8; + ret |= p[offset + 1]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + } + DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n", + __func__, offset, ret); + break; + case 4: + if (be) { + ret = p[offset] << 24; + ret |= p[offset + 1] << 16; + ret |= p[offset + 2] << 8; + ret |= p[offset + 3]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + ret |= p[offset + 2] << 16; + ret |= p[offset + 3] << 24; + } + DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n", + __func__, offset, ret); + break; + default: + DPRINTF("BUG in %s\n", __func__); + abort(); + } + return ret; +} + +static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, + int width, int be) +{ + hwaddr boff; + uint32_t ret; + + ret = -1; + +#if 0 + DPRINTF("%s: reading offset " TARGET_FMT_plx " under cmd %02x width %d\n", + __func__, offset, pfl->cmd, width); +#endif + switch (pfl->cmd) { + default: + /* This should never happen : reset state & treat it as a read */ + DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd); + pfl->wcycle = 0; + pfl->cmd = 0; + /* fall through to read code */ + case 0x00: + /* Flash area read */ + ret = pflash_data_read(pfl, offset, width, be); + break; + case 0x10: /* Single byte program */ + case 0x20: /* Block erase */ + case 0x28: /* Block erase */ + case 0x40: /* single byte program */ + case 0x50: /* Clear status register */ + case 0x60: /* Block /un)lock */ + case 0x70: /* Status Register */ + case 0xe8: /* Write block */ + /* Status register read. Return status from each device in + * bank. + */ + ret = pfl->status; + if (pfl->device_width && width > pfl->device_width) { + int shift = pfl->device_width * 8; + while (shift + pfl->device_width * 8 <= width * 8) { + ret |= pfl->status << shift; + shift += pfl->device_width * 8; + } + } else if (!pfl->device_width && width > 2) { + /* Handle 32 bit flash cases where device width is not + * set. (Existing behavior before device width added.) + */ + ret |= pfl->status << 16; + } + DPRINTF("%s: status %x\n", __func__, ret); + break; + case 0x90: + if (!pfl->device_width) { + /* Preserve old behavior if device width not specified */ + boff = offset & 0xFF; + if (pfl->bank_width == 2) { + boff = boff >> 1; + } else if (pfl->bank_width == 4) { + boff = boff >> 2; + } + + switch (boff) { + case 0: + ret = pfl->ident0 << 8 | pfl->ident1; + DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret); + break; + case 1: + ret = pfl->ident2 << 8 | pfl->ident3; + DPRINTF("%s: Device ID Code %04x\n", __func__, ret); + break; + default: + DPRINTF("%s: Read Device Information boff=%x\n", __func__, + (unsigned)boff); + ret = 0; + break; + } + } else { + /* If we have a read larger than the bank_width, combine multiple + * manufacturer/device ID queries into a single response. + */ + int i; + for (i = 0; i < width; i += pfl->bank_width) { + ret = deposit32(ret, i * 8, pfl->bank_width * 8, + pflash_devid_query(pfl, + offset + i * pfl->bank_width)); + } + } + break; + case 0x98: /* Query mode */ + if (!pfl->device_width) { + /* Preserve old behavior if device width not specified */ + boff = offset & 0xFF; + if (pfl->bank_width == 2) { + boff = boff >> 1; + } else if (pfl->bank_width == 4) { + boff = boff >> 2; + } + + if (boff > pfl->cfi_len) { + ret = 0; + } else { + ret = pfl->cfi_table[boff]; + } + } else { + /* If we have a read larger than the bank_width, combine multiple + * CFI queries into a single response. + */ + int i; + for (i = 0; i < width; i += pfl->bank_width) { + ret = deposit32(ret, i * 8, pfl->bank_width * 8, + pflash_cfi_query(pfl, + offset + i * pfl->bank_width)); + } + } + + break; + } + return ret; +} + +/* update flash content on disk */ +static void pflash_update(pflash_t *pfl, int offset, + int size) +{ + int offset_end; + if (pfl->blk) { + offset_end = offset + size; + /* round to sectors */ + offset = offset >> 9; + offset_end = (offset_end + 511) >> 9; + blk_write(pfl->blk, offset, pfl->storage + (offset << 9), + offset_end - offset); + } +} + +static inline void pflash_data_write(pflash_t *pfl, hwaddr offset, + uint32_t value, int width, int be) +{ + uint8_t *p = pfl->storage; + + DPRINTF("%s: block write offset " TARGET_FMT_plx + " value %x counter %016" PRIx64 "\n", + __func__, offset, value, pfl->counter); + switch (width) { + case 1: + p[offset] = value; + break; + case 2: + if (be) { + p[offset] = value >> 8; + p[offset + 1] = value; + } else { + p[offset] = value; + p[offset + 1] = value >> 8; + } + break; + case 4: + if (be) { + p[offset] = value >> 24; + p[offset + 1] = value >> 16; + p[offset + 2] = value >> 8; + p[offset + 3] = value; + } else { + p[offset] = value; + p[offset + 1] = value >> 8; + p[offset + 2] = value >> 16; + p[offset + 3] = value >> 24; + } + break; + } + +} + +static void pflash_write(pflash_t *pfl, hwaddr offset, + uint32_t value, int width, int be) +{ + uint8_t *p; + uint8_t cmd; + + cmd = value; + + DPRINTF("%s: writing offset " TARGET_FMT_plx " value %08x width %d wcycle 0x%x\n", + __func__, offset, value, width, pfl->wcycle); + + if (!pfl->wcycle) { + /* Set the device in I/O access mode */ + memory_region_rom_device_set_romd(&pfl->mem, false); + } + + switch (pfl->wcycle) { + case 0: + /* read mode */ + switch (cmd) { + case 0x00: /* ??? */ + goto reset_flash; + case 0x10: /* Single Byte Program */ + case 0x40: /* Single Byte Program */ + DPRINTF("%s: Single Byte Program\n", __func__); + break; + case 0x20: /* Block erase */ + p = pfl->storage; + offset &= ~(pfl->sector_len - 1); + + DPRINTF("%s: block erase at " TARGET_FMT_plx " bytes %x\n", + __func__, offset, (unsigned)pfl->sector_len); + + if (!pfl->ro) { + memset(p + offset, 0xff, pfl->sector_len); + pflash_update(pfl, offset, pfl->sector_len); + } else { + pfl->status |= 0x20; /* Block erase error */ + } + pfl->status |= 0x80; /* Ready! */ + break; + case 0x50: /* Clear status bits */ + DPRINTF("%s: Clear status bits\n", __func__); + pfl->status = 0x0; + goto reset_flash; + case 0x60: /* Block (un)lock */ + DPRINTF("%s: Block unlock\n", __func__); + break; + case 0x70: /* Status Register */ + DPRINTF("%s: Read status register\n", __func__); + pfl->cmd = cmd; + return; + case 0x90: /* Read Device ID */ + DPRINTF("%s: Read Device information\n", __func__); + pfl->cmd = cmd; + return; + case 0x98: /* CFI query */ + DPRINTF("%s: CFI query\n", __func__); + break; + case 0xe8: /* Write to buffer */ + DPRINTF("%s: Write to buffer\n", __func__); + pfl->status |= 0x80; /* Ready! */ + break; + case 0xf0: /* Probe for AMD flash */ + DPRINTF("%s: Probe for AMD flash\n", __func__); + goto reset_flash; + case 0xff: /* Read array mode */ + DPRINTF("%s: Read array mode\n", __func__); + goto reset_flash; + default: + goto error_flash; + } + pfl->wcycle++; + pfl->cmd = cmd; + break; + case 1: + switch (pfl->cmd) { + case 0x10: /* Single Byte Program */ + case 0x40: /* Single Byte Program */ + DPRINTF("%s: Single Byte Program\n", __func__); + if (!pfl->ro) { + pflash_data_write(pfl, offset, value, width, be); + pflash_update(pfl, offset, width); + } else { + pfl->status |= 0x10; /* Programming error */ + } + pfl->status |= 0x80; /* Ready! */ + pfl->wcycle = 0; + break; + case 0x20: /* Block erase */ + case 0x28: + if (cmd == 0xd0) { /* confirm */ + pfl->wcycle = 0; + pfl->status |= 0x80; + } else if (cmd == 0xff) { /* read array mode */ + goto reset_flash; + } else + goto error_flash; + + break; + case 0xe8: + /* Mask writeblock size based on device width, or bank width if + * device width not specified. + */ + if (pfl->device_width) { + value = extract32(value, 0, pfl->device_width * 8); + } else { + value = extract32(value, 0, pfl->bank_width * 8); + } + DPRINTF("%s: block write of %x bytes\n", __func__, value); + pfl->counter = value; + pfl->wcycle++; + break; + case 0x60: + if (cmd == 0xd0) { + pfl->wcycle = 0; + pfl->status |= 0x80; + } else if (cmd == 0x01) { + pfl->wcycle = 0; + pfl->status |= 0x80; + } else if (cmd == 0xff) { + goto reset_flash; + } else { + DPRINTF("%s: Unknown (un)locking command\n", __func__); + goto reset_flash; + } + break; + case 0x98: + if (cmd == 0xff) { + goto reset_flash; + } else { + DPRINTF("%s: leaving query mode\n", __func__); + } + break; + default: + goto error_flash; + } + break; + case 2: + switch (pfl->cmd) { + case 0xe8: /* Block write */ + if (!pfl->ro) { + pflash_data_write(pfl, offset, value, width, be); + } else { + pfl->status |= 0x10; /* Programming error */ + } + + pfl->status |= 0x80; + + if (!pfl->counter) { + hwaddr mask = pfl->writeblock_size - 1; + mask = ~mask; + + DPRINTF("%s: block write finished\n", __func__); + pfl->wcycle++; + if (!pfl->ro) { + /* Flush the entire write buffer onto backing storage. */ + pflash_update(pfl, offset & mask, pfl->writeblock_size); + } else { + pfl->status |= 0x10; /* Programming error */ + } + } + + pfl->counter--; + break; + default: + goto error_flash; + } + break; + case 3: /* Confirm mode */ + switch (pfl->cmd) { + case 0xe8: /* Block write */ + if (cmd == 0xd0) { + pfl->wcycle = 0; + pfl->status |= 0x80; + } else { + DPRINTF("%s: unknown command for \"write block\"\n", __func__); + PFLASH_BUG("Write block confirm"); + goto reset_flash; + } + break; + default: + goto error_flash; + } + break; + default: + /* Should never happen */ + DPRINTF("%s: invalid write state\n", __func__); + goto reset_flash; + } + return; + + error_flash: + qemu_log_mask(LOG_UNIMP, "%s: Unimplemented flash cmd sequence " + "(offset " TARGET_FMT_plx ", wcycle 0x%x cmd 0x%x value 0x%x)" + "\n", __func__, offset, pfl->wcycle, pfl->cmd, value); + + reset_flash: + memory_region_rom_device_set_romd(&pfl->mem, true); + + pfl->wcycle = 0; + pfl->cmd = 0; +} + + +static MemTxResult pflash_mem_read_with_attrs(void *opaque, hwaddr addr, uint64_t *value, + unsigned len, MemTxAttrs attrs) +{ + pflash_t *pfl = opaque; + bool be = !!(pfl->features & (1 << PFLASH_BE)); + + if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { + *value = pflash_data_read(opaque, addr, len, be); + } else { + *value = pflash_read(opaque, addr, len, be); + } + return MEMTX_OK; +} + +static MemTxResult pflash_mem_write_with_attrs(void *opaque, hwaddr addr, uint64_t value, + unsigned len, MemTxAttrs attrs) +{ + pflash_t *pfl = opaque; + bool be = !!(pfl->features & (1 << PFLASH_BE)); + + if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { + return MEMTX_ERROR; + } else { + pflash_write(opaque, addr, value, len, be); + return MEMTX_OK; + } +} + +static const MemoryRegionOps pflash_cfi01_ops = { + .read_with_attrs = pflash_mem_read_with_attrs, + .write_with_attrs = pflash_mem_write_with_attrs, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void pflash_cfi01_realize(DeviceState *dev, Error **errp) +{ + pflash_t *pfl = CFI_PFLASH01(dev); + uint64_t total_len; + int ret; + uint64_t blocks_per_device, device_len; + int num_devices; + Error *local_err = NULL; + + total_len = pfl->sector_len * pfl->nb_blocs; + + /* These are only used to expose the parameters of each device + * in the cfi_table[]. + */ + num_devices = pfl->device_width ? (pfl->bank_width / pfl->device_width) : 1; + blocks_per_device = pfl->nb_blocs / num_devices; + device_len = pfl->sector_len * blocks_per_device; + + /* XXX: to be fixed */ +#if 0 + if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && + total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024)) + return NULL; +#endif + + memory_region_init_rom_device( + &pfl->mem, OBJECT(dev), + &pflash_cfi01_ops, + pfl, + pfl->name, total_len, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register_ram(&pfl->mem, DEVICE(pfl)); + pfl->storage = memory_region_get_ram_ptr(&pfl->mem); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + + if (pfl->blk) { + /* read the initial flash content */ + ret = blk_read(pfl->blk, 0, pfl->storage, total_len >> 9); + + if (ret < 0) { + vmstate_unregister_ram(&pfl->mem, DEVICE(pfl)); + error_setg(errp, "failed to read the initial flash content"); + return; + } + } + + if (pfl->blk) { + pfl->ro = blk_is_read_only(pfl->blk); + } else { + pfl->ro = 0; + } + + /* Default to devices being used at their maximum device width. This was + * assumed before the device_width support was added. + */ + if (!pfl->max_device_width) { + pfl->max_device_width = pfl->device_width; + } + + pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); + pfl->wcycle = 0; + pfl->cmd = 0; + pfl->status = 0; + /* Hardcoded CFI table */ + pfl->cfi_len = 0x52; + /* Standard "QRY" string */ + pfl->cfi_table[0x10] = 'Q'; + pfl->cfi_table[0x11] = 'R'; + pfl->cfi_table[0x12] = 'Y'; + /* Command set (Intel) */ + pfl->cfi_table[0x13] = 0x01; + pfl->cfi_table[0x14] = 0x00; + /* Primary extended table address (none) */ + pfl->cfi_table[0x15] = 0x31; + pfl->cfi_table[0x16] = 0x00; + /* Alternate command set (none) */ + pfl->cfi_table[0x17] = 0x00; + pfl->cfi_table[0x18] = 0x00; + /* Alternate extended table (none) */ + pfl->cfi_table[0x19] = 0x00; + pfl->cfi_table[0x1A] = 0x00; + /* Vcc min */ + pfl->cfi_table[0x1B] = 0x45; + /* Vcc max */ + pfl->cfi_table[0x1C] = 0x55; + /* Vpp min (no Vpp pin) */ + pfl->cfi_table[0x1D] = 0x00; + /* Vpp max (no Vpp pin) */ + pfl->cfi_table[0x1E] = 0x00; + /* Reserved */ + pfl->cfi_table[0x1F] = 0x07; + /* Timeout for min size buffer write */ + pfl->cfi_table[0x20] = 0x07; + /* Typical timeout for block erase */ + pfl->cfi_table[0x21] = 0x0a; + /* Typical timeout for full chip erase (4096 ms) */ + pfl->cfi_table[0x22] = 0x00; + /* Reserved */ + pfl->cfi_table[0x23] = 0x04; + /* Max timeout for buffer write */ + pfl->cfi_table[0x24] = 0x04; + /* Max timeout for block erase */ + pfl->cfi_table[0x25] = 0x04; + /* Max timeout for chip erase */ + pfl->cfi_table[0x26] = 0x00; + /* Device size */ + pfl->cfi_table[0x27] = ctz32(device_len); /* + 1; */ + /* Flash device interface (8 & 16 bits) */ + pfl->cfi_table[0x28] = 0x02; + pfl->cfi_table[0x29] = 0x00; + /* Max number of bytes in multi-bytes write */ + if (pfl->bank_width == 1) { + pfl->cfi_table[0x2A] = 0x08; + } else { + pfl->cfi_table[0x2A] = 0x0B; + } + pfl->writeblock_size = 1 << pfl->cfi_table[0x2A]; + + pfl->cfi_table[0x2B] = 0x00; + /* Number of erase block regions (uniform) */ + pfl->cfi_table[0x2C] = 0x01; + /* Erase block region 1 */ + pfl->cfi_table[0x2D] = blocks_per_device - 1; + pfl->cfi_table[0x2E] = (blocks_per_device - 1) >> 8; + pfl->cfi_table[0x2F] = pfl->sector_len >> 8; + pfl->cfi_table[0x30] = pfl->sector_len >> 16; + + /* Extended */ + pfl->cfi_table[0x31] = 'P'; + pfl->cfi_table[0x32] = 'R'; + pfl->cfi_table[0x33] = 'I'; + + pfl->cfi_table[0x34] = '1'; + pfl->cfi_table[0x35] = '0'; + + pfl->cfi_table[0x36] = 0x00; + pfl->cfi_table[0x37] = 0x00; + pfl->cfi_table[0x38] = 0x00; + pfl->cfi_table[0x39] = 0x00; + + pfl->cfi_table[0x3a] = 0x00; + + pfl->cfi_table[0x3b] = 0x00; + pfl->cfi_table[0x3c] = 0x00; + + pfl->cfi_table[0x3f] = 0x01; /* Number of protection fields */ +} + +static Property pflash_cfi01_properties[] = { + DEFINE_PROP_DRIVE("drive", struct pflash_t, blk), + /* num-blocks is the number of blocks actually visible to the guest, + * ie the total size of the device divided by the sector length. + * If we're emulating flash devices wired in parallel the actual + * number of blocks per indvidual device will differ. + */ + DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), + DEFINE_PROP_UINT64("sector-length", struct pflash_t, sector_len, 0), + /* width here is the overall width of this QEMU device in bytes. + * The QEMU device may be emulating a number of flash devices + * wired up in parallel; the width of each individual flash + * device should be specified via device-width. If the individual + * devices have a maximum width which is greater than the width + * they are being used for, this maximum width should be set via + * max-device-width (which otherwise defaults to device-width). + * So for instance a 32-bit wide QEMU flash device made from four + * 16-bit flash devices used in 8-bit wide mode would be configured + * with width = 4, device-width = 1, max-device-width = 2. + * + * If device-width is not specified we default to backwards + * compatible behaviour which is a bad emulation of two + * 16 bit devices making up a 32 bit wide QEMU device. This + * is deprecated for new uses of this device. + */ + DEFINE_PROP_UINT8("width", struct pflash_t, bank_width, 0), + DEFINE_PROP_UINT8("device-width", struct pflash_t, device_width, 0), + DEFINE_PROP_UINT8("max-device-width", struct pflash_t, max_device_width, 0), + DEFINE_PROP_BIT("big-endian", struct pflash_t, features, PFLASH_BE, 0), + DEFINE_PROP_BIT("secure", struct pflash_t, features, PFLASH_SECURE, 0), + DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), + DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), + DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), + DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), + DEFINE_PROP_STRING("name", struct pflash_t, name), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi01_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pflash_cfi01_realize; + dc->props = pflash_cfi01_properties; + dc->vmsd = &vmstate_pflash; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + + +static const TypeInfo pflash_cfi01_info = { + .name = TYPE_CFI_PFLASH01, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(struct pflash_t), + .class_init = pflash_cfi01_class_init, +}; + +static void pflash_cfi01_register_types(void) +{ + type_register_static(&pflash_cfi01_info); +} + +type_init(pflash_cfi01_register_types) + +pflash_t *pflash_cfi01_register(hwaddr base, + DeviceState *qdev, const char *name, + hwaddr size, + BlockBackend *blk, + uint32_t sector_len, int nb_blocs, + int bank_width, uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, int be) +{ + DeviceState *dev = qdev_create(NULL, TYPE_CFI_PFLASH01); + + if (blk) { + qdev_prop_set_drive(dev, "drive", blk, &error_abort); + } + qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); + qdev_prop_set_uint64(dev, "sector-length", sector_len); + qdev_prop_set_uint8(dev, "width", bank_width); + qdev_prop_set_bit(dev, "big-endian", !!be); + qdev_prop_set_uint16(dev, "id0", id0); + qdev_prop_set_uint16(dev, "id1", id1); + qdev_prop_set_uint16(dev, "id2", id2); + qdev_prop_set_uint16(dev, "id3", id3); + qdev_prop_set_string(dev, "name", name); + qdev_init_nofail(dev); + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + return CFI_PFLASH01(dev); +} + +MemoryRegion *pflash_cfi01_get_memory(pflash_t *fl) +{ + return &fl->mem; +} + +static int pflash_post_load(void *opaque, int version_id) +{ + pflash_t *pfl = opaque; + + if (!pfl->ro) { + DPRINTF("%s: updating bdrv for %s\n", __func__, pfl->name); + pflash_update(pfl, 0, pfl->sector_len * pfl->nb_blocs); + } + return 0; +} diff --git a/qemu/hw/block/pflash_cfi02.c b/qemu/hw/block/pflash_cfi02.c new file mode 100644 index 000000000..074a005f6 --- /dev/null +++ b/qemu/hw/block/pflash_cfi02.c @@ -0,0 +1,795 @@ +/* + * CFI parallel flash with AMD command set emulation + * + * Copyright (c) 2005 Jocelyn Mayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * For now, this code can emulate flashes of 1, 2 or 4 bytes width. + * Supported commands/modes are: + * - flash read + * - flash write + * - flash ID read + * - sector erase + * - chip erase + * - unlock bypass command + * - CFI queries + * + * It does not support flash interleaving. + * It does not implement boot blocs with reduced size + * It does not implement software data protection as found in many real chips + * It does not implement erase suspend/resume commands + * It does not implement multiple sectors erase + */ + +#include "hw/hw.h" +#include "hw/block/flash.h" +#include "qemu/timer.h" +#include "sysemu/block-backend.h" +#include "exec/address-spaces.h" +#include "qemu/host-utils.h" +#include "hw/sysbus.h" + +//#define PFLASH_DEBUG +#ifdef PFLASH_DEBUG +#define DPRINTF(fmt, ...) \ +do { \ + fprintf(stderr, "PFLASH: " fmt , ## __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#define PFLASH_LAZY_ROMD_THRESHOLD 42 + +#define TYPE_CFI_PFLASH02 "cfi.pflash02" +#define CFI_PFLASH02(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH02) + +struct pflash_t { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + BlockBackend *blk; + uint32_t sector_len; + uint32_t nb_blocs; + uint32_t chip_len; + uint8_t mappings; + uint8_t width; + uint8_t be; + int wcycle; /* if 0, the flash is read normally */ + int bypass; + int ro; + uint8_t cmd; + uint8_t status; + /* FIXME: implement array device properties */ + uint16_t ident0; + uint16_t ident1; + uint16_t ident2; + uint16_t ident3; + uint16_t unlock_addr0; + uint16_t unlock_addr1; + uint8_t cfi_len; + uint8_t cfi_table[0x52]; + QEMUTimer *timer; + /* The device replicates the flash memory across its memory space. Emulate + * that by having a container (.mem) filled with an array of aliases + * (.mem_mappings) pointing to the flash memory (.orig_mem). + */ + MemoryRegion mem; + MemoryRegion *mem_mappings; /* array; one per mapping */ + MemoryRegion orig_mem; + int rom_mode; + int read_counter; /* used for lazy switch-back to rom mode */ + char *name; + void *storage; +}; + +/* + * Set up replicated mappings of the same region. + */ +static void pflash_setup_mappings(pflash_t *pfl) +{ + unsigned i; + hwaddr size = memory_region_size(&pfl->orig_mem); + + memory_region_init(&pfl->mem, OBJECT(pfl), "pflash", pfl->mappings * size); + pfl->mem_mappings = g_new(MemoryRegion, pfl->mappings); + for (i = 0; i < pfl->mappings; ++i) { + memory_region_init_alias(&pfl->mem_mappings[i], OBJECT(pfl), + "pflash-alias", &pfl->orig_mem, 0, size); + memory_region_add_subregion(&pfl->mem, i * size, &pfl->mem_mappings[i]); + } +} + +static void pflash_register_memory(pflash_t *pfl, int rom_mode) +{ + memory_region_rom_device_set_romd(&pfl->orig_mem, rom_mode); + pfl->rom_mode = rom_mode; +} + +static void pflash_timer (void *opaque) +{ + pflash_t *pfl = opaque; + + DPRINTF("%s: command %02x done\n", __func__, pfl->cmd); + /* Reset flash */ + pfl->status ^= 0x80; + if (pfl->bypass) { + pfl->wcycle = 2; + } else { + pflash_register_memory(pfl, 1); + pfl->wcycle = 0; + } + pfl->cmd = 0; +} + +static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, + int width, int be) +{ + hwaddr boff; + uint32_t ret; + uint8_t *p; + + DPRINTF("%s: offset " TARGET_FMT_plx "\n", __func__, offset); + ret = -1; + /* Lazy reset to ROMD mode after a certain amount of read accesses */ + if (!pfl->rom_mode && pfl->wcycle == 0 && + ++pfl->read_counter > PFLASH_LAZY_ROMD_THRESHOLD) { + pflash_register_memory(pfl, 1); + } + offset &= pfl->chip_len - 1; + boff = offset & 0xFF; + if (pfl->width == 2) + boff = boff >> 1; + else if (pfl->width == 4) + boff = boff >> 2; + switch (pfl->cmd) { + default: + /* This should never happen : reset state & treat it as a read*/ + DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd); + pfl->wcycle = 0; + pfl->cmd = 0; + /* fall through to the read code */ + case 0x80: + /* We accept reads during second unlock sequence... */ + case 0x00: + flash_read: + /* Flash area read */ + p = pfl->storage; + switch (width) { + case 1: + ret = p[offset]; +// DPRINTF("%s: data offset %08x %02x\n", __func__, offset, ret); + break; + case 2: + if (be) { + ret = p[offset] << 8; + ret |= p[offset + 1]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + } +// DPRINTF("%s: data offset %08x %04x\n", __func__, offset, ret); + break; + case 4: + if (be) { + ret = p[offset] << 24; + ret |= p[offset + 1] << 16; + ret |= p[offset + 2] << 8; + ret |= p[offset + 3]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + ret |= p[offset + 2] << 16; + ret |= p[offset + 3] << 24; + } +// DPRINTF("%s: data offset %08x %08x\n", __func__, offset, ret); + break; + } + break; + case 0x90: + /* flash ID read */ + switch (boff) { + case 0x00: + case 0x01: + ret = boff & 0x01 ? pfl->ident1 : pfl->ident0; + break; + case 0x02: + ret = 0x00; /* Pretend all sectors are unprotected */ + break; + case 0x0E: + case 0x0F: + ret = boff & 0x01 ? pfl->ident3 : pfl->ident2; + if (ret == (uint8_t)-1) { + goto flash_read; + } + break; + default: + goto flash_read; + } + DPRINTF("%s: ID " TARGET_FMT_plx " %x\n", __func__, boff, ret); + break; + case 0xA0: + case 0x10: + case 0x30: + /* Status register read */ + ret = pfl->status; + DPRINTF("%s: status %x\n", __func__, ret); + /* Toggle bit 6 */ + pfl->status ^= 0x40; + break; + case 0x98: + /* CFI query mode */ + if (boff > pfl->cfi_len) + ret = 0; + else + ret = pfl->cfi_table[boff]; + break; + } + + return ret; +} + +/* update flash content on disk */ +static void pflash_update(pflash_t *pfl, int offset, + int size) +{ + int offset_end; + if (pfl->blk) { + offset_end = offset + size; + /* round to sectors */ + offset = offset >> 9; + offset_end = (offset_end + 511) >> 9; + blk_write(pfl->blk, offset, pfl->storage + (offset << 9), + offset_end - offset); + } +} + +static void pflash_write (pflash_t *pfl, hwaddr offset, + uint32_t value, int width, int be) +{ + hwaddr boff; + uint8_t *p; + uint8_t cmd; + + cmd = value; + if (pfl->cmd != 0xA0 && cmd == 0xF0) { +#if 0 + DPRINTF("%s: flash reset asked (%02x %02x)\n", + __func__, pfl->cmd, cmd); +#endif + goto reset_flash; + } + DPRINTF("%s: offset " TARGET_FMT_plx " %08x %d %d\n", __func__, + offset, value, width, pfl->wcycle); + offset &= pfl->chip_len - 1; + + DPRINTF("%s: offset " TARGET_FMT_plx " %08x %d\n", __func__, + offset, value, width); + boff = offset & (pfl->sector_len - 1); + if (pfl->width == 2) + boff = boff >> 1; + else if (pfl->width == 4) + boff = boff >> 2; + switch (pfl->wcycle) { + case 0: + /* Set the device in I/O access mode if required */ + if (pfl->rom_mode) + pflash_register_memory(pfl, 0); + pfl->read_counter = 0; + /* We're in read mode */ + check_unlock0: + if (boff == 0x55 && cmd == 0x98) { + enter_CFI_mode: + /* Enter CFI query mode */ + pfl->wcycle = 7; + pfl->cmd = 0x98; + return; + } + if (boff != pfl->unlock_addr0 || cmd != 0xAA) { + DPRINTF("%s: unlock0 failed " TARGET_FMT_plx " %02x %04x\n", + __func__, boff, cmd, pfl->unlock_addr0); + goto reset_flash; + } + DPRINTF("%s: unlock sequence started\n", __func__); + break; + case 1: + /* We started an unlock sequence */ + check_unlock1: + if (boff != pfl->unlock_addr1 || cmd != 0x55) { + DPRINTF("%s: unlock1 failed " TARGET_FMT_plx " %02x\n", __func__, + boff, cmd); + goto reset_flash; + } + DPRINTF("%s: unlock sequence done\n", __func__); + break; + case 2: + /* We finished an unlock sequence */ + if (!pfl->bypass && boff != pfl->unlock_addr0) { + DPRINTF("%s: command failed " TARGET_FMT_plx " %02x\n", __func__, + boff, cmd); + goto reset_flash; + } + switch (cmd) { + case 0x20: + pfl->bypass = 1; + goto do_bypass; + case 0x80: + case 0x90: + case 0xA0: + pfl->cmd = cmd; + DPRINTF("%s: starting command %02x\n", __func__, cmd); + break; + default: + DPRINTF("%s: unknown command %02x\n", __func__, cmd); + goto reset_flash; + } + break; + case 3: + switch (pfl->cmd) { + case 0x80: + /* We need another unlock sequence */ + goto check_unlock0; + case 0xA0: + DPRINTF("%s: write data offset " TARGET_FMT_plx " %08x %d\n", + __func__, offset, value, width); + p = pfl->storage; + if (!pfl->ro) { + switch (width) { + case 1: + p[offset] &= value; + pflash_update(pfl, offset, 1); + break; + case 2: + if (be) { + p[offset] &= value >> 8; + p[offset + 1] &= value; + } else { + p[offset] &= value; + p[offset + 1] &= value >> 8; + } + pflash_update(pfl, offset, 2); + break; + case 4: + if (be) { + p[offset] &= value >> 24; + p[offset + 1] &= value >> 16; + p[offset + 2] &= value >> 8; + p[offset + 3] &= value; + } else { + p[offset] &= value; + p[offset + 1] &= value >> 8; + p[offset + 2] &= value >> 16; + p[offset + 3] &= value >> 24; + } + pflash_update(pfl, offset, 4); + break; + } + } + pfl->status = 0x00 | ~(value & 0x80); + /* Let's pretend write is immediate */ + if (pfl->bypass) + goto do_bypass; + goto reset_flash; + case 0x90: + if (pfl->bypass && cmd == 0x00) { + /* Unlock bypass reset */ + goto reset_flash; + } + /* We can enter CFI query mode from autoselect mode */ + if (boff == 0x55 && cmd == 0x98) + goto enter_CFI_mode; + /* No break here */ + default: + DPRINTF("%s: invalid write for command %02x\n", + __func__, pfl->cmd); + goto reset_flash; + } + case 4: + switch (pfl->cmd) { + case 0xA0: + /* Ignore writes while flash data write is occurring */ + /* As we suppose write is immediate, this should never happen */ + return; + case 0x80: + goto check_unlock1; + default: + /* Should never happen */ + DPRINTF("%s: invalid command state %02x (wc 4)\n", + __func__, pfl->cmd); + goto reset_flash; + } + break; + case 5: + switch (cmd) { + case 0x10: + if (boff != pfl->unlock_addr0) { + DPRINTF("%s: chip erase: invalid address " TARGET_FMT_plx "\n", + __func__, offset); + goto reset_flash; + } + /* Chip erase */ + DPRINTF("%s: start chip erase\n", __func__); + if (!pfl->ro) { + memset(pfl->storage, 0xFF, pfl->chip_len); + pflash_update(pfl, 0, pfl->chip_len); + } + pfl->status = 0x00; + /* Let's wait 5 seconds before chip erase is done */ + timer_mod(pfl->timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() * 5)); + break; + case 0x30: + /* Sector erase */ + p = pfl->storage; + offset &= ~(pfl->sector_len - 1); + DPRINTF("%s: start sector erase at " TARGET_FMT_plx "\n", __func__, + offset); + if (!pfl->ro) { + memset(p + offset, 0xFF, pfl->sector_len); + pflash_update(pfl, offset, pfl->sector_len); + } + pfl->status = 0x00; + /* Let's wait 1/2 second before sector erase is done */ + timer_mod(pfl->timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() / 2)); + break; + default: + DPRINTF("%s: invalid command %02x (wc 5)\n", __func__, cmd); + goto reset_flash; + } + pfl->cmd = cmd; + break; + case 6: + switch (pfl->cmd) { + case 0x10: + /* Ignore writes during chip erase */ + return; + case 0x30: + /* Ignore writes during sector erase */ + return; + default: + /* Should never happen */ + DPRINTF("%s: invalid command state %02x (wc 6)\n", + __func__, pfl->cmd); + goto reset_flash; + } + break; + case 7: /* Special value for CFI queries */ + DPRINTF("%s: invalid write in CFI query mode\n", __func__); + goto reset_flash; + default: + /* Should never happen */ + DPRINTF("%s: invalid write state (wc 7)\n", __func__); + goto reset_flash; + } + pfl->wcycle++; + + return; + + /* Reset flash */ + reset_flash: + pfl->bypass = 0; + pfl->wcycle = 0; + pfl->cmd = 0; + return; + + do_bypass: + pfl->wcycle = 2; + pfl->cmd = 0; +} + + +static uint32_t pflash_readb_be(void *opaque, hwaddr addr) +{ + return pflash_read(opaque, addr, 1, 1); +} + +static uint32_t pflash_readb_le(void *opaque, hwaddr addr) +{ + return pflash_read(opaque, addr, 1, 0); +} + +static uint32_t pflash_readw_be(void *opaque, hwaddr addr) +{ + pflash_t *pfl = opaque; + + return pflash_read(pfl, addr, 2, 1); +} + +static uint32_t pflash_readw_le(void *opaque, hwaddr addr) +{ + pflash_t *pfl = opaque; + + return pflash_read(pfl, addr, 2, 0); +} + +static uint32_t pflash_readl_be(void *opaque, hwaddr addr) +{ + pflash_t *pfl = opaque; + + return pflash_read(pfl, addr, 4, 1); +} + +static uint32_t pflash_readl_le(void *opaque, hwaddr addr) +{ + pflash_t *pfl = opaque; + + return pflash_read(pfl, addr, 4, 0); +} + +static void pflash_writeb_be(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_write(opaque, addr, value, 1, 1); +} + +static void pflash_writeb_le(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_write(opaque, addr, value, 1, 0); +} + +static void pflash_writew_be(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_t *pfl = opaque; + + pflash_write(pfl, addr, value, 2, 1); +} + +static void pflash_writew_le(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_t *pfl = opaque; + + pflash_write(pfl, addr, value, 2, 0); +} + +static void pflash_writel_be(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_t *pfl = opaque; + + pflash_write(pfl, addr, value, 4, 1); +} + +static void pflash_writel_le(void *opaque, hwaddr addr, + uint32_t value) +{ + pflash_t *pfl = opaque; + + pflash_write(pfl, addr, value, 4, 0); +} + +static const MemoryRegionOps pflash_cfi02_ops_be = { + .old_mmio = { + .read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, }, + .write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, }, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps pflash_cfi02_ops_le = { + .old_mmio = { + .read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, }, + .write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, }, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void pflash_cfi02_realize(DeviceState *dev, Error **errp) +{ + pflash_t *pfl = CFI_PFLASH02(dev); + uint32_t chip_len; + int ret; + Error *local_err = NULL; + + chip_len = pfl->sector_len * pfl->nb_blocs; + /* XXX: to be fixed */ +#if 0 + if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && + total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024)) + return NULL; +#endif + + memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ? + &pflash_cfi02_ops_be : &pflash_cfi02_ops_le, + pfl, pfl->name, chip_len, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); + pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); + pfl->chip_len = chip_len; + if (pfl->blk) { + /* read the initial flash content */ + ret = blk_read(pfl->blk, 0, pfl->storage, chip_len >> 9); + if (ret < 0) { + vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl)); + error_setg(errp, "failed to read the initial flash content"); + return; + } + } + + pflash_setup_mappings(pfl); + pfl->rom_mode = 1; + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + + if (pfl->blk) { + pfl->ro = blk_is_read_only(pfl->blk); + } else { + pfl->ro = 0; + } + + pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); + pfl->wcycle = 0; + pfl->cmd = 0; + pfl->status = 0; + /* Hardcoded CFI table (mostly from SG29 Spansion flash) */ + pfl->cfi_len = 0x52; + /* Standard "QRY" string */ + pfl->cfi_table[0x10] = 'Q'; + pfl->cfi_table[0x11] = 'R'; + pfl->cfi_table[0x12] = 'Y'; + /* Command set (AMD/Fujitsu) */ + pfl->cfi_table[0x13] = 0x02; + pfl->cfi_table[0x14] = 0x00; + /* Primary extended table address */ + pfl->cfi_table[0x15] = 0x31; + pfl->cfi_table[0x16] = 0x00; + /* Alternate command set (none) */ + pfl->cfi_table[0x17] = 0x00; + pfl->cfi_table[0x18] = 0x00; + /* Alternate extended table (none) */ + pfl->cfi_table[0x19] = 0x00; + pfl->cfi_table[0x1A] = 0x00; + /* Vcc min */ + pfl->cfi_table[0x1B] = 0x27; + /* Vcc max */ + pfl->cfi_table[0x1C] = 0x36; + /* Vpp min (no Vpp pin) */ + pfl->cfi_table[0x1D] = 0x00; + /* Vpp max (no Vpp pin) */ + pfl->cfi_table[0x1E] = 0x00; + /* Reserved */ + pfl->cfi_table[0x1F] = 0x07; + /* Timeout for min size buffer write (NA) */ + pfl->cfi_table[0x20] = 0x00; + /* Typical timeout for block erase (512 ms) */ + pfl->cfi_table[0x21] = 0x09; + /* Typical timeout for full chip erase (4096 ms) */ + pfl->cfi_table[0x22] = 0x0C; + /* Reserved */ + pfl->cfi_table[0x23] = 0x01; + /* Max timeout for buffer write (NA) */ + pfl->cfi_table[0x24] = 0x00; + /* Max timeout for block erase */ + pfl->cfi_table[0x25] = 0x0A; + /* Max timeout for chip erase */ + pfl->cfi_table[0x26] = 0x0D; + /* Device size */ + pfl->cfi_table[0x27] = ctz32(chip_len); + /* Flash device interface (8 & 16 bits) */ + pfl->cfi_table[0x28] = 0x02; + pfl->cfi_table[0x29] = 0x00; + /* Max number of bytes in multi-bytes write */ + /* XXX: disable buffered write as it's not supported */ + // pfl->cfi_table[0x2A] = 0x05; + pfl->cfi_table[0x2A] = 0x00; + pfl->cfi_table[0x2B] = 0x00; + /* Number of erase block regions (uniform) */ + pfl->cfi_table[0x2C] = 0x01; + /* Erase block region 1 */ + pfl->cfi_table[0x2D] = pfl->nb_blocs - 1; + pfl->cfi_table[0x2E] = (pfl->nb_blocs - 1) >> 8; + pfl->cfi_table[0x2F] = pfl->sector_len >> 8; + pfl->cfi_table[0x30] = pfl->sector_len >> 16; + + /* Extended */ + pfl->cfi_table[0x31] = 'P'; + pfl->cfi_table[0x32] = 'R'; + pfl->cfi_table[0x33] = 'I'; + + pfl->cfi_table[0x34] = '1'; + pfl->cfi_table[0x35] = '0'; + + pfl->cfi_table[0x36] = 0x00; + pfl->cfi_table[0x37] = 0x00; + pfl->cfi_table[0x38] = 0x00; + pfl->cfi_table[0x39] = 0x00; + + pfl->cfi_table[0x3a] = 0x00; + + pfl->cfi_table[0x3b] = 0x00; + pfl->cfi_table[0x3c] = 0x00; +} + +static Property pflash_cfi02_properties[] = { + DEFINE_PROP_DRIVE("drive", struct pflash_t, blk), + DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), + DEFINE_PROP_UINT32("sector-length", struct pflash_t, sector_len, 0), + DEFINE_PROP_UINT8("width", struct pflash_t, width, 0), + DEFINE_PROP_UINT8("mappings", struct pflash_t, mappings, 0), + DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0), + DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), + DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), + DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), + DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), + DEFINE_PROP_UINT16("unlock-addr0", struct pflash_t, unlock_addr0, 0), + DEFINE_PROP_UINT16("unlock-addr1", struct pflash_t, unlock_addr1, 0), + DEFINE_PROP_STRING("name", struct pflash_t, name), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi02_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pflash_cfi02_realize; + dc->props = pflash_cfi02_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo pflash_cfi02_info = { + .name = TYPE_CFI_PFLASH02, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(struct pflash_t), + .class_init = pflash_cfi02_class_init, +}; + +static void pflash_cfi02_register_types(void) +{ + type_register_static(&pflash_cfi02_info); +} + +type_init(pflash_cfi02_register_types) + +pflash_t *pflash_cfi02_register(hwaddr base, + DeviceState *qdev, const char *name, + hwaddr size, + BlockBackend *blk, uint32_t sector_len, + int nb_blocs, int nb_mappings, int width, + uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, + uint16_t unlock_addr0, uint16_t unlock_addr1, + int be) +{ + DeviceState *dev = qdev_create(NULL, TYPE_CFI_PFLASH02); + + if (blk) { + qdev_prop_set_drive(dev, "drive", blk, &error_abort); + } + qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); + qdev_prop_set_uint32(dev, "sector-length", sector_len); + qdev_prop_set_uint8(dev, "width", width); + qdev_prop_set_uint8(dev, "mappings", nb_mappings); + qdev_prop_set_uint8(dev, "big-endian", !!be); + qdev_prop_set_uint16(dev, "id0", id0); + qdev_prop_set_uint16(dev, "id1", id1); + qdev_prop_set_uint16(dev, "id2", id2); + qdev_prop_set_uint16(dev, "id3", id3); + qdev_prop_set_uint16(dev, "unlock-addr0", unlock_addr0); + qdev_prop_set_uint16(dev, "unlock-addr1", unlock_addr1); + qdev_prop_set_string(dev, "name", name); + qdev_init_nofail(dev); + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + return CFI_PFLASH02(dev); +} diff --git a/qemu/hw/block/tc58128.c b/qemu/hw/block/tc58128.c new file mode 100644 index 000000000..728f1c3b6 --- /dev/null +++ b/qemu/hw/block/tc58128.c @@ -0,0 +1,180 @@ +#include "hw/hw.h" +#include "hw/sh4/sh.h" +#include "hw/loader.h" +#include "sysemu/qtest.h" +#include "qemu/error-report.h" + +#define CE1 0x0100 +#define CE2 0x0200 +#define RE 0x0400 +#define WE 0x0800 +#define ALE 0x1000 +#define CLE 0x2000 +#define RDY1 0x4000 +#define RDY2 0x8000 +#define RDY(n) ((n) == 0 ? RDY1 : RDY2) + +typedef enum { WAIT, READ1, READ2, READ3 } state_t; + +typedef struct { + uint8_t *flash_contents; + state_t state; + uint32_t address; + uint8_t address_cycle; +} tc58128_dev; + +static tc58128_dev tc58128_devs[2]; + +#define FLASH_SIZE (16*1024*1024) + +static void init_dev(tc58128_dev * dev, const char *filename) +{ + int ret, blocks; + + dev->state = WAIT; + dev->flash_contents = g_malloc(FLASH_SIZE); + memset(dev->flash_contents, 0xff, FLASH_SIZE); + if (filename) { + /* Load flash image skipping the first block */ + ret = load_image(filename, dev->flash_contents + 528 * 32); + if (ret < 0) { + if (!qtest_enabled()) { + error_report("Could not load flash image %s", filename); + exit(1); + } + } else { + /* Build first block with number of blocks */ + blocks = (ret + 528 * 32 - 1) / (528 * 32); + dev->flash_contents[0] = blocks & 0xff; + dev->flash_contents[1] = (blocks >> 8) & 0xff; + dev->flash_contents[2] = (blocks >> 16) & 0xff; + dev->flash_contents[3] = (blocks >> 24) & 0xff; + fprintf(stderr, "loaded %d bytes for %s into flash\n", ret, + filename); + } + } +} + +static void handle_command(tc58128_dev * dev, uint8_t command) +{ + switch (command) { + case 0xff: + fprintf(stderr, "reset flash device\n"); + dev->state = WAIT; + break; + case 0x00: + fprintf(stderr, "read mode 1\n"); + dev->state = READ1; + dev->address_cycle = 0; + break; + case 0x01: + fprintf(stderr, "read mode 2\n"); + dev->state = READ2; + dev->address_cycle = 0; + break; + case 0x50: + fprintf(stderr, "read mode 3\n"); + dev->state = READ3; + dev->address_cycle = 0; + break; + default: + fprintf(stderr, "unknown flash command 0x%02x\n", command); + abort(); + } +} + +static void handle_address(tc58128_dev * dev, uint8_t data) +{ + switch (dev->state) { + case READ1: + case READ2: + case READ3: + switch (dev->address_cycle) { + case 0: + dev->address = data; + if (dev->state == READ2) + dev->address |= 0x100; + else if (dev->state == READ3) + dev->address |= 0x200; + break; + case 1: + dev->address += data * 528 * 0x100; + break; + case 2: + dev->address += data * 528; + fprintf(stderr, "address pointer in flash: 0x%08x\n", + dev->address); + break; + default: + /* Invalid data */ + abort(); + } + dev->address_cycle++; + break; + default: + abort(); + } +} + +static uint8_t handle_read(tc58128_dev * dev) +{ +#if 0 + if (dev->address % 0x100000 == 0) + fprintf(stderr, "reading flash at address 0x%08x\n", dev->address); +#endif + return dev->flash_contents[dev->address++]; +} + +/* We never mark the device as busy, so interrupts cannot be triggered + XXXXX */ + +static int tc58128_cb(uint16_t porta, uint16_t portb, + uint16_t * periph_pdtra, uint16_t * periph_portadir, + uint16_t * periph_pdtrb, uint16_t * periph_portbdir) +{ + int dev; + + if ((porta & CE1) == 0) + dev = 0; + else if ((porta & CE2) == 0) + dev = 1; + else + return 0; /* No device selected */ + + if ((porta & RE) && (porta & WE)) { + /* Nothing to do, assert ready and return to input state */ + *periph_portadir &= 0xff00; + *periph_portadir |= RDY(dev); + *periph_pdtra |= RDY(dev); + return 1; + } + + if (porta & CLE) { + /* Command */ + assert((porta & WE) == 0); + handle_command(&tc58128_devs[dev], porta & 0x00ff); + } else if (porta & ALE) { + assert((porta & WE) == 0); + handle_address(&tc58128_devs[dev], porta & 0x00ff); + } else if ((porta & RE) == 0) { + *periph_portadir |= 0x00ff; + *periph_pdtra &= 0xff00; + *periph_pdtra |= handle_read(&tc58128_devs[dev]); + } else { + abort(); + } + return 1; +} + +static sh7750_io_device tc58128 = { + RE | WE, /* Port A triggers */ + 0, /* Port B triggers */ + tc58128_cb /* Callback */ +}; + +int tc58128_init(struct SH7750State *s, const char *zone1, const char *zone2) +{ + init_dev(&tc58128_devs[0], zone1); + init_dev(&tc58128_devs[1], zone2); + return sh7750_register_io_device(s, &tc58128); +} diff --git a/qemu/hw/block/virtio-blk.c b/qemu/hw/block/virtio-blk.c new file mode 100644 index 000000000..1556c9cf5 --- /dev/null +++ b/qemu/hw/block/virtio-blk.c @@ -0,0 +1,1012 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "qemu/iov.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "hw/block/block.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/virtio/virtio-blk.h" +#include "dataplane/virtio-blk.h" +#include "migration/migration.h" +#include "block/scsi.h" +#ifdef __linux__ +# include <scsi/sg.h> +#endif +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" + +VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) +{ + VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); + req->dev = s; + req->qiov.size = 0; + req->in_len = 0; + req->next = NULL; + req->mr_next = NULL; + return req; +} + +void virtio_blk_free_request(VirtIOBlockReq *req) +{ + if (req) { + g_slice_free(VirtIOBlockReq, req); + } +} + +static void virtio_blk_complete_request(VirtIOBlockReq *req, + unsigned char status) +{ + VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + trace_virtio_blk_req_complete(req, status); + + stb_p(&req->in->status, status); + virtqueue_push(s->vq, &req->elem, req->in_len); + virtio_notify(vdev, s->vq); +} + +static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) +{ + req->dev->complete_request(req, status); +} + +static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + bool is_read) +{ + BlockErrorAction action = blk_get_error_action(req->dev->blk, + is_read, error); + VirtIOBlock *s = req->dev; + + if (action == BLOCK_ERROR_ACTION_STOP) { + req->next = s->rq; + s->rq = req; + } else if (action == BLOCK_ERROR_ACTION_REPORT) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); + } + + blk_error_action(s->blk, action, is_read, error); + return action != BLOCK_ERROR_ACTION_IGNORE; +} + +static void virtio_blk_rw_complete(void *opaque, int ret) +{ + VirtIOBlockReq *next = opaque; + + while (next) { + VirtIOBlockReq *req = next; + next = req->mr_next; + trace_virtio_blk_rw_complete(req, ret); + + if (req->qiov.nalloc != -1) { + /* If nalloc is != 1 req->qiov is a local copy of the original + * external iovec. It was allocated in submit_merged_requests + * to be able to merge requests. */ + qemu_iovec_destroy(&req->qiov); + } + + if (ret) { + int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); + bool is_read = !(p & VIRTIO_BLK_T_OUT); + /* Note that memory may be dirtied on read failure. If the + * virtio request is not completed here, as is the case for + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied + * correctly during live migration. While this is ugly, + * it is acceptable because the device is free to write to + * the memory until the request is completed (which will + * happen on the other side of the migration). + */ + if (virtio_blk_handle_rw_error(req, -ret, is_read)) { + continue; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(req->dev->blk), &req->acct); + virtio_blk_free_request(req); + } +} + +static void virtio_blk_flush_complete(void *opaque, int ret) +{ + VirtIOBlockReq *req = opaque; + + if (ret) { + if (virtio_blk_handle_rw_error(req, -ret, 0)) { + return; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(req->dev->blk), &req->acct); + virtio_blk_free_request(req); +} + +#ifdef __linux__ + +typedef struct { + VirtIOBlockReq *req; + struct sg_io_hdr hdr; +} VirtIOBlockIoctlReq; + +static void virtio_blk_ioctl_complete(void *opaque, int status) +{ + VirtIOBlockIoctlReq *ioctl_req = opaque; + VirtIOBlockReq *req = ioctl_req->req; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + struct virtio_scsi_inhdr *scsi; + struct sg_io_hdr *hdr; + + scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base; + + if (status) { + status = VIRTIO_BLK_S_UNSUPP; + virtio_stl_p(vdev, &scsi->errors, 255); + goto out; + } + + hdr = &ioctl_req->hdr; + /* + * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) + * clear the masked_status field [hence status gets cleared too, see + * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED + * status has occurred. However they do set DRIVER_SENSE in driver_status + * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. + */ + if (hdr->status == 0 && hdr->sb_len_wr > 0) { + hdr->status = CHECK_CONDITION; + } + + virtio_stl_p(vdev, &scsi->errors, + hdr->status | (hdr->msg_status << 8) | + (hdr->host_status << 16) | (hdr->driver_status << 24)); + virtio_stl_p(vdev, &scsi->residual, hdr->resid); + virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr); + virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); + +out: + virtio_blk_req_complete(req, status); + virtio_blk_free_request(req); + g_free(ioctl_req); +} + +#endif + +static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) +{ + VirtIOBlockReq *req = virtio_blk_alloc_request(s); + + if (!virtqueue_pop(s->vq, &req->elem)) { + virtio_blk_free_request(req); + return NULL; + } + + return req; +} + +static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) +{ + int status = VIRTIO_BLK_S_OK; + struct virtio_scsi_inhdr *scsi = NULL; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + VirtQueueElement *elem = &req->elem; + VirtIOBlock *blk = req->dev; + +#ifdef __linux__ + int i; + VirtIOBlockIoctlReq *ioctl_req; + BlockAIOCB *acb; +#endif + + /* + * We require at least one output segment each for the virtio_blk_outhdr + * and the SCSI command block. + * + * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr + * and the sense buffer pointer in the input segments. + */ + if (elem->out_num < 2 || elem->in_num < 3) { + status = VIRTIO_BLK_S_IOERR; + goto fail; + } + + /* + * The scsi inhdr is placed in the second-to-last input segment, just + * before the regular inhdr. + */ + scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; + + if (!blk->conf.scsi) { + status = VIRTIO_BLK_S_UNSUPP; + goto fail; + } + + /* + * No support for bidirection commands yet. + */ + if (elem->out_num > 2 && elem->in_num > 3) { + status = VIRTIO_BLK_S_UNSUPP; + goto fail; + } + +#ifdef __linux__ + ioctl_req = g_new0(VirtIOBlockIoctlReq, 1); + ioctl_req->req = req; + ioctl_req->hdr.interface_id = 'S'; + ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len; + ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base; + ioctl_req->hdr.dxfer_len = 0; + + if (elem->out_num > 2) { + /* + * If there are more than the minimally required 2 output segments + * there is write payload starting from the third iovec. + */ + ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV; + ioctl_req->hdr.iovec_count = elem->out_num - 2; + + for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { + ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len; + } + + ioctl_req->hdr.dxferp = elem->out_sg + 2; + + } else if (elem->in_num > 3) { + /* + * If we have more than 3 input segments the guest wants to actually + * read data. + */ + ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV; + ioctl_req->hdr.iovec_count = elem->in_num - 3; + for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { + ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len; + } + + ioctl_req->hdr.dxferp = elem->in_sg; + } else { + /* + * Some SCSI commands don't actually transfer any data. + */ + ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE; + } + + ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; + ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; + + acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr, + virtio_blk_ioctl_complete, ioctl_req); + if (!acb) { + g_free(ioctl_req); + status = VIRTIO_BLK_S_UNSUPP; + goto fail; + } + return -EINPROGRESS; +#else + abort(); +#endif + +fail: + /* Just put anything nonzero so that the ioctl fails in the guest. */ + if (scsi) { + virtio_stl_p(vdev, &scsi->errors, 255); + } + return status; +} + +static void virtio_blk_handle_scsi(VirtIOBlockReq *req) +{ + int status; + + status = virtio_blk_handle_scsi_req(req); + if (status != -EINPROGRESS) { + virtio_blk_req_complete(req, status); + virtio_blk_free_request(req); + } +} + +static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, + int start, int num_reqs, int niov) +{ + QEMUIOVector *qiov = &mrb->reqs[start]->qiov; + int64_t sector_num = mrb->reqs[start]->sector_num; + int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE; + bool is_write = mrb->is_write; + + if (num_reqs > 1) { + int i; + struct iovec *tmp_iov = qiov->iov; + int tmp_niov = qiov->niov; + + /* mrb->reqs[start]->qiov was initialized from external so we can't + * modifiy it here. We need to initialize it locally and then add the + * external iovecs. */ + qemu_iovec_init(qiov, niov); + + for (i = 0; i < tmp_niov; i++) { + qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len); + } + + for (i = start + 1; i < start + num_reqs; i++) { + qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0, + mrb->reqs[i]->qiov.size); + mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; + nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE; + } + assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE); + + trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num, + nb_sectors, is_write); + block_acct_merge_done(blk_get_stats(blk), + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, + num_reqs - 1); + } + + if (is_write) { + blk_aio_writev(blk, sector_num, qiov, nb_sectors, + virtio_blk_rw_complete, mrb->reqs[start]); + } else { + blk_aio_readv(blk, sector_num, qiov, nb_sectors, + virtio_blk_rw_complete, mrb->reqs[start]); + } +} + +static int multireq_compare(const void *a, const void *b) +{ + const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a, + *req2 = *(VirtIOBlockReq **)b; + + /* + * Note that we can't simply subtract sector_num1 from sector_num2 + * here as that could overflow the return value. + */ + if (req1->sector_num > req2->sector_num) { + return 1; + } else if (req1->sector_num < req2->sector_num) { + return -1; + } else { + return 0; + } +} + +void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) +{ + int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; + int max_xfer_len = 0; + int64_t sector_num = 0; + + if (mrb->num_reqs == 1) { + submit_requests(blk, mrb, 0, 1, -1); + mrb->num_reqs = 0; + return; + } + + max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk); + max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS); + + qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs), + &multireq_compare); + + for (i = 0; i < mrb->num_reqs; i++) { + VirtIOBlockReq *req = mrb->reqs[i]; + if (num_reqs > 0) { + bool merge = true; + + /* merge would exceed maximum number of IOVs */ + if (niov + req->qiov.niov > IOV_MAX) { + merge = false; + } + + /* merge would exceed maximum transfer length of backend device */ + if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) { + merge = false; + } + + /* requests are not sequential */ + if (sector_num + nb_sectors != req->sector_num) { + merge = false; + } + + if (!merge) { + submit_requests(blk, mrb, start, num_reqs, niov); + num_reqs = 0; + } + } + + if (num_reqs == 0) { + sector_num = req->sector_num; + nb_sectors = niov = 0; + start = i; + } + + nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE; + niov += req->qiov.niov; + num_reqs++; + } + + submit_requests(blk, mrb, start, num_reqs, niov); + mrb->num_reqs = 0; +} + +static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) +{ + block_acct_start(blk_get_stats(req->dev->blk), &req->acct, 0, + BLOCK_ACCT_FLUSH); + + /* + * Make sure all outstanding writes are posted to the backing device. + */ + if (mrb->is_write && mrb->num_reqs > 0) { + virtio_blk_submit_multireq(req->dev->blk, mrb); + } + blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req); +} + +static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, + uint64_t sector, size_t size) +{ + uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; + uint64_t total_sectors; + + if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return false; + } + if (sector & dev->sector_mask) { + return false; + } + if (size % dev->conf.conf.logical_block_size) { + return false; + } + blk_get_geometry(dev->blk, &total_sectors); + if (sector > total_sectors || nb_sectors > total_sectors - sector) { + return false; + } + return true; +} + +void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) +{ + uint32_t type; + struct iovec *in_iov = req->elem.in_sg; + struct iovec *iov = req->elem.out_sg; + unsigned in_num = req->elem.in_num; + unsigned out_num = req->elem.out_num; + + if (req->elem.out_num < 1 || req->elem.in_num < 1) { + error_report("virtio-blk missing headers"); + exit(1); + } + + if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, + sizeof(req->out)) != sizeof(req->out))) { + error_report("virtio-blk request outhdr too short"); + exit(1); + } + + iov_discard_front(&iov, &out_num, sizeof(req->out)); + + if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { + error_report("virtio-blk request inhdr too short"); + exit(1); + } + + /* We always touch the last byte, so just see how big in_iov is. */ + req->in_len = iov_size(in_iov, in_num); + req->in = (void *)in_iov[in_num - 1].iov_base + + in_iov[in_num - 1].iov_len + - sizeof(struct virtio_blk_inhdr); + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); + + type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); + + /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER + * is an optional flag. Although a guest should not send this flag if + * not negotiated we ignored it in the past. So keep ignoring it. */ + switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { + case VIRTIO_BLK_T_IN: + { + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev), + &req->out.sector); + + if (is_write) { + qemu_iovec_init_external(&req->qiov, iov, out_num); + trace_virtio_blk_handle_write(req, req->sector_num, + req->qiov.size / BDRV_SECTOR_SIZE); + } else { + qemu_iovec_init_external(&req->qiov, in_iov, in_num); + trace_virtio_blk_handle_read(req, req->sector_num, + req->qiov.size / BDRV_SECTOR_SIZE); + } + + if (!virtio_blk_sect_range_ok(req->dev, req->sector_num, + req->qiov.size)) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + virtio_blk_free_request(req); + return; + } + + block_acct_start(blk_get_stats(req->dev->blk), + &req->acct, req->qiov.size, + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + + /* merge would exceed maximum number of requests or IO direction + * changes */ + if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || + is_write != mrb->is_write || + !req->dev->conf.request_merging)) { + virtio_blk_submit_multireq(req->dev->blk, mrb); + } + + assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); + mrb->reqs[mrb->num_reqs++] = req; + mrb->is_write = is_write; + break; + } + case VIRTIO_BLK_T_FLUSH: + virtio_blk_handle_flush(req, mrb); + break; + case VIRTIO_BLK_T_SCSI_CMD: + virtio_blk_handle_scsi(req); + break; + case VIRTIO_BLK_T_GET_ID: + { + VirtIOBlock *s = req->dev; + + /* + * NB: per existing s/n string convention the string is + * terminated by '\0' only when shorter than buffer. + */ + const char *serial = s->conf.serial ? s->conf.serial : ""; + size_t size = MIN(strlen(serial) + 1, + MIN(iov_size(in_iov, in_num), + VIRTIO_BLK_ID_BYTES)); + iov_from_buf(in_iov, in_num, 0, serial, size); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + virtio_blk_free_request(req); + break; + } + default: + virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); + virtio_blk_free_request(req); + } +} + +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + + /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start + * dataplane here instead of waiting for .set_status(). + */ + if (s->dataplane) { + virtio_blk_data_plane_start(s->dataplane); + return; + } + + while ((req = virtio_blk_get_request(s))) { + virtio_blk_handle_request(req, &mrb); + } + + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } +} + +static void virtio_blk_dma_restart_bh(void *opaque) +{ + VirtIOBlock *s = opaque; + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + s->rq = NULL; + + while (req) { + VirtIOBlockReq *next = req->next; + virtio_blk_handle_request(req, &mrb); + req = next; + } + + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } +} + +static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) +{ + VirtIOBlock *s = opaque; + + if (!running) { + return; + } + + if (!s->bh) { + s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), + virtio_blk_dma_restart_bh, s); + qemu_bh_schedule(s->bh); + } +} + +static void virtio_blk_reset(VirtIODevice *vdev) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + AioContext *ctx; + + /* + * This should cancel pending requests, but can't do nicely until there + * are per-device request lists. + */ + ctx = blk_get_aio_context(s->blk); + aio_context_acquire(ctx); + blk_drain(s->blk); + + if (s->dataplane) { + virtio_blk_data_plane_stop(s->dataplane); + } + aio_context_release(ctx); + + blk_set_enable_write_cache(s->blk, s->original_wce); +} + +/* coalesce internal state, copy to pci i/o region 0 + */ +static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + BlockConf *conf = &s->conf.conf; + struct virtio_blk_config blkcfg; + uint64_t capacity; + int blk_size = conf->logical_block_size; + + blk_get_geometry(s->blk, &capacity); + memset(&blkcfg, 0, sizeof(blkcfg)); + virtio_stq_p(vdev, &blkcfg.capacity, capacity); + virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2); + virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls); + virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); + virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size); + virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); + blkcfg.geometry.heads = conf->heads; + /* + * We must ensure that the block device capacity is a multiple of + * the logical block size. If that is not the case, let's use + * sector_mask to adopt the geometry to have a correct picture. + * For those devices where the capacity is ok for the given geometry + * we don't touch the sector value of the geometry, since some devices + * (like s390 dasd) need a specific value. Here the capacity is already + * cyls*heads*secs*blk_size and the sector value is not block size + * divided by 512 - instead it is the amount of blk_size blocks + * per track (cylinder). + */ + if (blk_getlength(s->blk) / conf->heads / conf->secs % blk_size) { + blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; + } else { + blkcfg.geometry.sectors = conf->secs; + } + blkcfg.size_max = 0; + blkcfg.physical_block_exp = get_physical_block_exp(conf); + blkcfg.alignment_offset = 0; + blkcfg.wce = blk_enable_write_cache(s->blk); + memcpy(config, &blkcfg, sizeof(struct virtio_blk_config)); +} + +static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + struct virtio_blk_config blkcfg; + + memcpy(&blkcfg, config, sizeof(blkcfg)); + + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); + aio_context_release(blk_get_aio_context(s->blk)); +} + +static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); + virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); + virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); + if (__virtio_has_feature(features, VIRTIO_F_VERSION_1)) { + if (s->conf.scsi) { + error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); + return 0; + } + } else { + virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT); + virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); + } + + if (s->conf.config_wce) { + virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); + } + if (blk_enable_write_cache(s->blk)) { + virtio_add_feature(&features, VIRTIO_BLK_F_WCE); + } + if (blk_is_read_only(s->blk)) { + virtio_add_feature(&features, VIRTIO_BLK_F_RO); + } + + return features; +} + +static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + + if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | + VIRTIO_CONFIG_S_DRIVER_OK))) { + virtio_blk_data_plane_stop(s->dataplane); + } + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send + * cache flushes. Thus, the "auto writethrough" behavior is never + * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. + * Leaving it enabled would break the following sequence: + * + * Guest started with "-drive cache=writethrough" + * Guest sets status to 0 + * Guest sets DRIVER bit in status field + * Guest reads host features (WCE=0, CONFIG_WCE=1) + * Guest writes guest features (WCE=0, CONFIG_WCE=1) + * Guest writes 1 to the WCE configuration field (writeback mode) + * Guest sets DRIVER_OK bit in status field + * + * s->blk would erroneously be placed in writethrough mode. + */ + if (!virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, + virtio_has_feature(vdev, VIRTIO_BLK_F_WCE)); + aio_context_release(blk_get_aio_context(s->blk)); + } +} + +static void virtio_blk_save(QEMUFile *f, void *opaque) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + + virtio_save(vdev, f); +} + +static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + VirtIOBlockReq *req = s->rq; + + while (req) { + qemu_put_sbyte(f, 1); + qemu_put_buffer(f, (unsigned char *)&req->elem, + sizeof(VirtQueueElement)); + req = req->next; + } + qemu_put_sbyte(f, 0); +} + +static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) +{ + VirtIOBlock *s = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + if (version_id != 2) + return -EINVAL; + + return virtio_load(vdev, f, version_id); +} + +static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + int version_id) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + + while (qemu_get_sbyte(f)) { + VirtIOBlockReq *req = virtio_blk_alloc_request(s); + qemu_get_buffer(f, (unsigned char *)&req->elem, + sizeof(VirtQueueElement)); + req->next = s->rq; + s->rq = req; + + virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, + req->elem.in_num, 1); + virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, + req->elem.out_num, 0); + } + + return 0; +} + +static void virtio_blk_resize(void *opaque) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + + virtio_notify_config(vdev); +} + +static const BlockDevOps virtio_block_ops = { + .resize_cb = virtio_blk_resize, +}; + +/* Disable dataplane thread during live migration since it does not + * update the dirty memory bitmap yet. + */ +static void virtio_blk_migration_state_changed(Notifier *notifier, void *data) +{ + VirtIOBlock *s = container_of(notifier, VirtIOBlock, + migration_state_notifier); + MigrationState *mig = data; + Error *err = NULL; + + if (migration_in_setup(mig)) { + if (!s->dataplane) { + return; + } + virtio_blk_data_plane_destroy(s->dataplane); + s->dataplane = NULL; + } else if (migration_has_finished(mig) || + migration_has_failed(mig)) { + if (s->dataplane) { + return; + } + blk_drain_all(); /* complete in-flight non-dataplane requests */ + virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->conf, + &s->dataplane, &err); + if (err != NULL) { + error_report_err(err); + } + } +} + +static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOBlock *s = VIRTIO_BLK(dev); + VirtIOBlkConf *conf = &s->conf; + Error *err = NULL; + static int virtio_blk_id; + + if (!conf->conf.blk) { + error_setg(errp, "drive property not set"); + return; + } + if (!blk_is_inserted(conf->conf.blk)) { + error_setg(errp, "Device needs media, but drive is empty"); + return; + } + + blkconf_serial(&conf->conf, &conf->serial); + s->original_wce = blk_enable_write_cache(conf->conf.blk); + blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); + if (err) { + error_propagate(errp, err); + return; + } + blkconf_blocksizes(&conf->conf); + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + sizeof(struct virtio_blk_config)); + + s->blk = conf->conf.blk; + s->rq = NULL; + s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; + + s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); + s->complete_request = virtio_blk_complete_request; + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); + virtio_cleanup(vdev); + return; + } + s->migration_state_notifier.notify = virtio_blk_migration_state_changed; + add_migration_state_change_notifier(&s->migration_state_notifier); + + s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); + register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, + virtio_blk_save, virtio_blk_load, s); + blk_set_dev_ops(s->blk, &virtio_block_ops, s); + blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size); + + blk_iostatus_enable(s->blk); +} + +static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOBlock *s = VIRTIO_BLK(dev); + + remove_migration_state_change_notifier(&s->migration_state_notifier); + virtio_blk_data_plane_destroy(s->dataplane); + s->dataplane = NULL; + qemu_del_vm_change_state_handler(s->change); + unregister_savevm(dev, "virtio-blk", s); + blockdev_mark_auto_del(s->blk); + virtio_cleanup(vdev); +} + +static void virtio_blk_instance_init(Object *obj) +{ + VirtIOBlock *s = VIRTIO_BLK(obj); + + object_property_add_link(obj, "iothread", TYPE_IOTHREAD, + (Object **)&s->conf.iothread, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); + device_add_bootindex_property(obj, &s->conf.conf.bootindex, + "bootindex", "/disk@0,0", + DEVICE(obj), NULL); +} + +static Property virtio_blk_properties[] = { + DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), + DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), + DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), + DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true), +#ifdef __linux__ + DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true), +#endif + DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, + true), + DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = virtio_blk_properties; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = virtio_blk_device_realize; + vdc->unrealize = virtio_blk_device_unrealize; + vdc->get_config = virtio_blk_update_config; + vdc->set_config = virtio_blk_set_config; + vdc->get_features = virtio_blk_get_features; + vdc->set_status = virtio_blk_set_status; + vdc->reset = virtio_blk_reset; + vdc->save = virtio_blk_save_device; + vdc->load = virtio_blk_load_device; +} + +static const TypeInfo virtio_device_info = { + .name = TYPE_VIRTIO_BLK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOBlock), + .instance_init = virtio_blk_instance_init, + .class_init = virtio_blk_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_device_info); +} + +type_init(virtio_register_types) diff --git a/qemu/hw/block/xen_blkif.h b/qemu/hw/block/xen_blkif.h new file mode 100644 index 000000000..711b69274 --- /dev/null +++ b/qemu/hw/block/xen_blkif.h @@ -0,0 +1,115 @@ +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include <xen/io/ring.h> +#include <xen/io/blkif.h> +#include <xen/io/protocols.h> + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +union blkif_back_rings { + blkif_back_ring_t native; + blkif_common_back_ring_t common; + blkif_x86_32_back_ring_t x86_32_part; + blkif_x86_64_back_ring_t x86_64_part; +}; +typedef union blkif_back_rings blkif_back_rings_t; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + +static inline void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + if (src->operation == BLKIF_OP_DISCARD) { + struct blkif_request_discard *s = (void *)src; + struct blkif_request_discard *d = (void *)dst; + d->nr_sectors = s->nr_sectors; + return; + } + if (n > src->nr_segments) + n = src->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +static inline void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + if (src->operation == BLKIF_OP_DISCARD) { + struct blkif_request_discard *s = (void *)src; + struct blkif_request_discard *d = (void *)dst; + d->nr_sectors = s->nr_sectors; + return; + } + if (n > src->nr_segments) + n = src->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +#endif /* __XEN_BLKIF_H__ */ diff --git a/qemu/hw/block/xen_disk.c b/qemu/hw/block/xen_disk.c new file mode 100644 index 000000000..267d8a8c7 --- /dev/null +++ b/qemu/hw/block/xen_disk.c @@ -0,0 +1,1106 @@ +/* + * xen paravirt block device backend + * + * (c) Gerd Hoffmann <kraxel@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <time.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/uio.h> + +#include "hw/hw.h" +#include "hw/xen/xen_backend.h" +#include "xen_blkif.h" +#include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" + +/* ------------------------------------------------------------- */ + +static int batch_maps = 0; + +static int max_requests = 32; + +/* ------------------------------------------------------------- */ + +#define BLOCK_SIZE 512 +#define IOCB_COUNT (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2) + +struct PersistentGrant { + void *page; + struct XenBlkDev *blkdev; +}; + +typedef struct PersistentGrant PersistentGrant; + +struct PersistentRegion { + void *addr; + int num; +}; + +typedef struct PersistentRegion PersistentRegion; + +struct ioreq { + blkif_request_t req; + int16_t status; + + /* parsed request */ + off_t start; + QEMUIOVector v; + int presync; + int postsync; + uint8_t mapped; + + /* grant mapping */ + uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int prot; + void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + void *pages; + int num_unmap; + + /* aio status */ + int aio_inflight; + int aio_errors; + + struct XenBlkDev *blkdev; + QLIST_ENTRY(ioreq) list; + BlockAcctCookie acct; +}; + +struct XenBlkDev { + struct XenDevice xendev; /* must be first */ + char *params; + char *mode; + char *type; + char *dev; + char *devtype; + bool directiosafe; + const char *fileproto; + const char *filename; + int ring_ref; + void *sring; + int64_t file_blk; + int64_t file_size; + int protocol; + blkif_back_rings_t rings; + int more_work; + int cnt_map; + + /* request lists */ + QLIST_HEAD(inflight_head, ioreq) inflight; + QLIST_HEAD(finished_head, ioreq) finished; + QLIST_HEAD(freelist_head, ioreq) freelist; + int requests_total; + int requests_inflight; + int requests_finished; + + /* Persistent grants extension */ + gboolean feature_discard; + gboolean feature_persistent; + GTree *persistent_gnts; + GSList *persistent_regions; + unsigned int persistent_gnt_count; + unsigned int max_grants; + + /* qemu block driver */ + DriveInfo *dinfo; + BlockBackend *blk; + QEMUBH *bh; +}; + +/* ------------------------------------------------------------- */ + +static void ioreq_reset(struct ioreq *ioreq) +{ + memset(&ioreq->req, 0, sizeof(ioreq->req)); + ioreq->status = 0; + ioreq->start = 0; + ioreq->presync = 0; + ioreq->postsync = 0; + ioreq->mapped = 0; + + memset(ioreq->domids, 0, sizeof(ioreq->domids)); + memset(ioreq->refs, 0, sizeof(ioreq->refs)); + ioreq->prot = 0; + memset(ioreq->page, 0, sizeof(ioreq->page)); + ioreq->pages = NULL; + + ioreq->aio_inflight = 0; + ioreq->aio_errors = 0; + + ioreq->blkdev = NULL; + memset(&ioreq->list, 0, sizeof(ioreq->list)); + memset(&ioreq->acct, 0, sizeof(ioreq->acct)); + + qemu_iovec_reset(&ioreq->v); +} + +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + uint ua = GPOINTER_TO_UINT(a); + uint ub = GPOINTER_TO_UINT(b); + return (ua > ub) - (ua < ub); +} + +static void destroy_grant(gpointer pgnt) +{ + PersistentGrant *grant = pgnt; + XenGnttab gnt = grant->blkdev->xendev.gnttabdev; + + if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) { + xen_be_printf(&grant->blkdev->xendev, 0, + "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + grant->blkdev->persistent_gnt_count--; + xen_be_printf(&grant->blkdev->xendev, 3, + "unmapped grant %p\n", grant->page); + g_free(grant); +} + +static void remove_persistent_region(gpointer data, gpointer dev) +{ + PersistentRegion *region = data; + struct XenBlkDev *blkdev = dev; + XenGnttab gnt = blkdev->xendev.gnttabdev; + + if (xc_gnttab_munmap(gnt, region->addr, region->num) != 0) { + xen_be_printf(&blkdev->xendev, 0, + "xc_gnttab_munmap region %p failed: %s\n", + region->addr, strerror(errno)); + } + xen_be_printf(&blkdev->xendev, 3, + "unmapped grant region %p with %d pages\n", + region->addr, region->num); + g_free(region); +} + +static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) +{ + struct ioreq *ioreq = NULL; + + if (QLIST_EMPTY(&blkdev->freelist)) { + if (blkdev->requests_total >= max_requests) { + goto out; + } + /* allocate new struct */ + ioreq = g_malloc0(sizeof(*ioreq)); + ioreq->blkdev = blkdev; + blkdev->requests_total++; + qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); + } else { + /* get one from freelist */ + ioreq = QLIST_FIRST(&blkdev->freelist); + QLIST_REMOVE(ioreq, list); + } + QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); + blkdev->requests_inflight++; + +out: + return ioreq; +} + +static void ioreq_finish(struct ioreq *ioreq) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + + QLIST_REMOVE(ioreq, list); + QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list); + blkdev->requests_inflight--; + blkdev->requests_finished++; +} + +static void ioreq_release(struct ioreq *ioreq, bool finish) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + + QLIST_REMOVE(ioreq, list); + ioreq_reset(ioreq); + ioreq->blkdev = blkdev; + QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list); + if (finish) { + blkdev->requests_finished--; + } else { + blkdev->requests_inflight--; + } +} + +/* + * translate request into iovec + start offset + * do sanity checks along the way + */ +static int ioreq_parse(struct ioreq *ioreq) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + uintptr_t mem; + size_t len; + int i; + + xen_be_printf(&blkdev->xendev, 3, + "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", + ioreq->req.operation, ioreq->req.nr_segments, + ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); + switch (ioreq->req.operation) { + case BLKIF_OP_READ: + ioreq->prot = PROT_WRITE; /* to memory */ + break; + case BLKIF_OP_FLUSH_DISKCACHE: + ioreq->presync = 1; + if (!ioreq->req.nr_segments) { + return 0; + } + /* fall through */ + case BLKIF_OP_WRITE: + ioreq->prot = PROT_READ; /* from memory */ + break; + case BLKIF_OP_DISCARD: + return 0; + default: + xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", + ioreq->req.operation); + goto err; + }; + + if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { + xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); + goto err; + } + + ioreq->start = ioreq->req.sector_number * blkdev->file_blk; + for (i = 0; i < ioreq->req.nr_segments; i++) { + if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); + goto err; + } + if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { + xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); + goto err; + } + if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { + xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); + goto err; + } + + ioreq->domids[i] = blkdev->xendev.dom; + ioreq->refs[i] = ioreq->req.seg[i].gref; + + mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; + len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; + qemu_iovec_add(&ioreq->v, (void*)mem, len); + } + if (ioreq->start + ioreq->v.size > blkdev->file_size) { + xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); + goto err; + } + return 0; + +err: + ioreq->status = BLKIF_RSP_ERROR; + return -1; +} + +static void ioreq_unmap(struct ioreq *ioreq) +{ + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; + int i; + + if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { + return; + } + if (batch_maps) { + if (!ioreq->pages) { + return; + } + if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) { + xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + ioreq->blkdev->cnt_map -= ioreq->num_unmap; + ioreq->pages = NULL; + } else { + for (i = 0; i < ioreq->num_unmap; i++) { + if (!ioreq->page[i]) { + continue; + } + if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) { + xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + ioreq->blkdev->cnt_map--; + ioreq->page[i] = NULL; + } + } + ioreq->mapped = 0; +} + +static int ioreq_map(struct ioreq *ioreq) +{ + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; + uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i, j, new_maps = 0; + PersistentGrant *grant; + PersistentRegion *region; + /* domids and refs variables will contain the information necessary + * to map the grants that are needed to fulfill this request. + * + * After mapping the needed grants, the page array will contain the + * memory address of each granted page in the order specified in ioreq + * (disregarding if it's a persistent grant or not). + */ + + if (ioreq->v.niov == 0 || ioreq->mapped == 1) { + return 0; + } + if (ioreq->blkdev->feature_persistent) { + for (i = 0; i < ioreq->v.niov; i++) { + grant = g_tree_lookup(ioreq->blkdev->persistent_gnts, + GUINT_TO_POINTER(ioreq->refs[i])); + + if (grant != NULL) { + page[i] = grant->page; + xen_be_printf(&ioreq->blkdev->xendev, 3, + "using persistent-grant %" PRIu32 "\n", + ioreq->refs[i]); + } else { + /* Add the grant to the list of grants that + * should be mapped + */ + domids[new_maps] = ioreq->domids[i]; + refs[new_maps] = ioreq->refs[i]; + page[i] = NULL; + new_maps++; + } + } + /* Set the protection to RW, since grants may be reused later + * with a different protection than the one needed for this request + */ + ioreq->prot = PROT_WRITE | PROT_READ; + } else { + /* All grants in the request should be mapped */ + memcpy(refs, ioreq->refs, sizeof(refs)); + memcpy(domids, ioreq->domids, sizeof(domids)); + memset(page, 0, sizeof(page)); + new_maps = ioreq->v.niov; + } + + if (batch_maps && new_maps) { + ioreq->pages = xc_gnttab_map_grant_refs + (gnt, new_maps, domids, refs, ioreq->prot); + if (ioreq->pages == NULL) { + xen_be_printf(&ioreq->blkdev->xendev, 0, + "can't map %d grant refs (%s, %d maps)\n", + new_maps, strerror(errno), ioreq->blkdev->cnt_map); + return -1; + } + for (i = 0, j = 0; i < ioreq->v.niov; i++) { + if (page[i] == NULL) { + page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE; + } + } + ioreq->blkdev->cnt_map += new_maps; + } else if (new_maps) { + for (i = 0; i < new_maps; i++) { + ioreq->page[i] = xc_gnttab_map_grant_ref + (gnt, domids[i], refs[i], ioreq->prot); + if (ioreq->page[i] == NULL) { + xen_be_printf(&ioreq->blkdev->xendev, 0, + "can't map grant ref %d (%s, %d maps)\n", + refs[i], strerror(errno), ioreq->blkdev->cnt_map); + ioreq->mapped = 1; + ioreq_unmap(ioreq); + return -1; + } + ioreq->blkdev->cnt_map++; + } + for (i = 0, j = 0; i < ioreq->v.niov; i++) { + if (page[i] == NULL) { + page[i] = ioreq->page[j++]; + } + } + } + if (ioreq->blkdev->feature_persistent && new_maps != 0 && + (!batch_maps || (ioreq->blkdev->persistent_gnt_count + new_maps <= + ioreq->blkdev->max_grants))) { + /* + * If we are using persistent grants and batch mappings only + * add the new maps to the list of persistent grants if the whole + * area can be persistently mapped. + */ + if (batch_maps) { + region = g_malloc0(sizeof(*region)); + region->addr = ioreq->pages; + region->num = new_maps; + ioreq->blkdev->persistent_regions = g_slist_append( + ioreq->blkdev->persistent_regions, + region); + } + while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants) + && new_maps) { + /* Go through the list of newly mapped grants and add as many + * as possible to the list of persistently mapped grants. + * + * Since we start at the end of ioreq->page(s), we only need + * to decrease new_maps to prevent this granted pages from + * being unmapped in ioreq_unmap. + */ + grant = g_malloc0(sizeof(*grant)); + new_maps--; + if (batch_maps) { + grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; + } else { + grant->page = ioreq->page[new_maps]; + } + grant->blkdev = ioreq->blkdev; + xen_be_printf(&ioreq->blkdev->xendev, 3, + "adding grant %" PRIu32 " page: %p\n", + refs[new_maps], grant->page); + g_tree_insert(ioreq->blkdev->persistent_gnts, + GUINT_TO_POINTER(refs[new_maps]), + grant); + ioreq->blkdev->persistent_gnt_count++; + } + assert(!batch_maps || new_maps == 0); + } + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->v.iov[i].iov_base += (uintptr_t)page[i]; + } + ioreq->mapped = 1; + ioreq->num_unmap = new_maps; + return 0; +} + +static int ioreq_runio_qemu_aio(struct ioreq *ioreq); + +static void qemu_aio_complete(void *opaque, int ret) +{ + struct ioreq *ioreq = opaque; + + if (ret != 0) { + xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n", + ioreq->req.operation == BLKIF_OP_READ ? "read" : "write"); + ioreq->aio_errors++; + } + + ioreq->aio_inflight--; + if (ioreq->presync) { + ioreq->presync = 0; + ioreq_runio_qemu_aio(ioreq); + return; + } + if (ioreq->aio_inflight > 0) { + return; + } + if (ioreq->postsync) { + ioreq->postsync = 0; + ioreq->aio_inflight++; + blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq); + return; + } + + ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; + ioreq_unmap(ioreq); + ioreq_finish(ioreq); + switch (ioreq->req.operation) { + case BLKIF_OP_WRITE: + case BLKIF_OP_FLUSH_DISKCACHE: + if (!ioreq->req.nr_segments) { + break; + } + case BLKIF_OP_READ: + block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); + break; + case BLKIF_OP_DISCARD: + default: + break; + } + qemu_bh_schedule(ioreq->blkdev->bh); +} + +static int ioreq_runio_qemu_aio(struct ioreq *ioreq) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + + if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) { + goto err_no_map; + } + + ioreq->aio_inflight++; + if (ioreq->presync) { + blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq); + return 0; + } + + switch (ioreq->req.operation) { + case BLKIF_OP_READ: + block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct, + ioreq->v.size, BLOCK_ACCT_READ); + ioreq->aio_inflight++; + blk_aio_readv(blkdev->blk, ioreq->start / BLOCK_SIZE, + &ioreq->v, ioreq->v.size / BLOCK_SIZE, + qemu_aio_complete, ioreq); + break; + case BLKIF_OP_WRITE: + case BLKIF_OP_FLUSH_DISKCACHE: + if (!ioreq->req.nr_segments) { + break; + } + + block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct, + ioreq->v.size, BLOCK_ACCT_WRITE); + ioreq->aio_inflight++; + blk_aio_writev(blkdev->blk, ioreq->start / BLOCK_SIZE, + &ioreq->v, ioreq->v.size / BLOCK_SIZE, + qemu_aio_complete, ioreq); + break; + case BLKIF_OP_DISCARD: + { + struct blkif_request_discard *discard_req = (void *)&ioreq->req; + ioreq->aio_inflight++; + blk_aio_discard(blkdev->blk, + discard_req->sector_number, discard_req->nr_sectors, + qemu_aio_complete, ioreq); + break; + } + default: + /* unknown operation (shouldn't happen -- parse catches this) */ + goto err; + } + + qemu_aio_complete(ioreq, 0); + + return 0; + +err: + ioreq_unmap(ioreq); +err_no_map: + ioreq_finish(ioreq); + ioreq->status = BLKIF_RSP_ERROR; + return -1; +} + +static int blk_send_response_one(struct ioreq *ioreq) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + int send_notify = 0; + int have_requests = 0; + blkif_response_t resp; + void *dst; + + resp.id = ioreq->req.id; + resp.operation = ioreq->req.operation; + resp.status = ioreq->status; + + /* Place on the response ring for the relevant domain. */ + switch (blkdev->protocol) { + case BLKIF_PROTOCOL_NATIVE: + dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_32: + dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, + blkdev->rings.x86_32_part.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_64: + dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, + blkdev->rings.x86_64_part.rsp_prod_pvt); + break; + default: + dst = NULL; + return 0; + } + memcpy(dst, &resp, sizeof(resp)); + blkdev->rings.common.rsp_prod_pvt++; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); + if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). + */ + RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests); + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) { + have_requests = 1; + } + + if (have_requests) { + blkdev->more_work++; + } + return send_notify; +} + +/* walk finished list, send outstanding responses, free requests */ +static void blk_send_response_all(struct XenBlkDev *blkdev) +{ + struct ioreq *ioreq; + int send_notify = 0; + + while (!QLIST_EMPTY(&blkdev->finished)) { + ioreq = QLIST_FIRST(&blkdev->finished); + send_notify += blk_send_response_one(ioreq); + ioreq_release(ioreq, true); + } + if (send_notify) { + xen_be_send_notify(&blkdev->xendev); + } +} + +static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc) +{ + switch (blkdev->protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc), + sizeof(ioreq->req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&ioreq->req, + RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&ioreq->req, + RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc)); + break; + } + return 0; +} + +static void blk_handle_requests(struct XenBlkDev *blkdev) +{ + RING_IDX rc, rp; + struct ioreq *ioreq; + + blkdev->more_work = 0; + + rc = blkdev->rings.common.req_cons; + rp = blkdev->rings.common.sring->req_prod; + xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ + + blk_send_response_all(blkdev); + while (rc != rp) { + /* pull request from ring */ + if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) { + break; + } + ioreq = ioreq_start(blkdev); + if (ioreq == NULL) { + blkdev->more_work++; + break; + } + blk_get_request(blkdev, ioreq, rc); + blkdev->rings.common.req_cons = ++rc; + + /* parse them */ + if (ioreq_parse(ioreq) != 0) { + if (blk_send_response_one(ioreq)) { + xen_be_send_notify(&blkdev->xendev); + } + ioreq_release(ioreq, false); + continue; + } + + ioreq_runio_qemu_aio(ioreq); + } + + if (blkdev->more_work && blkdev->requests_inflight < max_requests) { + qemu_bh_schedule(blkdev->bh); + } +} + +/* ------------------------------------------------------------- */ + +static void blk_bh(void *opaque) +{ + struct XenBlkDev *blkdev = opaque; + blk_handle_requests(blkdev); +} + +/* + * We need to account for the grant allocations requiring contiguous + * chunks; the worst case number would be + * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1, + * but in order to keep things simple just use + * 2 * max_req * max_seg. + */ +#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg)) + +static void blk_alloc(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + + QLIST_INIT(&blkdev->inflight); + QLIST_INIT(&blkdev->finished); + QLIST_INIT(&blkdev->freelist); + blkdev->bh = qemu_bh_new(blk_bh, blkdev); + if (xen_mode != XEN_EMULATE) { + batch_maps = 1; + } + if (xc_gnttab_set_max_grants(xendev->gnttabdev, + MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { + xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n", + strerror(errno)); + } +} + +static void blk_parse_discard(struct XenBlkDev *blkdev) +{ + int enable; + + blkdev->feature_discard = true; + + if (xenstore_read_be_int(&blkdev->xendev, "discard-enable", &enable) == 0) { + blkdev->feature_discard = !!enable; + } + + if (blkdev->feature_discard) { + xenstore_write_be_int(&blkdev->xendev, "feature-discard", 1); + } +} + +static int blk_init(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + int info = 0; + char *directiosafe = NULL; + + /* read xenstore entries */ + if (blkdev->params == NULL) { + char *h = NULL; + blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); + if (blkdev->params != NULL) { + h = strchr(blkdev->params, ':'); + } + if (h != NULL) { + blkdev->fileproto = blkdev->params; + blkdev->filename = h+1; + *h = 0; + } else { + blkdev->fileproto = "<unset>"; + blkdev->filename = blkdev->params; + } + } + if (!strcmp("aio", blkdev->fileproto)) { + blkdev->fileproto = "raw"; + } + if (blkdev->mode == NULL) { + blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); + } + if (blkdev->type == NULL) { + blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); + } + if (blkdev->dev == NULL) { + blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); + } + if (blkdev->devtype == NULL) { + blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); + } + directiosafe = xenstore_read_be_str(&blkdev->xendev, "direct-io-safe"); + blkdev->directiosafe = (directiosafe && atoi(directiosafe)); + + /* do we have all we need? */ + if (blkdev->params == NULL || + blkdev->mode == NULL || + blkdev->type == NULL || + blkdev->dev == NULL) { + goto out_error; + } + + /* read-only ? */ + if (strcmp(blkdev->mode, "w")) { + info |= VDISK_READONLY; + } + + /* cdrom ? */ + if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) { + info |= VDISK_CDROM; + } + + blkdev->file_blk = BLOCK_SIZE; + + /* fill info + * blk_connect supplies sector-size and sectors + */ + xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); + xenstore_write_be_int(&blkdev->xendev, "info", info); + + blk_parse_discard(blkdev); + + g_free(directiosafe); + return 0; + +out_error: + g_free(blkdev->params); + blkdev->params = NULL; + g_free(blkdev->mode); + blkdev->mode = NULL; + g_free(blkdev->type); + blkdev->type = NULL; + g_free(blkdev->dev); + blkdev->dev = NULL; + g_free(blkdev->devtype); + blkdev->devtype = NULL; + g_free(directiosafe); + blkdev->directiosafe = false; + return -1; +} + +static int blk_connect(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + int pers, index, qflags; + bool readonly = true; + + /* read-only ? */ + if (blkdev->directiosafe) { + qflags = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO; + } else { + qflags = BDRV_O_CACHE_WB; + } + if (strcmp(blkdev->mode, "w") == 0) { + qflags |= BDRV_O_RDWR; + readonly = false; + } + if (blkdev->feature_discard) { + qflags |= BDRV_O_UNMAP; + } + + /* init qemu block driver */ + index = (blkdev->xendev.dev - 202 * 256) / 16; + blkdev->dinfo = drive_get(IF_XEN, 0, index); + if (!blkdev->dinfo) { + Error *local_err = NULL; + QDict *options = NULL; + + if (strcmp(blkdev->fileproto, "<unset>")) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(blkdev->fileproto)); + } + + /* setup via xenbus -> create new block driver instance */ + xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); + blkdev->blk = blk_new_open(blkdev->dev, blkdev->filename, NULL, options, + qflags, &local_err); + if (!blkdev->blk) { + xen_be_printf(&blkdev->xendev, 0, "error: %s\n", + error_get_pretty(local_err)); + error_free(local_err); + return -1; + } + } else { + /* setup via qemu cmdline -> already setup for us */ + xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); + blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo); + if (blk_is_read_only(blkdev->blk) && !readonly) { + xen_be_printf(&blkdev->xendev, 0, "Unexpected read-only drive"); + blkdev->blk = NULL; + return -1; + } + /* blkdev->blk is not create by us, we get a reference + * so we can blk_unref() unconditionally */ + blk_ref(blkdev->blk); + } + blk_attach_dev_nofail(blkdev->blk, blkdev); + blkdev->file_size = blk_getlength(blkdev->blk); + if (blkdev->file_size < 0) { + xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n", + (int)blkdev->file_size, strerror(-blkdev->file_size), + bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-"); + blkdev->file_size = 0; + } + + xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," + " size %" PRId64 " (%" PRId64 " MB)\n", + blkdev->type, blkdev->fileproto, blkdev->filename, + blkdev->file_size, blkdev->file_size >> 20); + + /* Fill in number of sector size and number of sectors */ + xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); + xenstore_write_be_int64(&blkdev->xendev, "sectors", + blkdev->file_size / blkdev->file_blk); + + if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { + return -1; + } + if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", + &blkdev->xendev.remote_port) == -1) { + return -1; + } + if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) { + blkdev->feature_persistent = FALSE; + } else { + blkdev->feature_persistent = !!pers; + } + + blkdev->protocol = BLKIF_PROTOCOL_NATIVE; + if (blkdev->xendev.protocol) { + if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { + blkdev->protocol = BLKIF_PROTOCOL_X86_32; + } + if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { + blkdev->protocol = BLKIF_PROTOCOL_X86_64; + } + } + + blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev, + blkdev->xendev.dom, + blkdev->ring_ref, + PROT_READ | PROT_WRITE); + if (!blkdev->sring) { + return -1; + } + blkdev->cnt_map++; + + switch (blkdev->protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring_native = blkdev->sring; + BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; + + BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; + + BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); + break; + } + } + + if (blkdev->feature_persistent) { + /* Init persistent grants */ + blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST; + blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, + batch_maps ? + (GDestroyNotify)g_free : + (GDestroyNotify)destroy_grant); + blkdev->persistent_regions = NULL; + blkdev->persistent_gnt_count = 0; + } + + xen_be_bind_evtchn(&blkdev->xendev); + + xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " + "remote port %d, local port %d\n", + blkdev->xendev.protocol, blkdev->ring_ref, + blkdev->xendev.remote_port, blkdev->xendev.local_port); + return 0; +} + +static void blk_disconnect(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + + if (blkdev->blk) { + blk_detach_dev(blkdev->blk, blkdev); + blk_unref(blkdev->blk); + blkdev->blk = NULL; + } + xen_be_unbind_evtchn(&blkdev->xendev); + + if (blkdev->sring) { + xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); + blkdev->cnt_map--; + blkdev->sring = NULL; + } + + /* + * Unmap persistent grants before switching to the closed state + * so the frontend can free them. + * + * In the !batch_maps case g_tree_destroy will take care of unmapping + * the grant, but in the batch_maps case we need to iterate over every + * region in persistent_regions and unmap it. + */ + if (blkdev->feature_persistent) { + g_tree_destroy(blkdev->persistent_gnts); + assert(batch_maps || blkdev->persistent_gnt_count == 0); + if (batch_maps) { + blkdev->persistent_gnt_count = 0; + g_slist_foreach(blkdev->persistent_regions, + (GFunc)remove_persistent_region, blkdev); + g_slist_free(blkdev->persistent_regions); + } + blkdev->feature_persistent = false; + } +} + +static int blk_free(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + struct ioreq *ioreq; + + if (blkdev->blk || blkdev->sring) { + blk_disconnect(xendev); + } + + while (!QLIST_EMPTY(&blkdev->freelist)) { + ioreq = QLIST_FIRST(&blkdev->freelist); + QLIST_REMOVE(ioreq, list); + qemu_iovec_destroy(&ioreq->v); + g_free(ioreq); + } + + g_free(blkdev->params); + g_free(blkdev->mode); + g_free(blkdev->type); + g_free(blkdev->dev); + g_free(blkdev->devtype); + qemu_bh_delete(blkdev->bh); + return 0; +} + +static void blk_event(struct XenDevice *xendev) +{ + struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + + qemu_bh_schedule(blkdev->bh); +} + +struct XenDevOps xen_blkdev_ops = { + .size = sizeof(struct XenBlkDev), + .flags = DEVOPS_FLAG_NEED_GNTDEV, + .alloc = blk_alloc, + .init = blk_init, + .initialise = blk_connect, + .disconnect = blk_disconnect, + .event = blk_event, + .free = blk_free, +}; |