summary refs log tree commit diff stats
path: root/kernel/drivers/block/xen-blkback
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/block/xen-blkback')
-rw-r--r-- kernel/drivers/block/xen-blkback/blkback.c 45
-rw-r--r-- kernel/drivers/block/xen-blkback/common.h 31
-rw-r--r-- kernel/drivers/block/xen-blkback/xenbus.c 206
3 files changed, 180 insertions, 102 deletions
diff --git a/kernel/drivers/block/xen-blkback/blkback.c b/kernel/drivers/block/xen-blkback/blkback.c
index 3e9ec9523..41fb1a917 100644
--- a/kernel/drivers/block/xen-blkback/blkback.c
+++ b/kernel/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend. 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+/*
* The LRU mechanism to clean the lists of persistent grants needs to
* be executed periodically. The time interval between consecutive executions
* of the purge mechanism is set in ms.
@@ -729,7 +736,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
@@ -915,7 +922,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
int rc;
rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
- pending_req->nr_pages,
+ pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
return rc;
@@ -931,7 +938,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
- nseg = pending_req->nr_pages;
+ nseg = pending_req->nr_segs;
indirect_grefs = INDIRECT_PAGES(nseg);
BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -943,6 +950,8 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
goto unmap;
for (n = 0, i = 0; n < nseg; n++) {
+ uint8_t first_sect, last_sect;
+
if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
/* Map indirect segments */
if (segments)
@@ -950,15 +959,18 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
}
i = n % SEGS_PER_INDIRECT_FRAME;
+
pending_req->segments[n]->gref = segments[i].gref;
- seg[n].nsec = segments[i].last_sect -
- segments[i].first_sect + 1;
- seg[n].offset = (segments[i].first_sect << 9);
- if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
- (segments[i].last_sect < segments[i].first_sect)) {
+
+ first_sect = READ_ONCE(segments[i].first_sect);
+ last_sect = READ_ONCE(segments[i].last_sect);
+ if (last_sect >= (XEN_PAGE_SIZE >> 9) || last_sect < first_sect) {
rc = -EINVAL;
goto unmap;
}
+
+ seg[n].nsec = last_sect - first_sect + 1;
+ seg[n].offset = first_sect << 9;
preq->nr_sects += seg[n].nsec;
}
@@ -1071,9 +1083,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
/*
* bio callback.
*/
-static void end_block_io_op(struct bio *bio, int error)
+static void end_block_io_op(struct bio *bio)
{
- __end_block_io_op(bio->bi_private, error);
+ __end_block_io_op(bio->bi_private, bio->bi_error);
bio_put(bio);
}
@@ -1203,6 +1215,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
req_operation = req->operation == BLKIF_OP_INDIRECT ?
req->u.indirect.indirect_op : req->operation;
+
if ((req->operation == BLKIF_OP_INDIRECT) &&
(req_operation != BLKIF_OP_READ) &&
(req_operation != BLKIF_OP_WRITE)) {
@@ -1251,7 +1264,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ pending_req->nr_segs = nseg;
if (req->operation != BLKIF_OP_INDIRECT) {
preq.dev = req->u.rw.handle;
@@ -1261,7 +1274,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
seg[i].nsec = req->u.rw.seg[i].last_sect -
req->u.rw.seg[i].first_sect + 1;
seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
- if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ if ((req->u.rw.seg[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
(req->u.rw.seg[i].last_sect <
req->u.rw.seg[i].first_sect))
goto fail_response;
@@ -1372,7 +1385,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
fail_flush:
xen_blkbk_unmap(blkif, pending_req->segments,
- pending_req->nr_pages);
+ pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
@@ -1438,6 +1451,12 @@ static int __init xen_blkif_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
+ }
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
diff --git a/kernel/drivers/block/xen-blkback/common.h b/kernel/drivers/block/xen-blkback/common.h
index f620b5d3f..c929ae227 100644
--- a/kernel/drivers/block/xen-blkback/common.h
+++ b/kernel/drivers/block/xen-blkback/common.h
@@ -39,23 +39,33 @@
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
+#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
+extern unsigned int xen_blkif_max_ring_order;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
*/
#define MAX_INDIRECT_SEGMENTS 256
-#define SEGS_PER_INDIRECT_FRAME \
- (PAGE_SIZE/sizeof(struct blkif_request_segment))
+/*
+ * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
+ * This is the number of Xen pages per segment.
+ */
+#define XEN_PAGES_PER_SEGMENT (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+ (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+#define SEGS_PER_INDIRECT_FRAME \
+ (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
+
#define MAX_INDIRECT_PAGES \
((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
-#define INDIRECT_PAGES(_segs) \
- ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
/* Not a real protocol. Used to generate ring structs which contain
* the elements common to all protocols only. This way we get a
@@ -248,7 +258,7 @@ struct backend_info;
#define PERSISTENT_GNT_WAS_ACTIVE 1
/* Number of requests that we can fit in a ring */
-#define XEN_BLKIF_REQS 32
+#define XEN_BLKIF_REQS_PER_PAGE 32
struct persistent_gnt {
struct page *page;
@@ -320,6 +330,7 @@ struct xen_blkif {
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
+ unsigned int nr_ring_pages;
};
struct seg_buf {
@@ -343,7 +354,7 @@ struct grant_page {
struct pending_req {
struct xen_blkif *blkif;
u64 id;
- int nr_pages;
+ int nr_segs;
atomic_t pendcnt;
unsigned short operation;
int status;
@@ -397,8 +408,8 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
struct blkif_x86_32_request *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
- dst->operation = src->operation;
- switch (src->operation) {
+ dst->operation = READ_ONCE(src->operation);
+ switch (dst->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
@@ -445,8 +456,8 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
struct blkif_x86_64_request *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
- dst->operation = src->operation;
- switch (src->operation) {
+ dst->operation = READ_ONCE(src->operation);
+ switch (dst->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
diff --git a/kernel/drivers/block/xen-blkback/xenbus.c b/kernel/drivers/block/xen-blkback/xenbus.c
index 6ab69ad61..f53cff42f 100644
--- a/kernel/drivers/block/xen-blkback/xenbus.c
+++ b/kernel/drivers/block/xen-blkback/xenbus.c
@@ -25,6 +25,7 @@
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
struct backend_info {
struct xenbus_device *dev;
@@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
- struct pending_req *req, *n;
- int i, j;
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-
- for (i = 0; i < XEN_BLKIF_REQS; i++) {
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- goto fail;
- list_add_tail(&req->free_list,
- &blkif->pending_free);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- req->segments[j] = kzalloc(sizeof(*req->segments[0]),
- GFP_KERNEL);
- if (!req->segments[j])
- goto fail;
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
- GFP_KERNEL);
- if (!req->indirect_pages[j])
- goto fail;
- }
- }
spin_lock_init(&blkif->pending_free_lock);
init_waitqueue_head(&blkif->pending_free_wq);
init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
-
-fail:
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- if (!req->segments[j])
- break;
- kfree(req->segments[j]);
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- if (!req->indirect_pages[j])
- break;
- kfree(req->indirect_pages[j]);
- }
- kfree(req);
- }
-
- kmem_cache_free(xen_blkif_cachep, blkif);
-
- return ERR_PTR(-ENOMEM);
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
- unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+ unsigned int nr_grefs, unsigned int evtchn)
{
int err;
@@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
if (blkif->irq)
return 0;
- err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+ err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
&blkif->blk_ring);
if (err < 0)
return err;
@@ -217,21 +176,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.native, sring,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
default:
@@ -253,6 +215,9 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
+ struct pending_req *req, *n;
+ int i = 0, j;
+
if (blkif->xenblkd) {
kthread_stop(blkif->xenblkd);
wake_up(&blkif->shutdown_wq);
@@ -279,13 +244,28 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(blkif);
+ /* Check that there is no request in use */
+ list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+ list_del(&req->free_list);
+
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+ kfree(req->segments[j]);
+
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+ kfree(req->indirect_pages[j]);
+
+ kfree(req);
+ i++;
+ }
+
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+ blkif->nr_ring_pages = 0;
+
return 0;
}
static void xen_blkif_free(struct xen_blkif *blkif)
{
- struct pending_req *req, *n;
- int i = 0, j;
xen_blkif_disconnect(blkif);
xen_vbd_free(&blkif->vbd);
@@ -298,22 +278,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
BUG_ON(!list_empty(&blkif->free_pages));
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
- /* Check that there is no request in use */
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
-
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
- kfree(req->segments[j]);
-
- for (j = 0; j < MAX_INDIRECT_PAGES; j++)
- kfree(req->indirect_pages[j]);
-
- kfree(req);
- i++;
- }
-
- WARN_ON(i != XEN_BLKIF_REQS);
-
kmem_cache_free(xen_blkif_cachep, blkif);
}
@@ -597,6 +561,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
if (err)
goto fail;
+ err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+ xen_blkif_max_ring_order);
+ if (err)
+ pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
@@ -860,22 +829,66 @@ again:
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- unsigned long ring_ref;
- unsigned int evtchn;
+ unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
+ unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
- int err;
+ struct pending_req *req, *n;
+ int err, i, j;
pr_debug("%s %s\n", __func__, dev->otherend);
- err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
- &ring_ref, "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "reading %s/ring-ref and event-channel",
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ &evtchn);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
return err;
}
+ pr_info("event-channel %u\n", evtchn);
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+ &ring_page_order);
+ if (err != 1) {
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+ "%u", &ring_ref[0]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+ dev->otherend);
+ return err;
+ }
+ nr_grefs = 1;
+ pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+ ring_ref[0]);
+ } else {
+ unsigned int i;
+
+ if (ring_page_order > xen_blkif_max_ring_order) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+ dev->otherend, ring_page_order,
+ xen_blkif_max_ring_order);
+ return err;
+ }
+
+ nr_grefs = 1 << ring_page_order;
+ for (i = 0; i < nr_grefs; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ "%u", &ring_ref[i]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/%s",
+ dev->otherend, ring_ref_name);
+ return err;
+ }
+ pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+ }
+ }
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -900,20 +913,55 @@ static int connect_ring(struct backend_info *be)
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
+ be->blkif->nr_ring_pages = nr_grefs;
- pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
- ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+ pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+ nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
+ for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ goto fail;
+ list_add_tail(&req->free_list, &be->blkif->pending_free);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
+ if (!req->segments[j])
+ goto fail;
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+ GFP_KERNEL);
+ if (!req->indirect_pages[j])
+ goto fail;
+ }
+ }
+
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+ err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
if (err) {
- xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
- ring_ref, evtchn);
+ xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
}
return 0;
+
+fail:
+ list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_del(&req->free_list);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ if (!req->segments[j])
+ break;
+ kfree(req->segments[j]);
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ if (!req->indirect_pages[j])
+ break;
+ kfree(req->indirect_pages[j]);
+ }
+ kfree(req);
+ }
+ return -ENOMEM;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {