summary | refs | log | tree | commit | diff | stats
path: root/kernel/drivers/block/rbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/block/rbd.c')
-rw-r--r-- kernel/drivers/block/rbd.c 350
1 files changed, 175 insertions, 175 deletions
diff --git a/kernel/drivers/block/rbd.c b/kernel/drivers/block/rbd.c
index 010ce0b1f..81ea69fee 100644
--- a/kernel/drivers/block/rbd.c
+++ b/kernel/drivers/block/rbd.c
@@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_MINORS_PER_MAJOR 256
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+#define RBD_MAX_PARENT_CHAIN_LEN 16
+
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
@@ -346,6 +348,7 @@ struct rbd_device {
struct rbd_image_header header;
unsigned long flags; /* possibly lock protected */
struct rbd_spec *spec;
+ struct rbd_options *opts;
char *header_name;
@@ -415,8 +418,6 @@ MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (d
static int rbd_img_request_submit(struct rbd_img_request *img_request);
-static void rbd_dev_device_release(struct device *dev);
-
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
@@ -425,7 +426,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
static void rbd_spec_put(struct rbd_spec *spec);
static int rbd_dev_id_to_minor(int dev_id)
@@ -725,34 +726,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
}
/*
- * mount options
+ * (Per device) rbd map options
*/
enum {
+ Opt_queue_depth,
Opt_last_int,
/* int args above */
Opt_last_string,
/* string args above */
Opt_read_only,
Opt_read_write,
- /* Boolean args above */
- Opt_last_bool,
+ Opt_err
};
static match_table_t rbd_opts_tokens = {
+ {Opt_queue_depth, "queue_depth=%d"},
/* int args above */
/* string args above */
{Opt_read_only, "read_only"},
{Opt_read_only, "ro"}, /* Alternate spelling */
{Opt_read_write, "read_write"},
{Opt_read_write, "rw"}, /* Alternate spelling */
- /* Boolean args above */
- {-1, NULL}
+ {Opt_err, NULL}
};
struct rbd_options {
+ int queue_depth;
bool read_only;
};
+#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
#define RBD_READ_ONLY_DEFAULT false
static int parse_rbd_opts_token(char *c, void *private)
@@ -762,27 +765,27 @@ static int parse_rbd_opts_token(char *c, void *private)
int token, intval, ret;
token = match_token(c, rbd_opts_tokens, argstr);
- if (token < 0)
- return -EINVAL;
-
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
+ pr_err("bad mount option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
- dout("got string token %d val %s\n", token,
- argstr[0].from);
- } else if (token > Opt_last_string && token < Opt_last_bool) {
- dout("got Boolean token %d\n", token);
+ dout("got string token %d val %s\n", token, argstr[0].from);
} else {
dout("got token %d\n", token);
}
switch (token) {
+ case Opt_queue_depth:
+ if (intval < 1) {
+ pr_err("queue_depth out of range\n");
+ return -EINVAL;
+ }
+ rbd_opts->queue_depth = intval;
+ break;
case Opt_read_only:
rbd_opts->read_only = true;
break;
@@ -790,9 +793,10 @@ static int parse_rbd_opts_token(char *c, void *private)
rbd_opts->read_only = false;
break;
default:
- rbd_assert(false);
- break;
+ /* libceph prints "bad option" msg */
+ return -EINVAL;
}
+
return 0;
}
@@ -1564,22 +1568,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
/*
* Wait for an object request to complete. If interrupted, cancel the
* underlying osd request.
+ *
+ * @timeout: in jiffies, 0 means "wait forever"
*/
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
{
- int ret;
+ long ret;
dout("%s %p\n", __func__, obj_request);
-
- ret = wait_for_completion_interruptible(&obj_request->completion);
- if (ret < 0) {
- dout("%s %p interrupted\n", __func__, obj_request);
+ ret = wait_for_completion_interruptible_timeout(
+ &obj_request->completion,
+ ceph_timeout_jiffies(timeout));
+ if (ret <= 0) {
+ if (ret == 0)
+ ret = -ETIMEDOUT;
rbd_obj_request_end(obj_request);
- return ret;
+ } else {
+ ret = 0;
}
- dout("%s %p done\n", __func__, obj_request);
- return 0;
+ dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
+ return ret;
+}
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+ return __rbd_obj_request_wait(obj_request, 0);
+}
+
+static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
+{
+ return __rbd_obj_request_wait(obj_request, timeout);
}
static void rbd_img_request_complete(struct rbd_img_request *img_request)
@@ -1842,9 +1863,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
rbd_osd_read_callback(obj_request);
break;
case CEPH_OSD_OP_SETALLOCHINT:
- rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
+ osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
/* fall through */
case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
rbd_osd_write_callback(obj_request);
break;
case CEPH_OSD_OP_STAT:
@@ -2380,7 +2403,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
opcode = CEPH_OSD_OP_ZERO;
}
} else if (op_type == OBJ_OP_WRITE) {
- opcode = CEPH_OSD_OP_WRITE;
+ if (!offset && length == object_size)
+ opcode = CEPH_OSD_OP_WRITEFULL;
+ else
+ opcode = CEPH_OSD_OP_WRITE;
osd_req_op_alloc_hint_init(osd_request, num_ops,
object_size, object_size);
num_ops++;
@@ -2389,7 +2415,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
}
if (opcode == CEPH_OSD_OP_DELETE)
- osd_req_op_init(osd_request, num_ops, opcode);
+ osd_req_op_init(osd_request, num_ops, opcode, 0);
else
osd_req_op_extent_init(osd_request, num_ops, opcode,
offset, length, 0, 0);
@@ -2860,7 +2886,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
goto out;
stat_request->callback = rbd_img_obj_exists_callback;
- osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
false, false);
rbd_osd_req_format_read(stat_request);
@@ -3134,6 +3160,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
bool watch)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_options *opts = osdc->client->options;
struct rbd_obj_request *obj_request;
int ret;
@@ -3160,7 +3187,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
if (ret)
goto out;
- ret = rbd_obj_request_wait(obj_request);
+ ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
if (ret)
goto out;
@@ -3415,6 +3442,7 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
img_request->rq = rq;
+ snapc = NULL; /* img_request consumes a ref */
if (op_type == OBJ_OP_DISCARD)
result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
@@ -3452,52 +3480,6 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
}
-/*
- * a queue callback. Makes sure that we don't create a bio that spans across
- * multiple osd objects. One exception would be with a single page bios,
- * which we handle later at bio_chain_clone_range()
- */
-static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
- struct bio_vec *bvec)
-{
- struct rbd_device *rbd_dev = q->queuedata;
- sector_t sector_offset;
- sector_t sectors_per_obj;
- sector_t obj_sector_offset;
- int ret;
-
- /*
- * Find how far into its rbd object the partition-relative
- * bio start sector is to offset relative to the enclosing
- * device.
- */
- sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
- sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
- obj_sector_offset = sector_offset & (sectors_per_obj - 1);
-
- /*
- * Compute the number of bytes from that offset to the end
- * of the object. Account for what's already used by the bio.
- */
- ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
- if (ret > bmd->bi_size)
- ret -= bmd->bi_size;
- else
- ret = 0;
-
- /*
- * Don't send back more than was asked for. And if the bio
- * was empty, let the whole thing through because: "Note
- * that a block device *must* allow a single page to be
- * added to an empty bio."
- */
- rbd_assert(bvec->bv_len <= PAGE_SIZE);
- if (ret > (int) bvec->bv_len || !bmd->bi_size)
- ret = (int) bvec->bv_len;
-
- return ret;
-}
-
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk = rbd_dev->disk;
@@ -3762,10 +3744,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
rbd_dev->tag_set.ops = &rbd_mq_ops;
- rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
- rbd_dev->tag_set.flags =
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
rbd_dev->tag_set.nr_hw_queues = 1;
rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
@@ -3785,6 +3766,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ q->limits.max_sectors = queue_max_hw_sectors(q);
+ blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
@@ -3793,10 +3776,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size;
q->limits.discard_alignment = segment_size;
- q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+ blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
q->limits.discard_zeroes_data = 1;
- blk_queue_merge_bvec(q, rbd_merge_bvec);
+ if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
+ q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
+
disk->queue = q;
q->queuedata = rbd_dev;
@@ -4005,14 +3990,12 @@ static const struct attribute_group *rbd_attr_groups[] = {
NULL
};
-static void rbd_sysfs_dev_release(struct device *dev)
-{
-}
+static void rbd_dev_release(struct device *dev);
static struct device_type rbd_device_type = {
.name = "rbd",
.groups = rbd_attr_groups,
- .release = rbd_sysfs_dev_release,
+ .release = rbd_dev_release,
};
static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
@@ -4055,8 +4038,28 @@ static void rbd_spec_free(struct kref *kref)
kfree(spec);
}
+static void rbd_dev_release(struct device *dev)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ bool need_put = !!rbd_dev->opts;
+
+ rbd_put_client(rbd_dev->rbd_client);
+ rbd_spec_put(rbd_dev->spec);
+ kfree(rbd_dev->opts);
+ kfree(rbd_dev);
+
+ /*
+ * This is racy, but way better than putting module outside of
+ * the release callback. The race window is pretty small, so
+ * doing something similar to dm (dm-builtin.c) is overkill.
+ */
+ if (need_put)
+ module_put(THIS_MODULE);
+}
+
static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
- struct rbd_spec *spec)
+ struct rbd_spec *spec,
+ struct rbd_options *opts)
{
struct rbd_device *rbd_dev;
@@ -4070,8 +4073,14 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
INIT_LIST_HEAD(&rbd_dev->node);
init_rwsem(&rbd_dev->header_rwsem);
- rbd_dev->spec = spec;
+ rbd_dev->dev.bus = &rbd_bus_type;
+ rbd_dev->dev.type = &rbd_device_type;
+ rbd_dev->dev.parent = &rbd_root_dev;
+ device_initialize(&rbd_dev->dev);
+
rbd_dev->rbd_client = rbdc;
+ rbd_dev->spec = spec;
+ rbd_dev->opts = opts;
/* Initialize the layout used for all rbd requests */
@@ -4080,14 +4089,21 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
+ /*
+ * If this is a mapping rbd_dev (as opposed to a parent one),
+ * pin our module. We have a ref from do_rbd_add(), so use
+ * __module_get().
+ */
+ if (rbd_dev->opts)
+ __module_get(THIS_MODULE);
+
return rbd_dev;
}
static void rbd_dev_destroy(struct rbd_device *rbd_dev)
{
- rbd_put_client(rbd_dev->rbd_client);
- rbd_spec_put(rbd_dev->spec);
- kfree(rbd_dev);
+ if (rbd_dev)
+ put_device(&rbd_dev->dev);
}
/*
@@ -4695,7 +4711,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
}
ret = rbd_dev_v2_snap_context(rbd_dev);
- dout("rbd_dev_v2_snap_context returned %d\n", ret);
+ if (ret && first_time) {
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ }
return ret;
}
@@ -4710,27 +4729,6 @@ static int rbd_dev_header_info(struct rbd_device *rbd_dev)
return rbd_dev_v2_header_info(rbd_dev);
}
-static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
-{
- struct device *dev;
- int ret;
-
- dev = &rbd_dev->dev;
- dev->bus = &rbd_bus_type;
- dev->type = &rbd_device_type;
- dev->parent = &rbd_root_dev;
- dev->release = rbd_dev_device_release;
- dev_set_name(dev, "%d", rbd_dev->dev_id);
- ret = device_register(dev);
-
- return ret;
-}
-
-static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
-{
- device_unregister(&rbd_dev->dev);
-}
-
/*
* Get a unique rbd identifier for the given new rbd_dev, and add
* the rbd_dev to the global list.
@@ -4945,6 +4943,7 @@ static int rbd_add_parse_args(const char *buf,
goto out_mem;
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+ rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1,
@@ -4975,8 +4974,8 @@ out_err:
*/
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{
+ struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch;
- unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0;
int ret;
@@ -4991,7 +4990,8 @@ again:
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
- newest_epoch, timeout);
+ newest_epoch,
+ opts->mount_timeout);
goto again;
} else {
/* the osdmap we have is new enough */
@@ -5142,45 +5142,51 @@ out_err:
return ret;
}
-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+/*
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
+ * rbd_dev_image_probe() recursion depth, which means it's also the
+ * length of the already discovered part of the parent chain.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
{
struct rbd_device *parent = NULL;
- struct rbd_spec *parent_spec;
- struct rbd_client *rbdc;
int ret;
if (!rbd_dev->parent_spec)
return 0;
- /*
- * We need to pass a reference to the client and the parent
- * spec when creating the parent rbd_dev. Images related by
- * parent/child relationships always share both.
- */
- parent_spec = rbd_spec_get(rbd_dev->parent_spec);
- rbdc = __rbd_get_client(rbd_dev->rbd_client);
- ret = -ENOMEM;
- parent = rbd_dev_create(rbdc, parent_spec);
- if (!parent)
+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
+ pr_info("parent chain is too long (%d)\n", depth);
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
+ NULL);
+ if (!parent) {
+ ret = -ENOMEM;
goto out_err;
+ }
+
+ /*
+ * Images related by parent/child relationships always share
+ * rbd_client and spec/parent_spec, so bump their refcounts.
+ */
+ __rbd_get_client(rbd_dev->rbd_client);
+ rbd_spec_get(rbd_dev->parent_spec);
- ret = rbd_dev_image_probe(parent, false);
+ ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
+
rbd_dev->parent = parent;
atomic_set(&rbd_dev->parent_ref, 1);
-
return 0;
+
out_err:
- if (parent) {
- rbd_dev_unparent(rbd_dev);
- kfree(rbd_dev->header_name);
+ rbd_dev_unparent(rbd_dev);
+ if (parent)
rbd_dev_destroy(parent);
- } else {
- rbd_put_client(rbdc);
- rbd_spec_put(parent_spec);
- }
-
return ret;
}
@@ -5225,7 +5231,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
- ret = rbd_bus_add_dev(rbd_dev);
+ dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
+ ret = device_add(&rbd_dev->dev);
if (ret)
goto err_out_mapping;
@@ -5248,8 +5255,6 @@ err_out_blkdev:
unregister_blkdev(rbd_dev->major, rbd_dev->name);
err_out_id:
rbd_dev_id_put(rbd_dev);
- rbd_dev_mapping_clear(rbd_dev);
-
return ret;
}
@@ -5298,7 +5303,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
int ret;
@@ -5316,7 +5321,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_format;
- if (mapping) {
+ if (!depth) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret) {
if (ret == -ENOENT)
@@ -5337,7 +5342,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Otherwise this is a parent image, identified by pool, image
* and snap ids - need to fill in names for those ids.
*/
- if (mapping)
+ if (!depth)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
@@ -5359,12 +5364,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Need to warn users if this image is the one being
* mapped and has a parent.
*/
- if (mapping && rbd_dev->parent_spec)
+ if (!depth && rbd_dev->parent_spec)
rbd_warn(rbd_dev,
"WARNING: kernel layering is EXPERIMENTAL!");
}
- ret = rbd_dev_probe_parent(rbd_dev);
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
if (ret)
goto err_out_probe;
@@ -5375,7 +5380,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping)
+ if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
@@ -5397,7 +5402,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
struct rbd_spec *spec = NULL;
struct rbd_client *rbdc;
bool read_only;
- int rc = -ENOMEM;
+ int rc;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
@@ -5405,10 +5410,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
/* parse add command */
rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
if (rc < 0)
- goto err_out_module;
- read_only = rbd_opts->read_only;
- kfree(rbd_opts);
- rbd_opts = NULL; /* done with this */
+ goto out;
rbdc = rbd_get_client(ceph_opts);
if (IS_ERR(rbdc)) {
@@ -5434,18 +5436,22 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_client;
}
- rbd_dev = rbd_dev_create(rbdc, spec);
- if (!rbd_dev)
+ rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
+ if (!rbd_dev) {
+ rc = -ENOMEM;
goto err_out_client;
+ }
rbdc = NULL; /* rbd_dev now owns this */
spec = NULL; /* rbd_dev now owns this */
+ rbd_opts = NULL; /* rbd_dev now owns this */
- rc = rbd_dev_image_probe(rbd_dev, true);
+ rc = rbd_dev_image_probe(rbd_dev, 0);
if (rc < 0)
goto err_out_rbd_dev;
/* If we are mapping a snapshot it must be marked read-only */
+ read_only = rbd_dev->opts->read_only;
if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
read_only = true;
rbd_dev->mapping.read_only = read_only;
@@ -5459,10 +5465,13 @@ static ssize_t do_rbd_add(struct bus_type *bus,
*/
rbd_dev_header_unwatch_sync(rbd_dev);
rbd_dev_image_release(rbd_dev);
- goto err_out_module;
+ goto out;
}
- return count;
+ rc = count;
+out:
+ module_put(THIS_MODULE);
+ return rc;
err_out_rbd_dev:
rbd_dev_destroy(rbd_dev);
@@ -5470,12 +5479,8 @@ err_out_client:
rbd_put_client(rbdc);
err_out_args:
rbd_spec_put(spec);
-err_out_module:
- module_put(THIS_MODULE);
-
- dout("Error adding device %s\n", buf);
-
- return (ssize_t)rc;
+ kfree(rbd_opts);
+ goto out;
}
static ssize_t rbd_add(struct bus_type *bus,
@@ -5495,17 +5500,15 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
return do_rbd_add(bus, buf, count);
}
-static void rbd_dev_device_release(struct device *dev)
+static void rbd_dev_device_release(struct rbd_device *rbd_dev)
{
- struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
-
rbd_free_disk(rbd_dev);
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ device_del(&rbd_dev->dev);
rbd_dev_mapping_clear(rbd_dev);
if (!single_major)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
rbd_dev_id_put(rbd_dev);
- rbd_dev_mapping_clear(rbd_dev);
}
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
@@ -5590,9 +5593,8 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
* rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
* in a potential use after free of rbd_dev->disk or rbd_dev.
*/
- rbd_bus_del_dev(rbd_dev);
+ rbd_dev_device_release(rbd_dev);
rbd_dev_image_release(rbd_dev);
- module_put(THIS_MODULE);
return count;
}
@@ -5663,10 +5665,8 @@ static int rbd_slab_init(void)
if (rbd_segment_name_cache)
return 0;
out_err:
- if (rbd_obj_request_cache) {
- kmem_cache_destroy(rbd_obj_request_cache);
- rbd_obj_request_cache = NULL;
- }
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;