summaryrefslogtreecommitdiffstats
path: root/kernel/fs/ceph/snap.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs/ceph/snap.c')
-rw-r--r--kernel/fs/ceph/snap.c174
1 files changed, 100 insertions, 74 deletions
diff --git a/kernel/fs/ceph/snap.c b/kernel/fs/ceph/snap.c
index a97e39f09..4aa7122a8 100644
--- a/kernel/fs/ceph/snap.c
+++ b/kernel/fs/ceph/snap.c
@@ -296,7 +296,7 @@ static int cmpu64_rev(const void *a, const void *b)
}
-static struct ceph_snap_context *empty_snapc;
+struct ceph_snap_context *ceph_empty_snapc;
/*
* build the snap context for a given realm.
@@ -338,12 +338,6 @@ static int build_snap_context(struct ceph_snap_realm *realm)
return 0;
}
- if (num == 0 && realm->seq == empty_snapc->seq) {
- ceph_get_snap_context(empty_snapc);
- snapc = empty_snapc;
- goto done;
- }
-
/* alloc new snap context */
err = -ENOMEM;
if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -381,7 +375,6 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
-done:
ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
@@ -436,6 +429,14 @@ static int dup_array(u64 **dst, __le64 *src, u32 num)
return 0;
}
+static bool has_new_snaps(struct ceph_snap_context *o,
+ struct ceph_snap_context *n)
+{
+ if (n->num_snaps == 0)
+ return false;
+ /* snaps are in descending order */
+ return n->snaps[0] > o->seq;
+}
/*
* When a snapshot is applied, the size/mtime inode metadata is queued
@@ -455,6 +456,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap;
+ struct ceph_snap_context *old_snapc, *new_snapc;
int used, dirty;
capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
@@ -467,6 +469,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
used = __ceph_caps_used(ci);
dirty = __ceph_caps_dirty(ci);
+ old_snapc = ci->i_head_snapc;
+ new_snapc = ci->i_snap_realm->cached_context;
+
/*
* If there is a write in progress, treat that as a dirty Fw,
* even though it hasn't completed yet; by the time we finish
@@ -481,76 +486,95 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
writes in progress now were started before the previous
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
- kfree(capsnap);
- } else if (ci->i_snap_realm->cached_context == empty_snapc) {
- dout("queue_cap_snap %p empty snapc\n", inode);
- kfree(capsnap);
- } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
- CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
- struct ceph_snap_context *snapc = ci->i_head_snapc;
-
- /*
- * if we are a sync write, we may need to go to the snaprealm
- * to get the current snapc.
- */
- if (!snapc)
- snapc = ci->i_snap_realm->cached_context;
+ goto update_snapc;
+ }
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) {
+ dout("queue_cap_snap %p nothing dirty|writing\n", inode);
+ goto update_snapc;
+ }
- dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
- inode, capsnap, snapc, ceph_cap_string(dirty));
- ihold(inode);
+ BUG_ON(!old_snapc);
- atomic_set(&capsnap->nref, 1);
- capsnap->ci = ci;
- INIT_LIST_HEAD(&capsnap->ci_item);
- INIT_LIST_HEAD(&capsnap->flushing_item);
-
- capsnap->follows = snapc->seq;
- capsnap->issued = __ceph_caps_issued(ci, NULL);
- capsnap->dirty = dirty;
-
- capsnap->mode = inode->i_mode;
- capsnap->uid = inode->i_uid;
- capsnap->gid = inode->i_gid;
-
- if (dirty & CEPH_CAP_XATTR_EXCL) {
- __ceph_build_xattrs_blob(ci);
- capsnap->xattr_blob =
- ceph_buffer_get(ci->i_xattrs.blob);
- capsnap->xattr_version = ci->i_xattrs.version;
- } else {
- capsnap->xattr_blob = NULL;
- capsnap->xattr_version = 0;
+ /*
+ * There is no need to send FLUSHSNAP message to MDS if there is
+ * no new snapshot. But when there is dirty pages or on-going
+ * writes, we still need to create cap_snap. cap_snap is needed
+ * by the write path and page writeback path.
+ *
+ * also see ceph_try_drop_cap_snap()
+ */
+ if (has_new_snaps(old_snapc, new_snapc)) {
+ if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))
+ capsnap->need_flush = true;
+ } else {
+ if (!(used & CEPH_CAP_FILE_WR) &&
+ ci->i_wrbuffer_ref_head == 0) {
+ dout("queue_cap_snap %p "
+ "no new_snap|dirty_page|writing\n", inode);
+ goto update_snapc;
}
+ }
- capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
-
- /* dirty page count moved from _head to this cap_snap;
- all subsequent writes page dirties occur _after_ this
- snapshot. */
- capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
- ci->i_wrbuffer_ref_head = 0;
- capsnap->context = snapc;
- ci->i_head_snapc =
- ceph_get_snap_context(ci->i_snap_realm->cached_context);
- dout(" new snapc is %p\n", ci->i_head_snapc);
- list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
-
- if (used & CEPH_CAP_FILE_WR) {
- dout("queue_cap_snap %p cap_snap %p snapc %p"
- " seq %llu used WR, now pending\n", inode,
- capsnap, snapc, snapc->seq);
- capsnap->writing = 1;
- } else {
- /* note mtime, size NOW. */
- __ceph_finish_cap_snap(ci, capsnap);
- }
+ dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n",
+ inode, capsnap, old_snapc, ceph_cap_string(dirty),
+ capsnap->need_flush ? "" : "no_flush");
+ ihold(inode);
+
+ atomic_set(&capsnap->nref, 1);
+ capsnap->ci = ci;
+ INIT_LIST_HEAD(&capsnap->ci_item);
+ INIT_LIST_HEAD(&capsnap->flushing_item);
+
+ capsnap->follows = old_snapc->seq;
+ capsnap->issued = __ceph_caps_issued(ci, NULL);
+ capsnap->dirty = dirty;
+
+ capsnap->mode = inode->i_mode;
+ capsnap->uid = inode->i_uid;
+ capsnap->gid = inode->i_gid;
+
+ if (dirty & CEPH_CAP_XATTR_EXCL) {
+ __ceph_build_xattrs_blob(ci);
+ capsnap->xattr_blob =
+ ceph_buffer_get(ci->i_xattrs.blob);
+ capsnap->xattr_version = ci->i_xattrs.version;
} else {
- dout("queue_cap_snap %p nothing dirty|writing\n", inode);
- kfree(capsnap);
+ capsnap->xattr_blob = NULL;
+ capsnap->xattr_version = 0;
}
+ capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
+ /* dirty page count moved from _head to this cap_snap;
+ all subsequent writes page dirties occur _after_ this
+ snapshot. */
+ capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
+ ci->i_wrbuffer_ref_head = 0;
+ capsnap->context = old_snapc;
+ list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
+ old_snapc = NULL;
+
+ if (used & CEPH_CAP_FILE_WR) {
+ dout("queue_cap_snap %p cap_snap %p snapc %p"
+ " seq %llu used WR, now pending\n", inode,
+ capsnap, old_snapc, old_snapc->seq);
+ capsnap->writing = 1;
+ } else {
+ /* note mtime, size NOW. */
+ __ceph_finish_cap_snap(ci, capsnap);
+ }
+ capsnap = NULL;
+
+update_snapc:
+ if (ci->i_head_snapc) {
+ ci->i_head_snapc = ceph_get_snap_context(new_snapc);
+ dout(" new snapc is %p\n", new_snapc);
+ }
spin_unlock(&ci->i_ceph_lock);
+
+ kfree(capsnap);
+ ceph_put_snap_context(old_snapc);
}
/*
@@ -699,6 +723,8 @@ more:
/* queue realm for cap_snap creation */
list_add(&realm->dirty_item, &dirty_realms);
+ if (realm->seq > mdsc->last_snap_seq)
+ mdsc->last_snap_seq = realm->seq;
invalidate = 1;
} else if (!realm->cached_context) {
@@ -964,14 +990,14 @@ out:
int __init ceph_snap_init(void)
{
- empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
- if (!empty_snapc)
+ ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
+ if (!ceph_empty_snapc)
return -ENOMEM;
- empty_snapc->seq = 1;
+ ceph_empty_snapc->seq = 1;
return 0;
}
void ceph_snap_exit(void)
{
- ceph_put_snap_context(empty_snapc);
+ ceph_put_snap_context(ceph_empty_snapc);
}