Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: ceph/super.c: quiet sparse noise ceph/mds_client.c: quiet sparse noise ceph: use new D_COMPLETE dentry flag ceph: clear parent D_COMPLETE flag when on dentry prune

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: ceph/super.c: quiet sparse noise ceph/mds_client.c: quiet sparse noise ceph: use new D_COMPLETE dentry flag ceph: clear parent D_COMPLETE flag when on dentry prune
5d5a8d2d · Linus Torvalds · d4a2e61f · 0c6d4b4e · 5d5a8d2d · 5d5a8d2d
6 changed file
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -487,17 +487,15 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 		ci->i_rdcache_gen++;
 	/*
-	 * if we are newly issued FILE_SHARED, clear I_COMPLETE; we
+	 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
 	 * don't know what happened to this directory while we didn't
 	 * have the cap.
 	 */
 	if ((issued & CEPH_CAP_FILE_SHARED) &&
 	    (had & CEPH_CAP_FILE_SHARED) == 0) {
 		ci->i_shared_gen++;
-		if (S_ISDIR(ci->vfs_inode.i_mode)) {
+		if (S_ISDIR(ci->vfs_inode.i_mode))
-			dout(" marking %p NOT complete\n", &ci->vfs_inode);
+			ceph_dir_clear_complete(&ci->vfs_inode);
-			ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
-		}
 	}
 }

--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -108,7 +108,7 @@ static unsigned fpos_off(loff_t p)
 * falling back to a "normal" sync readdir if any dentries in the dir
 * are dropped.
 *
- * I_COMPLETE tells indicates we have all dentries in the dir.  It is
+ * D_COMPLETE tells indicates we have all dentries in the dir.  It is
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
@@ -199,8 +199,8 @@ static int __dcache_readdir(struct file *filp,
 	filp->f_pos++;
 	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
+	if (!ceph_dir_test_complete(dir)) {
-		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
+		dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
 		err = -EAGAIN;
 		goto out;
 	}
@@ -285,7 +285,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if ((filp->f_pos == 2 || fi->dentry) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
-	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+	    ceph_dir_test_complete(inode) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		spin_unlock(&inode->i_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
@@ -351,7 +351,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		if (!req->r_did_prepopulate) {
 			dout("readdir !did_prepopulate");
-			fi->dir_release_count--;    /* preclude I_COMPLETE */
+			fi->dir_release_count--;    /* preclude D_COMPLETE */
 		}
 		/* note next offset and last dentry name */
@@ -430,8 +430,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	 */
 	spin_lock(&inode->i_lock);
 	if (ci->i_release_count == fi->dir_release_count) {
-		dout(" marking %p complete\n", inode);
+		ceph_dir_set_complete(inode);
-		/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
 		ci->i_max_offset = filp->f_pos;
 	}
 	spin_unlock(&inode->i_lock);
@@ -614,7 +613,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 			    fsc->mount_options->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
-		    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+		    ceph_dir_test_complete(dir) &&
 		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
 			spin_unlock(&dir->i_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
@@ -934,7 +933,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 */
 		/* d_move screws up d_subdirs order */
-		ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+		ceph_dir_clear_complete(new_dir);
 		d_move(old_dentry, new_dentry);
@@ -1092,7 +1091,75 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
 	return 1;
 }
+/*
+ * Set/clear/test dir complete flag on the dir's dentry.
+ */
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+	struct dentry *alias;
+	if (list_empty(&inode->i_dentry))
+		return NULL;
+	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+	return alias;
+}
+void ceph_dir_set_complete(struct inode *inode)
+{
+	struct dentry *dentry = __d_find_any_alias(inode);
+	if (dentry && ceph_dentry(dentry)) {
+		dout(" marking %p (%p) complete\n", inode, dentry);
+		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	}
+}
+void ceph_dir_clear_complete(struct inode *inode)
+{
+	struct dentry *dentry = __d_find_any_alias(inode);
+	if (dentry && ceph_dentry(dentry)) {
+		dout(" marking %p (%p) NOT complete\n", inode, dentry);
+		clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	}
+}
+bool ceph_dir_test_complete(struct inode *inode)
+{
+	struct dentry *dentry = __d_find_any_alias(inode);
+	if (dentry && ceph_dentry(dentry))
+		return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	return false;
+}
+/*
+ * When the VFS prunes a dentry from the cache, we need to clear the
+ * complete flag on the parent directory.
+ *
+ * Called under dentry->d_lock.
+ */
+static void ceph_d_prune(struct dentry *dentry)
+{
+	struct ceph_dentry_info *di;
+	dout("d_release %p\n", dentry);
+	/* do we have a valid parent? */
+	if (!dentry->d_parent || IS_ROOT(dentry))
+		return;
+	/* if we are not hashed, we don't affect D_COMPLETE */
+	if (d_unhashed(dentry))
+		return;
+	/*
+	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
+	 * cleared until d_release
+	 */
+	di = ceph_dentry(dentry->d_parent);
+	clear_bit(CEPH_D_COMPLETE, &di->flags);
+}
 /*
 * read() on a dir.  This weird interface hack only works if mounted
@@ -1306,6 +1373,7 @@ const struct inode_operations ceph_dir_iops = {
 const struct dentry_operations ceph_dentry_ops = {
 	.d_revalidate = ceph_d_revalidate,
 	.d_release = ceph_d_release,
+	.d_prune = ceph_d_prune,
 };
 const struct dentry_operations ceph_snapdir_dentry_ops = {
@@ -1315,4 +1383,5 @@ const struct dentry_operations ceph_snapdir_dentry_ops = {
 const struct dentry_operations ceph_snap_dentry_ops = {
 	.d_release = ceph_d_release,
+	.d_prune = ceph_d_prune,
 };
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -771,9 +771,9 @@ static int fill_inode(struct inode *inode,
 	    ceph_snap(inode) == CEPH_NOSNAP &&
 	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
 	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
-	    (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
+	    !ceph_dir_test_complete(inode)) {
 		dout(" marking %p complete (empty)\n", inode);
-		/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
+		ceph_dir_set_complete(inode);
 		ci->i_max_offset = 2;
 	}
@@ -856,7 +856,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 	di = ceph_dentry(dn);
 	spin_lock(&inode->i_lock);
-	if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
+	if (!ceph_dir_test_complete(inode)) {
 		spin_unlock(&inode->i_lock);
 		return;
 	}
@@ -1056,7 +1056,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			 * d_move() puts the renamed dentry at the end of
 			 * d_subdirs.  We need to assign it an appropriate
 			 * directory offset so we can behave when holding
-			 * I_COMPLETE.
+			 * D_COMPLETE.
 			 */
 			ceph_set_dentry_offset(req->r_old_dentry);
 			dout("dn %p gets new offset %lld\n", req->r_old_dentry, 

--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -619,7 +619,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 *
 * Called under mdsc->mutex.
 */
-struct dentry *get_nonsnap_parent(struct dentry *dentry)
+static struct dentry *get_nonsnap_parent(struct dentry *dentry)
 {
 	/*
 	 * we don't need to worry about protecting the d_parent access
@@ -2002,7 +2002,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 }
 /*
- * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
+ * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
 * namespace request.
 */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
@@ -2010,9 +2010,9 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 	struct inode *inode = req->r_locked_dir;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
+	dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
 	spin_lock(&inode->i_lock);
-	ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
+	ceph_dir_clear_complete(inode);
 	ci->i_release_count++;
 	spin_unlock(&inode->i_lock);
@@ -3154,7 +3154,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 /*
 * true if all sessions are closed, or we force unmount
 */
-bool done_closing_sessions(struct ceph_mds_client *mdsc)
+static bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
 	int i, n = 0;

--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -426,7 +426,7 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 /*
 * create a new fs client
 */
-struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
+static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 					struct ceph_options *opt)
 {
 	struct ceph_fs_client *fsc;
@@ -502,7 +502,7 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	return ERR_PTR(err);
 }
-void destroy_fs_client(struct ceph_fs_client *fsc)
+static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
 	dout("destroy_fs_client %p\n", fsc);

--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -203,6 +203,7 @@ struct ceph_inode_xattr {
 * Ceph dentry state
 */
 struct ceph_dentry_info {
+	unsigned long flags;
 	struct ceph_mds_session *lease_session;
 	u32 lease_gen, lease_shared_gen;
 	u32 lease_seq;
@@ -213,6 +214,18 @@ struct ceph_dentry_info {
 	u64 offset;
 };
+/*
+ * dentry flags
+ *
+ * The locking for D_COMPLETE is a bit odd:
+ *  - we can clear it at almost any time (see ceph_d_prune)
+ *  - it is only meaningful if:
+ *    - we hold dir inode i_lock
+ *    - we hold dir FILE_SHARED caps
+ *    - the dentry D_COMPLETE is set
+ */
+#define CEPH_D_COMPLETE 1  /* if set, d_u.d_subdirs is complete directory */
 struct ceph_inode_xattrs_info {
 	/*
 	 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -251,7 +264,7 @@ struct ceph_inode_info {
 	struct timespec i_rctime;
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
-	u64 i_max_offset;  /* largest readdir offset, set with I_COMPLETE */
+	u64 i_max_offset;  /* largest readdir offset, set with D_COMPLETE */
 	struct rb_root i_fragtree;
 	struct mutex i_fragtree_mutex;
@@ -416,7 +429,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
 * Ceph inode.
 */
-#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
 #define CEPH_I_NODELAY   4  /* do not delay cap release */
 #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
 #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
@@ -473,6 +485,13 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
 	return ((loff_t)frag << 32) | (loff_t)off;
 }
+/*
+ * set/clear directory D_COMPLETE flag
+ */
+void ceph_dir_set_complete(struct inode *inode);
+void ceph_dir_clear_complete(struct inode *inode);
+bool ceph_dir_test_complete(struct inode *inode);
 /*
 * caps helpers
 */