ceph: use I_COMPLETE inode flag instead of D_COMPLETE flag

Commit c6ffe100 moved the flag that tracks whether the dcache contents for a directory are complete to the dentry. The problem is that there are lots of places that use ceph_dir_{set,clear,test}_complete() while holding i_ceph_lock, but ceph_dir_{set,clear,test}_complete() may sleep because they call dput(). This patch basically reverts that commit. As for ceph_d_prune(), it is called with both the dentry to prune and the parent dentry locked, so it is safe to access the parent dentry's d_inode and clear the I_COMPLETE flag. Signed-off-by: N Yan, Zheng <zheng.z.yan@intel.com> Reviewed-by: N Greg Farnum <greg@inktank.com> Reviewed-by: N Sage Weil <sage@inktank.com>

ceph: use I_COMPLETE inode flag instead of D_COMPLETE flag
Commit c6ffe100 moved the flag that tracks whether the dcache contents for a directory are complete to the dentry. The problem is that there are lots of places that use ceph_dir_{set,clear,test}_complete() while holding i_ceph_lock, but ceph_dir_{set,clear,test}_complete() may sleep because they call dput(). This patch basically reverts that commit. As for ceph_d_prune(), it is called with both the dentry to prune and the parent dentry locked, so it is safe to access the parent dentry's d_inode and clear the I_COMPLETE flag. Signed-off-by: N Yan, Zheng <zheng.z.yan@intel.com> Reviewed-by: N Greg Farnum <greg@inktank.com> Reviewed-by: N Sage Weil <sage@inktank.com>
a8673d61 · Yan, Zheng · Sage Weil · 964266cc · a8673d61 · a8673d61
Showing 34 additions and 95 deletions

fs/ceph/caps.c fs/ceph/caps.c +5 -3

fs/ceph/dir.c fs/ceph/dir.c +11 -51

fs/ceph/inode.c fs/ceph/inode.c +13 -17

fs/ceph/mds_client.c fs/ceph/mds_client.c +3 -3

fs/ceph/super.h fs/ceph/super.h +2 -21

未找到文件。
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,15 +490,17 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 		ci->i_rdcache_gen++;

 	/*
-	 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
+	 * if we are newly issued FILE_SHARED, clear I_COMPLETE; we
 	 * don't know what happened to this directory while we didn't
 	 * have the cap.
 	 */
 	if ((issued & CEPH_CAP_FILE_SHARED) &&
 	    (had & CEPH_CAP_FILE_SHARED) == 0) {
 		ci->i_shared_gen++;
-		if (S_ISDIR(ci->vfs_inode.i_mode))
-			ceph_dir_clear_complete(&ci->vfs_inode);
+		if (S_ISDIR(ci->vfs_inode.i_mode)) {
+			dout(" marking %p NOT complete\n", &ci->vfs_inode);
+			ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
+		}
 	}
 }


--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -107,7 +107,7 @@ static unsigned fpos_off(loff_t p)
 * falling back to a "normal" sync readdir if any dentries in the dir
 * are dropped.
 *
- * D_COMPLETE tells indicates we have all dentries in the dir.  It is
+ * I_COMPLETE indicates we have all dentries in the dir.  It is
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
@@ -198,8 +198,8 @@ static int __dcache_readdir(struct file *filp,
 	filp->f_pos++;

 	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_dir_test_complete(dir)) {
-		dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
+	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
+		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
 		err = -EAGAIN;
 		goto out;
 	}
@@ -284,7 +284,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if ((filp->f_pos == 2 || fi->dentry) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
-	    ceph_dir_test_complete(inode) &&
+	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		spin_unlock(&ci->i_ceph_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
@@ -350,7 +350,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)

 		if (!req->r_did_prepopulate) {
 			dout("readdir !did_prepopulate");
-			fi->dir_release_count--;    /* preclude D_COMPLETE */
+			fi->dir_release_count--;    /* preclude I_COMPLETE */
 		}

 		/* note next offset and last dentry name */
@@ -429,7 +429,8 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	 */
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_release_count == fi->dir_release_count) {
-		ceph_dir_set_complete(inode);
+		dout(" marking %p complete\n", inode);
+		ci->i_ceph_flags |= CEPH_I_COMPLETE;
 		ci->i_max_offset = filp->f_pos;
 	}
 	spin_unlock(&ci->i_ceph_lock);
@@ -604,7 +605,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 			    fsc->mount_options->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
-		    ceph_dir_test_complete(dir) &&
+		    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
 			spin_unlock(&ci->i_ceph_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
@@ -908,7 +909,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 */

 		/* d_move screws up d_subdirs order */
-		ceph_dir_clear_complete(new_dir);
+		ceph_i_clear(new_dir, CEPH_I_COMPLETE);

 		d_move(old_dentry, new_dentry);

@@ -1064,44 +1065,6 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
 	return 1;
 }

-/*
- * Set/clear/test dir complete flag on the dir's dentry.
- */
-void ceph_dir_set_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-	
-	if (dentry && ceph_dentry(dentry) &&
-	    ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-void ceph_dir_clear_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-bool ceph_dir_test_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) NOT complete\n", inode, dentry);
-		clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-	return false;
-}
-
 /*
 * When the VFS prunes a dentry from the cache, we need to clear the
 * complete flag on the parent directory.
@@ -1110,15 +1073,13 @@ bool ceph_dir_test_complete(struct inode *inode)
 */
 static void ceph_d_prune(struct dentry *dentry)
 {
-	struct ceph_dentry_info *di;
-
 	dout("ceph_d_prune %p\n", dentry);

 	/* do we have a valid parent? */
 	if (IS_ROOT(dentry))
 		return;

-	/* if we are not hashed, we don't affect D_COMPLETE */
+	/* if we are not hashed, we don't affect I_COMPLETE */
 	if (d_unhashed(dentry))
 		return;

@@ -1126,8 +1087,7 @@ static void ceph_d_prune(struct dentry *dentry)
 	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
 	 * cleared until d_release
 	 */
-	di = ceph_dentry(dentry->d_parent);
-	clear_bit(CEPH_D_COMPLETE, &di->flags);
+	ceph_i_clear(dentry->d_parent->d_inode, CEPH_I_COMPLETE);
 }

 /*

--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -561,7 +561,6 @@ static int fill_inode(struct inode *inode,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int i;
 	int issued = 0, implemented;
-	int updating_inode = 0;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
 	struct ceph_buffer *xattr_blob = NULL;
@@ -601,7 +600,6 @@ static int fill_inode(struct inode *inode,
 	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
 		goto no_change;
 	
-	updating_inode = 1;
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);

@@ -717,6 +715,17 @@ static int fill_inode(struct inode *inode,
 		       ceph_vinop(inode), inode->i_mode);
 	}

+	/* set dir completion flag? */
+	if (S_ISDIR(inode->i_mode) &&
+	    ci->i_files == 0 && ci->i_subdirs == 0 &&
+	    ceph_snap(inode) == CEPH_NOSNAP &&
+	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
+	    (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
+		dout(" marking %p complete (empty)\n", inode);
+		ci->i_ceph_flags |= CEPH_I_COMPLETE;
+		ci->i_max_offset = 2;
+	}
 no_change:
 	spin_unlock(&ci->i_ceph_lock);

@@ -767,19 +776,6 @@ static int fill_inode(struct inode *inode,
 		__ceph_get_fmode(ci, cap_fmode);
 	}

-	/* set dir completion flag? */
-	if (S_ISDIR(inode->i_mode) &&
-	    updating_inode &&                 /* didn't jump to no_change */
-	    ci->i_files == 0 && ci->i_subdirs == 0 &&
-	    ceph_snap(inode) == CEPH_NOSNAP &&
-	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
-	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
-	    !ceph_dir_test_complete(inode)) {
-		dout(" marking %p complete (empty)\n", inode);
-		ceph_dir_set_complete(inode);
-		ci->i_max_offset = 2;
-	}
-
 	/* update delegation info? */
 	if (dirinfo)
 		ceph_fill_dirfrag(inode, dirinfo);
@@ -861,7 +857,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 	di = ceph_dentry(dn);

 	spin_lock(&ci->i_ceph_lock);
-	if (!ceph_dir_test_complete(inode)) {
+	if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
 		spin_unlock(&ci->i_ceph_lock);
 		return;
 	}
@@ -1066,7 +1062,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			 * d_move() puts the renamed dentry at the end of
 			 * d_subdirs.  We need to assign it an appropriate
 			 * directory offset so we can behave when holding
-			 * D_COMPLETE.
+			 * I_COMPLETE.
 			 */
 			ceph_set_dentry_offset(req->r_old_dentry);
 			dout("dn %p gets new offset %lld\n", req->r_old_dentry, 

--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2029,7 +2029,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 }

 /*
- * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
+ * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
 * namespace request.
 */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
@@ -2037,9 +2037,9 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 	struct inode *inode = req->r_locked_dir;
 	struct ceph_inode_info *ci = ceph_inode(inode);

-	dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
+	dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
 	spin_lock(&ci->i_ceph_lock);
-	ceph_dir_clear_complete(inode);
+	ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
 	ci->i_release_count++;
 	spin_unlock(&ci->i_ceph_lock);


--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -204,7 +204,6 @@ struct ceph_inode_xattr {
 * Ceph dentry state
 */
 struct ceph_dentry_info {
-	unsigned long flags;
 	struct ceph_mds_session *lease_session;
 	u32 lease_gen, lease_shared_gen;
 	u32 lease_seq;
@@ -215,18 +214,6 @@ struct ceph_dentry_info {
 	u64 offset;
 };

-/*
- * dentry flags
- *
- * The locking for D_COMPLETE is a bit odd:
- *  - we can clear it at almost any time (see ceph_d_prune)
- *  - it is only meaningful if:
- *    - we hold dir inode i_ceph_lock
- *    - we hold dir FILE_SHARED caps
- *    - the dentry D_COMPLETE is set
- */
-#define CEPH_D_COMPLETE 1  /* if set, d_u.d_subdirs is complete directory */
-
 struct ceph_inode_xattrs_info {
 	/*
 	 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -267,7 +254,7 @@ struct ceph_inode_info {
 	struct timespec i_rctime;
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
-	u64 i_max_offset;  /* largest readdir offset, set with D_COMPLETE */
+	u64 i_max_offset;  /* largest readdir offset, set with I_COMPLETE */

 	struct rb_root i_fragtree;
 	struct mutex i_fragtree_mutex;
@@ -432,6 +419,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
 * Ceph inode.
 */
+#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
 #define CEPH_I_NODELAY   4  /* do not delay cap release */
 #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
 #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
@@ -488,13 +476,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
 	return ((loff_t)frag << 32) | (loff_t)off;
 }

-/*
- * set/clear directory D_COMPLETE flag
- */
-void ceph_dir_set_complete(struct inode *inode);
-void ceph_dir_clear_complete(struct inode *inode);
-bool ceph_dir_test_complete(struct inode *inode);
-
 /*
 * caps helpers
 */