提交 8974eebd 编写于 作者: Y Yan, Zheng 提交者: Ilya Dryomov

ceph: record 'offset' for each entry of readdir result

This is preparation for using a hash value as the dentry 'offset'.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
上级 956d39d6
...@@ -277,12 +277,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -277,12 +277,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
unsigned frag = fpos_frag(ctx->pos); unsigned frag = fpos_frag(ctx->pos);
int off = fpos_off(ctx->pos); int i;
int err; int err;
u32 ftype; u32 ftype;
struct ceph_mds_reply_info_parsed *rinfo; struct ceph_mds_reply_info_parsed *rinfo;
dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
if (fi->flags & CEPH_F_ATEND) if (fi->flags & CEPH_F_ATEND)
return 0; return 0;
...@@ -294,7 +294,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -294,7 +294,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
inode->i_mode >> 12)) inode->i_mode >> 12))
return 0; return 0;
ctx->pos = 1; ctx->pos = 1;
off = 1;
} }
if (ctx->pos == 1) { if (ctx->pos == 1) {
ino_t ino = parent_ino(file->f_path.dentry); ino_t ino = parent_ino(file->f_path.dentry);
...@@ -304,7 +303,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -304,7 +303,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
inode->i_mode >> 12)) inode->i_mode >> 12))
return 0; return 0;
ctx->pos = 2; ctx->pos = 2;
off = 2;
} }
/* can we use the dcache? */ /* can we use the dcache? */
...@@ -320,7 +318,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -320,7 +318,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if (err != -EAGAIN) if (err != -EAGAIN)
return err; return err;
frag = fpos_frag(ctx->pos); frag = fpos_frag(ctx->pos);
off = fpos_off(ctx->pos);
} else { } else {
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
} }
...@@ -386,12 +383,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -386,12 +383,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
rinfo = &req->r_reply_info; rinfo = &req->r_reply_info;
if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
frag = le32_to_cpu(rinfo->dir_dir->frag); frag = le32_to_cpu(rinfo->dir_dir->frag);
off = req->r_readdir_offset; fi->next_offset = req->r_readdir_offset;
fi->next_offset = off; /* adjust ctx->pos to beginning of frag */
ctx->pos = ceph_make_fpos(frag, fi->next_offset);
} }
fi->frag = frag; fi->frag = frag;
fi->offset = fi->next_offset;
fi->last_readdir = req; fi->last_readdir = req;
if (req->r_did_prepopulate) { if (req->r_did_prepopulate) {
...@@ -399,7 +396,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -399,7 +396,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if (fi->readdir_cache_idx < 0) { if (fi->readdir_cache_idx < 0) {
/* preclude from marking dir ordered */ /* preclude from marking dir ordered */
fi->dir_ordered_count = 0; fi->dir_ordered_count = 0;
} else if (ceph_frag_is_leftmost(frag) && off == 2) { } else if (ceph_frag_is_leftmost(frag) &&
fi->next_offset == 2) {
/* note dir version at start of readdir so /* note dir version at start of readdir so
* we can tell if any dentries get dropped */ * we can tell if any dentries get dropped */
fi->dir_release_count = req->r_dir_release_cnt; fi->dir_release_count = req->r_dir_release_cnt;
...@@ -421,37 +419,54 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -421,37 +419,54 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_mds_reply_dir_entry *rde = struct ceph_mds_reply_dir_entry *rde =
rinfo->dir_entries + (rinfo->dir_nr-1); rinfo->dir_entries + (rinfo->dir_nr-1);
err = note_last_dentry(fi, rde->name, rde->name_len, err = note_last_dentry(fi, rde->name, rde->name_len,
fi->next_offset + rinfo->dir_nr); fpos_off(rde->offset) + 1);
if (err) if (err)
return err; return err;
} }
} }
rinfo = &fi->last_readdir->r_reply_info; rinfo = &fi->last_readdir->r_reply_info;
dout("readdir frag %x num %d off %d chunkoff %d\n", frag, dout("readdir frag %x num %d pos %llx chunk first %llx\n",
rinfo->dir_nr, off, fi->offset); frag, rinfo->dir_nr, ctx->pos,
rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
ctx->pos = ceph_make_fpos(frag, off); i = 0;
while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { /* search start position */
struct ceph_mds_reply_dir_entry *rde = if (rinfo->dir_nr > 0) {
rinfo->dir_entries + (off - fi->offset); int step, nr = rinfo->dir_nr;
while (nr > 0) {
step = nr >> 1;
if (rinfo->dir_entries[i + step].offset < ctx->pos) {
i += step + 1;
nr -= step + 1;
} else {
nr = step;
}
}
}
for (; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
struct ceph_vino vino; struct ceph_vino vino;
ino_t ino; ino_t ino;
dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", BUG_ON(rde->offset < ctx->pos);
off, off - fi->offset, rinfo->dir_nr, ctx->pos,
ctx->pos = rde->offset;
dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
i, rinfo->dir_nr, ctx->pos,
rde->name_len, rde->name, &rde->inode.in); rde->name_len, rde->name, &rde->inode.in);
BUG_ON(!rde->inode.in); BUG_ON(!rde->inode.in);
ftype = le32_to_cpu(rde->inode.in->mode) >> 12; ftype = le32_to_cpu(rde->inode.in->mode) >> 12;
vino.ino = le64_to_cpu(rde->inode.in->ino); vino.ino = le64_to_cpu(rde->inode.in->ino);
vino.snap = le64_to_cpu(rde->inode.in->snapid); vino.snap = le64_to_cpu(rde->inode.in->snapid);
ino = ceph_vino_to_ino(vino); ino = ceph_vino_to_ino(vino);
if (!dir_emit(ctx, rde->name, rde->name_len, if (!dir_emit(ctx, rde->name, rde->name_len,
ceph_translate_ino(inode->i_sb, ino), ftype)) { ceph_translate_ino(inode->i_sb, ino), ftype)) {
dout("filldir stopping us...\n"); dout("filldir stopping us...\n");
return 0; return 0;
} }
off++;
ctx->pos++; ctx->pos++;
} }
...@@ -464,8 +479,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -464,8 +479,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* more frags? */ /* more frags? */
if (!ceph_frag_is_rightmost(frag)) { if (!ceph_frag_is_rightmost(frag)) {
frag = ceph_frag_next(frag); frag = ceph_frag_next(frag);
off = 2; ctx->pos = ceph_make_fpos(frag, 2);
ctx->pos = ceph_make_fpos(frag, off);
dout("readdir next frag is %x\n", frag); dout("readdir next frag is %x\n", frag);
goto more; goto more;
} }
...@@ -497,7 +511,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -497,7 +511,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
return 0; return 0;
} }
static void reset_readdir(struct ceph_file_info *fi, unsigned frag) static void reset_readdir(struct ceph_file_info *fi)
{ {
if (fi->last_readdir) { if (fi->last_readdir) {
ceph_mdsc_put_request(fi->last_readdir); ceph_mdsc_put_request(fi->last_readdir);
...@@ -511,6 +525,23 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag) ...@@ -511,6 +525,23 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
fi->flags &= ~CEPH_F_ATEND; fi->flags &= ~CEPH_F_ATEND;
} }
/*
 * Decide whether a seek to @new_pos must discard the buffered readdir
 * content referenced by @fi.
 *
 * Returns true when:
 *   - the seek is a seekdir(0),
 *   - @new_pos falls in a different frag than fi->frag,
 *   - there is no buffered reply, or the buffered reply has no entries,
 *   - @new_pos lies before the offset of the first buffered entry
 *     (i.e. a seek prior to the current chunk).
 * Returns false otherwise, in which case the buffered chunk is kept.
 */
static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
{
	struct ceph_mds_reply_info_parsed *rinfo;

	if (new_pos == 0)
		return true;
	if (fpos_frag(new_pos) != fi->frag)
		return true;
	rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
	/* without a first entry there is nothing to compare new_pos against */
	if (!rinfo || !rinfo->dir_nr)
		return true;
	return new_pos < rinfo->dir_entries[0].offset;
}
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{ {
struct ceph_file_info *fi = file->private_data; struct ceph_file_info *fi = file->private_data;
...@@ -539,13 +570,9 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) ...@@ -539,13 +570,9 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
} }
retval = offset; retval = offset;
if (offset == 0 || if (need_reset_readdir(fi, offset)) {
fpos_frag(offset) != fi->frag ||
fpos_off(offset) < fi->offset) {
/* discard buffered readdir content on seekdir(0), or
* seek to new frag, or seek prior to current chunk */
dout("dir_llseek dropping %p content\n", file); dout("dir_llseek dropping %p content\n", file);
reset_readdir(fi, fpos_frag(offset)); reset_readdir(fi);
} else if (fpos_cmp(offset, old_offset) > 0) { } else if (fpos_cmp(offset, old_offset) > 0) {
/* reset dir_release_count if we did a forward seek */ /* reset dir_release_count if we did a forward seek */
fi->dir_release_count = 0; fi->dir_release_count = 0;
......
...@@ -1523,6 +1523,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1523,6 +1523,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
di = dn->d_fsdata; di = dn->d_fsdata;
di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
rde->offset = di->offset;
update_dentry_lease(dn, rde->lease, req->r_session, update_dentry_lease(dn, rde->lease, req->r_session,
req->r_request_started); req->r_request_started);
......
...@@ -214,6 +214,8 @@ static int parse_reply_info_dir(void **p, void *end, ...@@ -214,6 +214,8 @@ static int parse_reply_info_dir(void **p, void *end,
err = parse_reply_info_in(p, end, &rde->inode, features); err = parse_reply_info_in(p, end, &rde->inode, features);
if (err < 0) if (err < 0)
goto out_bad; goto out_bad;
/* ceph_readdir_prepopulate() will update it */
rde->offset = 0;
i++; i++;
num--; num--;
} }
......
...@@ -52,6 +52,7 @@ struct ceph_mds_reply_dir_entry { ...@@ -52,6 +52,7 @@ struct ceph_mds_reply_dir_entry {
u32 name_len; u32 name_len;
struct ceph_mds_reply_lease *lease; struct ceph_mds_reply_lease *lease;
struct ceph_mds_reply_info_in inode; struct ceph_mds_reply_info_in inode;
loff_t offset;
}; };
/* /*
......
...@@ -635,7 +635,6 @@ struct ceph_file_info { ...@@ -635,7 +635,6 @@ struct ceph_file_info {
struct ceph_mds_request *last_readdir; struct ceph_mds_request *last_readdir;
/* readdir: position within a frag */ /* readdir: position within a frag */
unsigned offset; /* offset of last chunk, adjusted for . and .. */
unsigned next_offset; /* offset of next chunk (last_name's + 1) */ unsigned next_offset; /* offset of next chunk (last_name's + 1) */
char *last_name; /* last entry in previous chunk */ char *last_name; /* last entry in previous chunk */
long long dir_release_count; long long dir_release_count;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册