提交 198a49a9 编写于 作者: J Jinshan Xiong 提交者: Greg Kroah-Hartman

staging: lustre: clio: revise readahead to support 16MB IO

Read ahead currently doesn't handle 16MB RPC packets correctly,
because it assumes the packets are a default size instead of querying
the actual size. This work adjusts the read ahead policy to issue
read ahead RPCs that match the underlying RPC size.
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: Gu Zheng <gzheng@ddn.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7990
Reviewed-on: http://review.whamcloud.com/19368
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
上级 ea3f00df
...@@ -1452,8 +1452,10 @@ struct cl_read_ahead { ...@@ -1452,8 +1452,10 @@ struct cl_read_ahead {
* cra_end is included. * cra_end is included.
*/ */
pgoff_t cra_end; pgoff_t cra_end;
/* optimal RPC size for this read, by pages */
unsigned long cra_rpc_size;
/* /*
* Release routine. If readahead holds resources underneath, this * Release callback. If readahead holds resources underneath, this
* function should be called to release it. * function should be called to release it.
*/ */
void (*cra_release)(const struct lu_env *env, void *cbdata); void (*cra_release)(const struct lu_env *env, void *cbdata);
......
...@@ -351,13 +351,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) ...@@ -351,13 +351,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
/* This value may be reduced at connect time in /*
* ptlrpc_connect_interpret() . We initialize it to only * Set it to possible maximum size. It may be reduced by ocd_brw_size
* 1MB until we know what the performance looks like. * from OFD after connecting.
* In the future this should likely be increased. LU-1431
*/ */
cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
LNET_MTU >> PAGE_SHIFT);
/* /*
* set cl_chunkbits default value to PAGE_CACHE_SHIFT, * set cl_chunkbits default value to PAGE_CACHE_SHIFT,
......
...@@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) ...@@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode)
return container_of(inode, struct ll_inode_info, lli_vfs_inode); return container_of(inode, struct ll_inode_info, lli_vfs_inode);
} }
/* default to about 40meg of readahead on a given system. That much tied /* default to about 64M of readahead on a given system. */
* up in 512k readahead requests serviced at 40ms each is about 1GB/s. #define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
*/
#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT))
/* default to read-ahead full files smaller than 2MB on the second read */ /* default to read-ahead full files smaller than 2MB on the second read */
#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) #define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
...@@ -321,6 +319,9 @@ struct ll_ra_info { ...@@ -321,6 +319,9 @@ struct ll_ra_info {
struct ra_io_arg { struct ra_io_arg {
unsigned long ria_start; /* start offset of read-ahead*/ unsigned long ria_start; /* start offset of read-ahead*/
unsigned long ria_end; /* end offset of read-ahead*/ unsigned long ria_end; /* end offset of read-ahead*/
unsigned long ria_reserved; /* reserved pages for read-ahead */
unsigned long ria_end_min; /* minimum end to cover current read */
bool ria_eof; /* reach end of file */
/* If stride read pattern is detected, ria_stoff means where /* If stride read pattern is detected, ria_stoff means where
* stride read is started. Note: for normal read-ahead, the * stride read is started. Note: for normal read-ahead, the
* value here is meaningless, and also it will not be accessed * value here is meaningless, and also it will not be accessed
...@@ -550,6 +551,11 @@ struct ll_readahead_state { ...@@ -550,6 +551,11 @@ struct ll_readahead_state {
* PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages. * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
*/ */
unsigned long ras_window_start, ras_window_len; unsigned long ras_window_start, ras_window_len;
/*
* Optimal RPC size. It decides how many pages will be sent
* for each read-ahead.
*/
unsigned long ras_rpc_size;
/* /*
* Where next read-ahead should start at. This lies within read-ahead * Where next read-ahead should start at. This lies within read-ahead
* window. Read-ahead window is read in pieces rather than at once * window. Read-ahead window is read in pieces rather than at once
......
...@@ -92,25 +92,6 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, ...@@ -92,25 +92,6 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
goto out; goto out;
} }
/* If the non-strided (ria_pages == 0) readahead window
* (ria_start + ret) has grown across an RPC boundary, then trim
* readahead size by the amount beyond the RPC so it ends on an
* RPC boundary. If the readahead window is already ending on
* an RPC boundary (beyond_rpc == 0), or smaller than a full
* RPC (beyond_rpc < ret) the readahead size is unchanged.
* The (beyond_rpc != 0) check is skipped since the conditional
* branch is more expensive than subtracting zero from the result.
*
* Strided read is left unaligned to avoid small fragments beyond
* the RPC boundary from needing an extra read RPC.
*/
if (ria->ria_pages == 0) {
long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;
if (/* beyond_rpc != 0 && */ beyond_rpc < ret)
ret -= beyond_rpc;
}
if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) { if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
atomic_sub(ret, &ra->ra_cur_pages); atomic_sub(ret, &ra->ra_cur_pages);
ret = 0; ret = 0;
...@@ -147,11 +128,12 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) ...@@ -147,11 +128,12 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
#define RAS_CDEBUG(ras) \ #define RAS_CDEBUG(ras) \
CDEBUG(D_READA, \ CDEBUG(D_READA, \
"lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \
"csr %lu sf %lu sp %lu sl %lu\n", \ "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \
ras->ras_last_readpage, ras->ras_consecutive_requests, \ ras->ras_last_readpage, ras->ras_consecutive_requests, \
ras->ras_consecutive_pages, ras->ras_window_start, \ ras->ras_consecutive_pages, ras->ras_window_start, \
ras->ras_window_len, ras->ras_next_readahead, \ ras->ras_window_len, ras->ras_next_readahead, \
ras->ras_rpc_size, \
ras->ras_requests, ras->ras_request_index, \ ras->ras_requests, ras->ras_request_index, \
ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
ras->ras_stride_pages, ras->ras_stride_length) ras->ras_stride_pages, ras->ras_stride_length)
...@@ -261,20 +243,6 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, ...@@ -261,20 +243,6 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
ria->ria_pages) ria->ria_pages)
/* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't
* know what the actual RPC size is. If this needs to change, it makes more
* sense to tune the i_blkbits value for the file based on the OSTs it is
* striped over, rather than having a constant value for all files here.
*/
/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)).
* Temporarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled
* by default, this should be adjusted corresponding with max_read_ahead_mb
* and max_read_ahead_per_file_mb otherwise the readahead budget can be used
* up quickly which will affect read performance significantly. See LU-2816
*/
#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT)
static inline int stride_io_mode(struct ll_readahead_state *ras) static inline int stride_io_mode(struct ll_readahead_state *ras)
{ {
return ras->ras_consecutive_stride_requests > 1; return ras->ras_consecutive_stride_requests > 1;
...@@ -345,6 +313,17 @@ static int ria_page_count(struct ra_io_arg *ria) ...@@ -345,6 +313,17 @@ static int ria_page_count(struct ra_io_arg *ria)
length); length);
} }
static unsigned long ras_align(struct ll_readahead_state *ras,
unsigned long index,
unsigned long *remainder)
{
unsigned long rem = index % ras->ras_rpc_size;
if (remainder)
*remainder = rem;
return index - rem;
}
/*Check whether the index is in the defined ra-window */ /*Check whether the index is in the defined ra-window */
static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria) static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
{ {
...@@ -358,42 +337,63 @@ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria) ...@@ -358,42 +337,63 @@ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
ria->ria_length < ria->ria_pages); ria->ria_length < ria->ria_pages);
} }
static int ll_read_ahead_pages(const struct lu_env *env, static unsigned long
struct cl_io *io, struct cl_page_list *queue, ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
struct ra_io_arg *ria, struct cl_page_list *queue, struct ll_readahead_state *ras,
unsigned long *reserved_pages, struct ra_io_arg *ria)
pgoff_t *ra_end)
{ {
struct cl_read_ahead ra = { 0 }; struct cl_read_ahead ra = { 0 };
int rc, count = 0; unsigned long ra_end = 0;
bool stride_ria; bool stride_ria;
pgoff_t page_idx; pgoff_t page_idx;
int rc;
LASSERT(ria); LASSERT(ria);
RIA_DEBUG(ria); RIA_DEBUG(ria);
stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0; stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
for (page_idx = ria->ria_start; for (page_idx = ria->ria_start;
page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) { page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
if (ras_inside_ra_window(page_idx, ria)) { if (ras_inside_ra_window(page_idx, ria)) {
if (!ra.cra_end || ra.cra_end < page_idx) { if (!ra.cra_end || ra.cra_end < page_idx) {
unsigned long end;
cl_read_ahead_release(env, &ra); cl_read_ahead_release(env, &ra);
rc = cl_io_read_ahead(env, io, page_idx, &ra); rc = cl_io_read_ahead(env, io, page_idx, &ra);
if (rc < 0) if (rc < 0)
break; break;
CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
page_idx, ra.cra_end, ra.cra_rpc_size);
LASSERTF(ra.cra_end >= page_idx, LASSERTF(ra.cra_end >= page_idx,
"object: %p, indcies %lu / %lu\n", "object: %p, indcies %lu / %lu\n",
io->ci_obj, ra.cra_end, page_idx); io->ci_obj, ra.cra_end, page_idx);
/*
* update read ahead RPC size.
* NB: it's racy but doesn't matter
*/
if (ras->ras_rpc_size > ra.cra_rpc_size &&
ra.cra_rpc_size > 0)
ras->ras_rpc_size = ra.cra_rpc_size;
/* trim it to align with optimal RPC size */
end = ras_align(ras, ria->ria_end + 1, NULL);
if (end > 0 && !ria->ria_eof)
ria->ria_end = end - 1;
if (ria->ria_end < ria->ria_end_min)
ria->ria_end = ria->ria_end_min;
if (ria->ria_end > ra.cra_end)
ria->ria_end = ra.cra_end;
} }
/* If the page is inside the read-ahead window*/ /* If the page is inside the read-ahead window */
rc = ll_read_ahead_page(env, io, queue, page_idx); rc = ll_read_ahead_page(env, io, queue, page_idx);
if (!rc) { if (rc < 0)
(*reserved_pages)--; break;
count++;
} ra_end = page_idx;
if (!rc)
ria->ria_reserved--;
} else if (stride_ria) { } else if (stride_ria) {
/* If it is not in the read-ahead window, and it is /* If it is not in the read-ahead window, and it is
* read-ahead mode, then check whether it should skip * read-ahead mode, then check whether it should skip
...@@ -420,8 +420,7 @@ static int ll_read_ahead_pages(const struct lu_env *env, ...@@ -420,8 +420,7 @@ static int ll_read_ahead_pages(const struct lu_env *env,
} }
cl_read_ahead_release(env, &ra); cl_read_ahead_release(env, &ra);
*ra_end = page_idx; return ra_end;
return count;
} }
static int ll_readahead(const struct lu_env *env, struct cl_io *io, static int ll_readahead(const struct lu_env *env, struct cl_io *io,
...@@ -431,7 +430,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, ...@@ -431,7 +430,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
struct vvp_io *vio = vvp_env_io(env); struct vvp_io *vio = vvp_env_io(env);
struct ll_thread_info *lti = ll_env_info(env); struct ll_thread_info *lti = ll_env_info(env);
struct cl_attr *attr = vvp_env_thread_attr(env); struct cl_attr *attr = vvp_env_thread_attr(env);
unsigned long len, mlen = 0, reserved; unsigned long len, mlen = 0;
pgoff_t ra_end, start = 0, end = 0; pgoff_t ra_end, start = 0, end = 0;
struct inode *inode; struct inode *inode;
struct ra_io_arg *ria = &lti->lti_ria; struct ra_io_arg *ria = &lti->lti_ria;
...@@ -478,29 +477,15 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, ...@@ -478,29 +477,15 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
end < vio->vui_ra_start + vio->vui_ra_count - 1) end < vio->vui_ra_start + vio->vui_ra_count - 1)
end = vio->vui_ra_start + vio->vui_ra_count - 1; end = vio->vui_ra_start + vio->vui_ra_count - 1;
if (end != 0) { if (end) {
unsigned long rpc_boundary; unsigned long end_index;
/*
* Align RA window to an optimal boundary.
*
* XXX This would be better to align to cl_max_pages_per_rpc
* instead of PTLRPC_MAX_BRW_PAGES, because the RPC size may
* be aligned to the RAID stripe size in the future and that
* is more important than the RPC size.
*/
/* Note: we only trim the RPC, instead of extending the RPC
* to the boundary, so to avoid reading too much pages during
* random reading.
*/
rpc_boundary = (end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1));
if (rpc_boundary > 0)
rpc_boundary--;
if (rpc_boundary > start)
end = rpc_boundary;
/* Truncate RA window to end of file */ /* Truncate RA window to end of file */
end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT)); end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
if (end_index <= end) {
end = end_index;
ria->ria_eof = true;
}
ras->ras_next_readahead = max(end, end + 1); ras->ras_next_readahead = max(end, end + 1);
RAS_CDEBUG(ras); RAS_CDEBUG(ras);
...@@ -535,28 +520,31 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, ...@@ -535,28 +520,31 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
/* at least to extend the readahead window to cover current read */ /* at least to extend the readahead window to cover current read */
if (!hit && vio->vui_ra_valid && if (!hit && vio->vui_ra_valid &&
vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
unsigned long remainder;
/* to the end of current read window. */ /* to the end of current read window. */
mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
/* trim to RPC boundary */ /* trim to RPC boundary */
start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1); ras_align(ras, ria->ria_start, &remainder);
mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start); mlen = min(mlen, ras->ras_rpc_size - remainder);
ria->ria_end_min = ria->ria_start + mlen;
} }
reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
if (reserved < len) if (ria->ria_reserved < len)
ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
reserved, len, mlen, ria->ria_reserved, len, mlen,
atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
ll_i2sbi(inode)->ll_ra_info.ra_max_pages); ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end); ra_end = ll_read_ahead_pages(env, io, queue, ras, ria);
if (reserved != 0) if (ria->ria_reserved)
ll_ra_count_put(ll_i2sbi(inode), reserved); ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
ll_ra_stats_inc(inode, RA_STAT_EOF); ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from /* if we didn't get to the end of the region we reserved from
...@@ -568,13 +556,13 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, ...@@ -568,13 +556,13 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n", CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
ra_end, end, ria->ria_end, ret); ra_end, end, ria->ria_end, ret);
if (ra_end != end + 1) { if (ra_end > 0 && ra_end != end) {
ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
spin_lock(&ras->ras_lock); spin_lock(&ras->ras_lock);
if (ra_end < ras->ras_next_readahead && if (ra_end <= ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0, index_in_window(ra_end, ras->ras_window_start, 0,
ras->ras_window_len)) { ras->ras_window_len)) {
ras->ras_next_readahead = ra_end; ras->ras_next_readahead = ra_end + 1;
RAS_CDEBUG(ras); RAS_CDEBUG(ras);
} }
spin_unlock(&ras->ras_lock); spin_unlock(&ras->ras_lock);
...@@ -586,7 +574,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, ...@@ -586,7 +574,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras, static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
unsigned long index) unsigned long index)
{ {
ras->ras_window_start = index & (~(RAS_INCREASE_STEP(inode) - 1)); ras->ras_window_start = ras_align(ras, index, NULL);
} }
/* called with the ras_lock held or from places where it doesn't matter */ /* called with the ras_lock held or from places where it doesn't matter */
...@@ -615,6 +603,7 @@ static void ras_stride_reset(struct ll_readahead_state *ras) ...@@ -615,6 +603,7 @@ static void ras_stride_reset(struct ll_readahead_state *ras)
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
{ {
spin_lock_init(&ras->ras_lock); spin_lock_init(&ras->ras_lock);
ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
ras_reset(inode, ras, 0); ras_reset(inode, ras, 0);
ras->ras_requests = 0; ras->ras_requests = 0;
} }
...@@ -719,12 +708,15 @@ static void ras_increase_window(struct inode *inode, ...@@ -719,12 +708,15 @@ static void ras_increase_window(struct inode *inode,
* but current clio architecture does not support retrieve such * but current clio architecture does not support retrieve such
* information from lower layer. FIXME later * information from lower layer. FIXME later
*/ */
if (stride_io_mode(ras)) if (stride_io_mode(ras)) {
ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP(inode)); ras_stride_increase_window(ras, ra, ras->ras_rpc_size);
else } else {
ras->ras_window_len = min(ras->ras_window_len + unsigned long wlen;
RAS_INCREASE_STEP(inode),
ra->ra_max_pages_per_file); wlen = min(ras->ras_window_len + ras->ras_rpc_size,
ra->ra_max_pages_per_file);
ras->ras_window_len = ras_align(ras, wlen, NULL);
}
} }
static void ras_update(struct ll_sb_info *sbi, struct inode *inode, static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
...@@ -852,6 +844,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, ...@@ -852,6 +844,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
* instead of ras_window_start, which is RPC aligned * instead of ras_window_start, which is RPC aligned
*/ */
ras->ras_next_readahead = max(index, ras->ras_next_readahead); ras->ras_next_readahead = max(index, ras->ras_next_readahead);
ras->ras_window_start = max(ras->ras_stride_offset,
ras->ras_window_start);
} else { } else {
if (ras->ras_next_readahead < ras->ras_window_start) if (ras->ras_next_readahead < ras->ras_window_start)
ras->ras_next_readahead = ras->ras_window_start; ras->ras_next_readahead = ras->ras_window_start;
...@@ -881,7 +875,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, ...@@ -881,7 +875,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
*/ */
ras->ras_next_readahead = max(index, ras->ras_next_readahead); ras->ras_next_readahead = max(index, ras->ras_next_readahead);
ras->ras_stride_offset = index; ras->ras_stride_offset = index;
ras->ras_window_len = RAS_INCREASE_STEP(inode); ras->ras_window_start = max(index, ras->ras_window_start);
} }
/* The initial ras_window_len is set to the request size. To avoid /* The initial ras_window_len is set to the request size. To avoid
...@@ -1098,38 +1092,39 @@ static int ll_io_read_page(const struct lu_env *env, struct cl_io *io, ...@@ -1098,38 +1092,39 @@ static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
struct cl_2queue *queue = &io->ci_queue; struct cl_2queue *queue = &io->ci_queue;
struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_sb_info *sbi = ll_i2sbi(inode);
struct vvp_page *vpg; struct vvp_page *vpg;
bool uptodate;
int rc = 0; int rc = 0;
vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page)); vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
uptodate = vpg->vpg_defer_uptodate;
if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
sbi->ll_ra_info.ra_max_pages > 0) { sbi->ll_ra_info.ra_max_pages > 0) {
struct vvp_io *vio = vvp_env_io(env); struct vvp_io *vio = vvp_env_io(env);
enum ras_update_flags flags = 0; enum ras_update_flags flags = 0;
if (vpg->vpg_defer_uptodate) if (uptodate)
flags |= LL_RAS_HIT; flags |= LL_RAS_HIT;
if (!vio->vui_ra_valid) if (!vio->vui_ra_valid)
flags |= LL_RAS_MMAP; flags |= LL_RAS_MMAP;
ras_update(sbi, inode, ras, vvp_index(vpg), flags); ras_update(sbi, inode, ras, vvp_index(vpg), flags);
} }
if (vpg->vpg_defer_uptodate) { cl_2queue_init(queue);
if (uptodate) {
vpg->vpg_ra_used = 1; vpg->vpg_ra_used = 1;
cl_page_export(env, page, 1); cl_page_export(env, page, 1);
cl_page_disown(env, io, page);
} else {
cl_page_list_add(&queue->c2_qin, page);
} }
cl_2queue_init(queue);
/*
* Add page into the queue even when it is marked uptodate above.
* this will unlock it automatically as part of cl_page_list_disown().
*/
cl_page_list_add(&queue->c2_qin, page);
if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
sbi->ll_ra_info.ra_max_pages > 0) { sbi->ll_ra_info.ra_max_pages > 0) {
int rc2; int rc2;
rc2 = ll_readahead(env, io, &queue->c2_qin, ras, rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
vpg->vpg_defer_uptodate); uptodate);
CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n", CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg)); PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
} }
......
...@@ -99,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env, ...@@ -99,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env,
ldlm_lock_decref(&lockh, dlmlock->l_req_mode); ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
} }
ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
ra->cra_end = cl_index(osc2cl(osc), ra->cra_end = cl_index(osc2cl(osc),
dlmlock->l_policy_data.l_extent.end); dlmlock->l_policy_data.l_extent.end);
ra->cra_release = osc_read_ahead_release; ra->cra_release = osc_read_ahead_release;
...@@ -138,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env, ...@@ -138,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env,
LASSERT(qin->pl_nr > 0); LASSERT(qin->pl_nr > 0);
CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt); CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt);
osc = cl2osc(ios->cis_obj); osc = cl2osc(ios->cis_obj);
cli = osc_cli(osc); cli = osc_cli(osc);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册