提交 f742dc4a 编写于 作者: P Peng Tao 提交者: Trond Myklebust

pnfsblock: fix non-aligned DIO read

For DIO read, if it is not sector aligned, we should reject it
and resend via MDS. Otherwise there might be data corruption.
Also teach bl_read_pagelist to handle partial page reads for DIO.

Cc: stable <stable@vger.kernel.org> [since v3.4]
Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
上级 fe6e1e8d
...@@ -252,8 +252,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) ...@@ -252,8 +252,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
sector_t isect, extent_length = 0; sector_t isect, extent_length = 0;
struct parallel_io *par; struct parallel_io *par;
loff_t f_offset = rdata->args.offset; loff_t f_offset = rdata->args.offset;
size_t bytes_left = rdata->args.count;
unsigned int pg_offset, pg_len;
struct page **pages = rdata->args.pages; struct page **pages = rdata->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
...@@ -287,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata) ...@@ -287,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
extent_length = min(extent_length, cow_length); extent_length = min(extent_length, cow_length);
} }
} }
if (is_dio) {
pg_offset = f_offset & ~PAGE_CACHE_MASK;
if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
pg_len = PAGE_CACHE_SIZE - pg_offset;
else
pg_len = bytes_left;
f_offset += pg_len;
bytes_left -= pg_len;
isect += (pg_offset >> SECTOR_SHIFT);
} else {
pg_offset = 0;
pg_len = PAGE_CACHE_SIZE;
}
hole = is_hole(be, isect); hole = is_hole(be, isect);
if (hole && !cow_read) { if (hole && !cow_read) {
bio = bl_submit_bio(READ, bio); bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */ /* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__); dprintk("%s Zeroing page for hole\n", __func__);
zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); zero_user_segment(pages[i], pg_offset, pg_len);
print_page(pages[i]); print_page(pages[i]);
SetPageUptodate(pages[i]); SetPageUptodate(pages[i]);
} else { } else {
struct pnfs_block_extent *be_read; struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be; be_read = (hole && cow_read) ? cow_read : be;
bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
READ, READ,
isect, pages[i], be_read, isect, pages[i], be_read,
bl_end_io_read, par); bl_end_io_read, par,
pg_offset, pg_len);
if (IS_ERR(bio)) { if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio); header->pnfs_error = PTR_ERR(bio);
bio = NULL; bio = NULL;
goto out; goto out;
} }
} }
isect += PAGE_CACHE_SECTORS; isect += (pg_len >> SECTOR_SHIFT);
extent_length -= PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS;
} }
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1; rdata->res.eof = 1;
rdata->res.count = header->inode->i_size - f_offset; rdata->res.count = header->inode->i_size - rdata->args.offset;
} else { } else {
rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
} }
out: out:
bl_put_extent(be); bl_put_extent(be);
...@@ -1149,9 +1169,37 @@ bl_clear_layoutdriver(struct nfs_server *server) ...@@ -1149,9 +1169,37 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0; return 0;
} }
/*
 * Check that both req->wb_offset and req->wb_bytes are aligned to
 * @alignment, as decided by IS_ALIGNED().
 *
 * Returns true only when both values pass the check.
 * NOTE(review): IS_ALIGNED() presumably expects a power-of-two
 * alignment; confirm against the macro definition.
 */
static bool
is_aligned_req(struct nfs_page *req, unsigned int alignment)
{
	/* A misaligned offset settles the answer; skip the length check. */
	if (!IS_ALIGNED(req->wb_offset, alignment))
		return false;

	return IS_ALIGNED(req->wb_bytes, alignment);
}
/*
 * pg_init hook for reads.
 *
 * When the descriptor carries a direct-I/O request (pg_dreq is set) and
 * @req is not SECTOR_SIZE aligned, the descriptor is reset through
 * nfs_pageio_reset_read_mds(), so the read is resent via the MDS
 * instead of the block layout. Every other request is handed to
 * pnfs_generic_pg_init_read().
 */
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	const bool dio = (pgio->pg_dreq != NULL);

	/* The alignment test is only evaluated for direct I/O. */
	if (dio && !is_aligned_req(req, SECTOR_SIZE)) {
		nfs_pageio_reset_read_mds(pgio);
		return;
	}

	pnfs_generic_pg_init_read(pgio, req);
}
/*
 * pg_test hook for reads.
 *
 * Returns false for a direct-I/O request (pg_dreq is set) whose @req is
 * not SECTOR_SIZE aligned. In all other cases the decision is delegated
 * to pnfs_generic_pg_test().
 */
static bool
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		struct nfs_page *req)
{
	/* Non-DIO requests, and aligned DIO requests, take the generic test. */
	if (pgio->pg_dreq == NULL || is_aligned_req(req, SECTOR_SIZE))
		return pnfs_generic_pg_test(pgio, prev, req);

	return false;
}
/*
 * Read-side page I/O operations table. In this side-by-side diff view
 * each row shows the old text followed by the new text: .pg_init and
 * .pg_test switch from the pnfs_generic_* functions to bl_pg_init_read
 * and bl_pg_test_read, while .pg_doio stays pnfs_generic_pg_readpages.
 */
static const struct nfs_pageio_ops bl_pg_read_ops = { static const struct nfs_pageio_ops bl_pg_read_ops = {
.pg_init = pnfs_generic_pg_init_read, .pg_init = bl_pg_init_read,
.pg_test = pnfs_generic_pg_test, .pg_test = bl_pg_test_read,
.pg_doio = pnfs_generic_pg_readpages, .pg_doio = pnfs_generic_pg_readpages,
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册