提交 2b2b15c3 编写于 作者: L Linus Torvalds

Merge tag 'nfs-for-3.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - stable fix for an infinite loop in RPC state machine
   - stable fix for a use after free situation in the NFSv4 trunking discovery
   - stable fix for error handling in the NFSv4 trunking discovery
   - stable fix for the page write update code
   - stable fix for the NFSv4.1 mount time security negotiation
   - stable fix for the NFSv4 open code.
   - O_DIRECT locking fixes
   - fix an Oops in the pnfs file commit code
   - RPC layer needs finer grained handling of connection errors
   - more RPC GSS upcall fixes"

* tag 'nfs-for-3.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (30 commits)
  pnfs: Proper delay for NFS4ERR_RECALLCONFLICT in layout_get_done
  pnfs: fix BUG in filelayout_recover_commit_reqs
  nfs4: fix discover_server_trunking use after free
  NFSv4.1: Handle errors correctly in nfs41_walk_client_list
  nfs: always make sure page is up-to-date before extending a write to cover the entire page
  nfs: page cache invalidation for dio
  nfs: take i_mutex during direct I/O reads
  nfs: merge nfs_direct_write into nfs_file_direct_write
  nfs: merge nfs_direct_read into nfs_file_direct_read
  nfs: increment i_dio_count for reads, too
  nfs: defer inode_dio_done call until size update is done
  nfs: fix size updates for aio writes
  nfs4.1: properly handle ENOTSUP in SECINFO_NO_NAME
  NFSv4.1: Fix a race in nfs4_write_inode
  NFSv4.1: Don't trust attributes if a pNFS LAYOUTCOMMIT is outstanding
  point to the right include file in a comment (left over from a9004abc)
  NFS: dprintk() should not print negative fileids and inode numbers
  nfs: fix dead code of ipv6_addr_scope
  sunrpc: Fix infinite loop in RPC state machine
  SUNRPC: Add tracepoint for socket errors
  ...
......@@ -1404,7 +1404,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
/* Expect a negative dentry */
BUG_ON(dentry->d_inode);
dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
err = nfs_check_flags(open_flags);
......@@ -1594,7 +1594,7 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
......@@ -1621,7 +1621,7 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
struct iattr attr;
int status;
dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
if (!new_valid_dev(rdev))
......@@ -1650,7 +1650,7 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct iattr attr;
int error;
dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_valid = ATTR_MODE;
......@@ -1678,7 +1678,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
trace_nfs_rmdir_enter(dir, dentry);
......@@ -1747,7 +1747,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
int error;
int need_rehash = 0;
dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
dir->i_ino, dentry);
trace_nfs_unlink_enter(dir, dentry);
......@@ -1798,7 +1798,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
unsigned int pathlen = strlen(symname);
int error;
dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry, symname);
if (pathlen > PAGE_SIZE)
......@@ -1821,7 +1821,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
dentry, symname, error);
d_drop(dentry);
......@@ -2304,7 +2304,7 @@ int nfs_permission(struct inode *inode, int mask)
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES;
dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
......
......@@ -222,14 +222,31 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
static void nfs_direct_complete(struct nfs_direct_req *dreq)
static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
struct inode *inode = dreq->inode;
if (dreq->iocb && write) {
loff_t pos = dreq->iocb->ki_pos + dreq->count;
spin_lock(&inode->i_lock);
if (i_size_read(inode) < pos)
i_size_write(inode, pos);
spin_unlock(&inode->i_lock);
}
if (write)
nfs_zap_mapping(inode, inode->i_mapping);
inode_dio_done(inode);
if (dreq->iocb) {
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
}
complete_all(&dreq->completion);
nfs_direct_req_release(dreq);
......@@ -237,9 +254,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
static void nfs_direct_readpage_release(struct nfs_page *req)
{
dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
......@@ -272,7 +289,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
nfs_direct_complete(dreq);
nfs_direct_complete(dreq, false);
hdr->release(hdr);
}
......@@ -402,6 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;
......@@ -410,6 +428,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
&nfs_direct_read_completion_ops);
get_dreq(dreq);
desc.pg_dreq = dreq;
atomic_inc(&inode->i_dio_count);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
......@@ -429,26 +448,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
if (put_dreq(dreq))
nfs_direct_complete(dreq);
nfs_direct_complete(dreq, false);
return 0;
}
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, bool uio)
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @iov: vector of user buffers into which to read data
* @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
* READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
*
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
ssize_t result = -EINVAL;
size_t count;
count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
file, count, (long long) pos);
result = 0;
if (!count)
goto out;
mutex_lock(&inode->i_mutex);
result = nfs_sync_mapping(mapping);
if (result)
goto out_unlock;
task_io_account_read(count);
result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
goto out;
goto out_unlock;
dreq->inode = inode;
dreq->bytes_left = iov_length(iov, nr_segs);
......@@ -464,20 +526,26 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
NFS_I(inode)->read_io += iov_length(iov, nr_segs);
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
mutex_unlock(&inode->i_mutex);
if (!result) {
result = nfs_direct_wait(dreq);
if (result > 0)
iocb->ki_pos = pos + result;
}
nfs_direct_req_release(dreq);
return result;
out_release:
nfs_direct_req_release(dreq);
out_unlock:
mutex_unlock(&inode->i_mutex);
out:
return result;
}
static void nfs_inode_dio_write_done(struct inode *inode)
{
nfs_zap_mapping(inode, inode->i_mapping);
inode_dio_done(inode);
}
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
......@@ -593,8 +661,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
nfs_inode_dio_write_done(dreq->inode);
nfs_direct_complete(dreq);
nfs_direct_complete(dreq, true);
}
}
......@@ -610,8 +677,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
nfs_inode_dio_write_done(inode);
nfs_direct_complete(dreq);
nfs_direct_complete(dreq, true);
}
#endif
......@@ -842,93 +908,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
return 0;
}
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
size_t count, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
dreq = nfs_direct_req_alloc();
if (!dreq)
goto out;
dreq->inode = inode;
dreq->bytes_left = count;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
goto out_release;
}
dreq->l_ctx = l_ctx;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
nfs_direct_req_release(dreq);
out:
return result;
}
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @iov: vector of user buffers into which to read data
* @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
* READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
*
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
size_t count;
count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
file, count, (long long) pos);
retval = 0;
if (!count)
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
task_io_account_read(count);
retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
if (retval > 0)
iocb->ki_pos = pos + retval;
out:
return retval;
}
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
......@@ -954,46 +933,96 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
loff_t end;
size_t count;
count = iov_length(iov, nr_segs);
end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, count, (long long) pos);
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
result = generic_write_checks(file, &pos, &count, 0);
if (result)
goto out;
retval = -EINVAL;
result = -EINVAL;
if ((ssize_t) count < 0)
goto out;
retval = 0;
result = 0;
if (!count)
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
mutex_lock(&inode->i_mutex);
result = nfs_sync_mapping(mapping);
if (result)
goto out_unlock;
if (mapping->nrpages) {
result = invalidate_inode_pages2_range(mapping,
pos >> PAGE_CACHE_SHIFT, end);
if (result)
goto out_unlock;
}
task_io_account_write(count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
if (retval > 0) {
struct inode *inode = mapping->host;
result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (!dreq)
goto out_unlock;
iocb->ki_pos = pos + retval;
spin_lock(&inode->i_lock);
if (i_size_read(inode) < iocb->ki_pos)
i_size_write(inode, iocb->ki_pos);
spin_unlock(&inode->i_lock);
dreq->inode = inode;
dreq->bytes_left = count;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
goto out_release;
}
dreq->l_ctx = l_ctx;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
pos >> PAGE_CACHE_SHIFT, end);
}
mutex_unlock(&inode->i_mutex);
if (!result) {
result = nfs_direct_wait(dreq);
if (result > 0) {
struct inode *inode = mapping->host;
iocb->ki_pos = pos + result;
spin_lock(&inode->i_lock);
if (i_size_read(inode) < iocb->ki_pos)
i_size_write(inode, iocb->ki_pos);
spin_unlock(&inode->i_lock);
}
}
nfs_direct_req_release(dreq);
return result;
out_release:
nfs_direct_req_release(dreq);
out_unlock:
mutex_unlock(&inode->i_mutex);
out:
return retval;
return result;
}
/**
......
......@@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
int once_thru = 0;
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
start:
......@@ -395,7 +395,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
/*
......@@ -585,7 +585,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
int ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
(long long)page_offset(page));
......
......@@ -458,9 +458,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)NFS_FILEID(inode),
nfs_display_fhandle_hash(fh),
atomic_read(&inode->i_count));
......@@ -870,8 +870,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
struct nfs_fattr *fattr = NULL;
struct nfs_inode *nfsi = NFS_I(inode);
dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
inode->i_sb->s_id, (long long)NFS_FILEID(inode));
dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n",
inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode));
trace_nfs_revalidate_inode_enter(inode);
......@@ -895,9 +895,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label);
if (status != 0) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode), status);
(unsigned long long)NFS_FILEID(inode), status);
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
......@@ -908,9 +908,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
status = nfs_refresh_inode(inode, fattr);
if (status) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode), status);
(unsigned long long)NFS_FILEID(inode), status);
goto err_out;
}
......@@ -919,9 +919,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
nfs_setsecurity(inode, fattr, label);
dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode));
(unsigned long long)NFS_FILEID(inode));
err_out:
nfs4_label_free(label);
......@@ -985,8 +985,9 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
inode->i_sb->s_id, (long long)NFS_FILEID(inode));
dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode));
return 0;
}
......@@ -1282,12 +1283,28 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
/*
* Don't trust the change_attribute, mtime, ctime or size if
* a pnfs LAYOUTCOMMIT is outstanding
*/
static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode,
struct nfs_fattr *fattr)
{
if (pnfs_layoutcommit_outstanding(inode))
fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE |
NFS_ATTR_FATTR_MTIME |
NFS_ATTR_FATTR_CTIME |
NFS_ATTR_FATTR_SIZE);
}
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
int ret;
trace_nfs_refresh_inode_enter(inode);
nfs_inode_attrs_handle_layoutcommit(inode, fattr);
if (nfs_inode_attrs_need_update(inode, fattr))
ret = nfs_update_inode(inode, fattr);
else
......@@ -1434,7 +1451,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long now = jiffies;
unsigned long save_cache_validity;
dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
......@@ -1455,7 +1472,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/*
* Big trouble! The inode has become a different object.
*/
printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n",
__func__, inode->i_ino, inode->i_mode, fattr->mode);
goto out_err;
}
......@@ -1517,8 +1534,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
new_isize > cur_isize) {
if ((nfsi->npages == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
}
......
......@@ -10,6 +10,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include "internal.h"
#include "callback.h"
#include "delegation.h"
......@@ -370,7 +371,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
error = -EINVAL;
if (gssd_running(clp->cl_net))
error = nfs_create_rpc_client(clp, timeparms,
RPC_AUTH_GSS_KRB5I);
if (error == -EINVAL)
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
if (error < 0)
......@@ -409,13 +414,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
nfs_put_client(clp);
if (clp != old) {
clp->cl_preserve_clid = true;
clp = old;
}
return clp;
if (clp != old)
clp->cl_preserve_clid = true;
nfs_put_client(clp);
return old;
error:
nfs_mark_client_ready(clp, error);
......@@ -493,9 +496,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
spin_lock(&nn->nfs_client_lock);
if (status < 0)
continue;
goto out;
status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
......@@ -633,7 +637,8 @@ int nfs41_walk_client_list(struct nfs_client *new,
}
spin_lock(&nn->nfs_client_lock);
if (status < 0)
continue;
break;
status = -NFS4ERR_STALE_CLIENTID;
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
......
......@@ -91,10 +91,10 @@ static void filelayout_reset_write(struct nfs_write_data *data)
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
(long long)NFS_FILEID(hdr->inode),
(unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
......@@ -112,10 +112,10 @@ static void filelayout_reset_read(struct nfs_read_data *data)
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
(long long)NFS_FILEID(hdr->inode),
(unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
......@@ -1216,17 +1216,17 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
struct pnfs_commit_bucket *b;
int i;
/* NOTE cinfo->lock is NOT held, relying on fact that this is
* only called on single thread per dreq.
* Can't take the lock because need to do pnfs_put_lseg
*/
spin_lock(cinfo->lock);
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
spin_unlock(cinfo->lock);
pnfs_put_lseg(b->wlseg);
b->wlseg = NULL;
spin_lock(cinfo->lock);
}
}
cinfo->ds->nwritten = 0;
spin_unlock(cinfo->lock);
}
static unsigned int
......
......@@ -95,7 +95,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
b6 = (struct sockaddr_in6 *)addr2;
/* LINKLOCAL addresses must have matching scope_id */
if (ipv6_addr_scope(&a6->sin6_addr) ==
if (ipv6_addr_src_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
return false;
......
......@@ -7409,9 +7409,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
unsigned long timeo, giveup;
unsigned long timeo, now, giveup;
dprintk("--> %s\n", __func__);
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
......@@ -7419,12 +7419,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
/*
* NFS4ERR_LAYOUTTRYLATER is a conflict with another client
* (or clients) writing to the same RAID stripe
*/
case -NFS4ERR_LAYOUTTRYLATER:
/*
* NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
* existing layout before getting a new one).
*/
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
if (time_after(giveup, jiffies))
task->tk_status = -NFS4ERR_DELAY;
now = jiffies;
if (time_after(giveup, now)) {
unsigned long delay;
/* Delay for:
* - Not less then NFS4_POLL_RETRY_MIN.
* - One last time a jiffie before we give up
* - exponential backoff (time_now minus start_attempt)
*/
delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
min((giveup - now - 1),
now - lgp->args.timestamp));
dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
__func__, delay);
rpc_delay(task, delay);
task->tk_status = 0;
rpc_restart_call_prepare(task);
goto out; /* Do not call nfs4_async_handle_error() */
}
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
......@@ -7780,10 +7806,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_BADLAYOUT: /* no layout */
case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
break;
case 0:
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
break;
default:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
......@@ -7798,6 +7821,8 @@ static void nfs4_layoutcommit_release(void *calldata)
struct nfs4_layoutcommit_data *data = calldata;
pnfs_cleanup_layoutcommit(data);
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
put_rpccred(data->cred);
kfree(data);
}
......@@ -7920,7 +7945,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_NOTSUPP:
case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
......@@ -7954,7 +7979,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
......
......@@ -1071,7 +1071,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
/*
* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
* failed with a seqid incrementing error -
* see comments nfs_fs.h:seqid_mutating_error()
* see comments nfs4.h:seqid_mutating_error()
*/
static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
{
......@@ -1116,7 +1116,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
/*
* Increment the seqid if the LOCK/LOCKU succeeded, or
* failed with a seqid incrementing error -
* see comments nfs_fs.h:seqid_mutating_error()
* see comments nfs4.h:seqid_mutating_error()
*/
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
......
......@@ -77,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
int status;
bool sync = true;
if (wbc->sync_mode == WB_SYNC_NONE)
sync = false;
status = pnfs_layoutcommit_inode(inode, sync);
if (status < 0)
return status;
}
if (ret == 0)
ret = pnfs_layoutcommit_inode(inode,
wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
......
......@@ -3097,7 +3097,8 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
return -EIO;
}
static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
......@@ -3107,19 +3108,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
if (opnum != expected) {
dprintk("nfs: Server returned operation"
" %d but we issued a request for %d\n",
opnum, expected);
return -EIO;
}
if (unlikely(opnum != expected))
goto out_bad_operation;
nfserr = be32_to_cpup(p);
if (nfserr != NFS_OK)
return nfs4_stat_to_errno(nfserr);
return 0;
if (nfserr == NFS_OK)
*nfs_retval = 0;
else
*nfs_retval = nfs4_stat_to_errno(nfserr);
return true;
out_bad_operation:
dprintk("nfs: Server returned operation"
" %d but we issued a request for %d\n",
opnum, expected);
*nfs_retval = -EREMOTEIO;
return false;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
*nfs_retval = -EIO;
return false;
}
static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
{
int retval;
__decode_op_hdr(xdr, expected, &retval);
return retval;
}
/* Dummy routine */
......@@ -5001,11 +5015,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
status = decode_op_hdr(xdr, OP_OPEN);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
if (!status)
status = decode_stateid(xdr, &res->stateid);
if (!__decode_op_hdr(xdr, OP_OPEN, &status))
return status;
nfs_increment_open_seqid(status, res->seqid);
if (status)
return status;
status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
......
......@@ -1790,6 +1790,15 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
static void pnfs_clear_layoutcommitting(struct inode *inode)
{
unsigned long *bitlock = &NFS_I(inode)->flags;
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}
/*
* There can be multiple RW segments.
*/
......@@ -1807,7 +1816,6 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
struct pnfs_layout_segment *lseg, *tmp;
unsigned long *bitlock = &NFS_I(inode)->flags;
/* Matched by references in pnfs_set_layoutcommit */
list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
......@@ -1815,9 +1823,7 @@ static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *lis
pnfs_put_lseg(lseg);
}
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
pnfs_clear_layoutcommitting(inode);
}
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
......@@ -1881,43 +1887,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs4_layoutcommit_data *data;
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
int status = 0;
int status;
dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
if (!pnfs_layoutcommit_outstanding(inode))
return 0;
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data) {
status = -ENOMEM;
goto out;
}
if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
goto out_free;
dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
status = -EAGAIN;
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync) {
status = -EAGAIN;
goto out_free;
}
status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (!sync)
goto out;
status = wait_on_bit_lock(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
if (status)
goto out_free;
goto out;
}
INIT_LIST_HEAD(&data->lseg_list);
status = -ENOMEM;
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data)
goto clear_layoutcommitting;
status = 0;
spin_lock(&inode->i_lock);
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
spin_unlock(&inode->i_lock);
wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
goto out_free;
}
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
goto out_unlock;
INIT_LIST_HEAD(&data->lseg_list);
pnfs_list_write_lseg(inode, &data->lseg_list);
end_pos = nfsi->layout->plh_lwb;
......@@ -1940,8 +1940,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
out_free:
out_unlock:
spin_unlock(&inode->i_lock);
kfree(data);
clear_layoutcommitting:
pnfs_clear_layoutcommitting(inode);
goto out;
}
......
......@@ -359,6 +359,15 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode)
PNFS_LAYOUTRET_ON_SETATTR;
}
static inline bool
pnfs_layoutcommit_outstanding(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags) != 0 ||
test_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags) != 0;
}
static inline int pnfs_return_layout(struct inode *ino)
{
struct nfs_inode *nfsi = NFS_I(ino);
......@@ -515,6 +524,13 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
return false;
}
static inline bool
pnfs_layoutcommit_outstanding(struct inode *inode)
{
return false;
}
static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
return NULL;
......
......@@ -163,9 +163,9 @@ static void nfs_readpage_release(struct nfs_page *req)
unlock_page(req->wb_page);
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
......@@ -228,11 +228,11 @@ int nfs_initiate_read(struct rpc_clnt *clnt,
/* Set up the initial task struct. */
NFS_PROTO(inode)->read_setup(data, &msg);
dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
"offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)NFS_FILEID(inode),
data->args.count,
(unsigned long long)data->args.offset);
......@@ -630,9 +630,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
unsigned long npages;
int ret = -ESTALE;
dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)NFS_FILEID(inode),
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
......
......@@ -922,19 +922,20 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
* extend the write to cover the entire page in order to avoid fragmentation
* inefficiencies.
*
* If the file is opened for synchronous writes or if we have a write delegation
* from the server then we can just skip the rest of the checks.
* If the file is opened for synchronous writes then we can just skip the rest
* of the checks.
*/
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
if (file->f_flags & O_DSYNC)
return 0;
if (!nfs_write_pageuptodate(page, inode))
return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL ||
(inode->i_flock->fl_start == 0 &&
if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
inode->i_flock->fl_end == OFFSET_MAX &&
inode->i_flock->fl_type != F_RDLCK)))
inode->i_flock->fl_type != F_RDLCK))
return 1;
return 0;
}
......@@ -1013,10 +1014,10 @@ int nfs_initiate_write(struct rpc_clnt *clnt,
NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%lld, %u bytes @ offset %llu)\n",
"(req %s/%llu, %u bytes @ offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)NFS_FILEID(inode),
data->args.count,
(unsigned long long)data->args.offset);
......@@ -1606,9 +1607,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
nfs_list_remove_request(req);
nfs_clear_page_commit(req->wb_page);
dprintk("NFS: commit (%s/%lld %d@%lld)",
dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id,
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
......
......@@ -84,7 +84,8 @@ enum {
extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name);
extern void rpc_pipefs_init_net(struct net *net);
extern int rpc_pipefs_init_net(struct net *net);
extern void rpc_pipefs_exit_net(struct net *net);
extern struct super_block *rpc_get_sb_net(const struct net *net);
extern void rpc_put_sb_net(const struct net *net);
......@@ -130,5 +131,7 @@ extern int rpc_unlink(struct dentry *);
extern int register_rpc_pipefs(void);
extern void unregister_rpc_pipefs(void);
extern bool gssd_running(struct net *net);
#endif
#endif
......@@ -301,6 +301,7 @@ DECLARE_EVENT_CLASS(xs_socket_event_done,
DEFINE_RPC_SOCKET_EVENT(rpc_socket_state_change);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_connect);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_error);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_close);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown);
......
......@@ -536,8 +536,7 @@ static void warn_gssd(void)
unsigned long now = jiffies;
if (time_after(now, ratelimit)) {
printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
"Please check user daemon is running.\n");
pr_warn("RPC: AUTH_GSS upcall failed. Please check user daemon is running.\n");
ratelimit = now + 15*HZ;
}
}
......@@ -600,7 +599,6 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
unsigned long timeout;
DEFINE_WAIT(wait);
int err;
......@@ -608,17 +606,16 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
__func__, from_kuid(&init_user_ns, cred->cr_uid));
retry:
err = 0;
/* Default timeout is 15s unless we know that gssd is not running */
timeout = 15 * HZ;
if (!sn->gssd_running)
timeout = HZ >> 2;
/* if gssd is down, just skip upcalling altogether */
if (!gssd_running(net)) {
warn_gssd();
return -EACCES;
}
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
sn->pipe_version >= 0, timeout);
sn->pipe_version >= 0, 15 * HZ);
if (sn->pipe_version < 0) {
if (err == 0)
sn->gssd_running = 0;
warn_gssd();
err = -EACCES;
}
......
......@@ -1529,9 +1529,13 @@ call_refreshresult(struct rpc_task *task)
task->tk_action = call_refresh;
switch (status) {
case 0:
if (rpcauth_uptodatecred(task))
if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
return;
return;
}
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
case -EAGAIN:
......@@ -1729,6 +1733,7 @@ call_bind_status(struct rpc_task *task)
return;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
case -EHOSTUNREACH:
......@@ -1799,7 +1804,9 @@ call_connect_status(struct rpc_task *task)
return;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
if (RPC_IS_SOFTCONN(task))
......@@ -1902,6 +1909,7 @@ call_transmit_status(struct rpc_task *task)
break;
}
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EPIPE:
rpc_task_force_reencode(task);
......@@ -2011,8 +2019,9 @@ call_status(struct rpc_task *task)
xprt_conditional_disconnect(req->rq_xprt,
req->rq_connect_cookie);
break;
case -ECONNRESET:
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
rpc_delay(task, 3*HZ);
case -EPIPE:
......
......@@ -14,6 +14,7 @@ struct sunrpc_net {
struct cache_detail *rsi_cache;
struct super_block *pipefs_sb;
struct rpc_pipe *gssd_dummy;
struct mutex pipefs_sb_lock;
struct list_head all_clients;
......@@ -32,8 +33,6 @@ struct sunrpc_net {
int pipe_version;
atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
unsigned int gssd_running;
};
extern int sunrpc_net_id;
......
......@@ -17,6 +17,7 @@
#include <linux/fsnotify.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/utsname.h>
#include <asm/ioctls.h>
#include <linux/poll.h>
......@@ -38,7 +39,7 @@
#define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "")
static struct file_system_type rpc_pipe_fs_type;
static const struct rpc_pipe_ops gssd_dummy_pipe_ops;
static struct kmem_cache *rpc_inode_cachep __read_mostly;
......@@ -216,14 +217,11 @@ rpc_destroy_inode(struct inode *inode)
static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
struct net *net = inode->i_sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
int first_open;
int res = -ENXIO;
mutex_lock(&inode->i_mutex);
sn->gssd_running = 1;
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
......@@ -1159,6 +1157,7 @@ enum {
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
RPCAUTH_nfsd,
RPCAUTH_gssd,
RPCAUTH_RootEOF
};
......@@ -1195,6 +1194,10 @@ static const struct rpc_filelist files[] = {
.name = "nfsd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_gssd] = {
.name = "gssd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
};
/*
......@@ -1208,13 +1211,24 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
void rpc_pipefs_init_net(struct net *net)
int rpc_pipefs_init_net(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
sn->gssd_dummy = rpc_mkpipe_data(&gssd_dummy_pipe_ops, 0);
if (IS_ERR(sn->gssd_dummy))
return PTR_ERR(sn->gssd_dummy);
mutex_init(&sn->pipefs_sb_lock);
sn->gssd_running = 1;
sn->pipe_version = -1;
return 0;
}
void rpc_pipefs_exit_net(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
rpc_destroy_pipe_data(sn->gssd_dummy);
}
/*
......@@ -1244,11 +1258,134 @@ void rpc_put_sb_net(const struct net *net)
}
EXPORT_SYMBOL_GPL(rpc_put_sb_net);
static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
[0] = {
.name = "clntXX",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
};
static ssize_t
dummy_downcall(struct file *filp, const char __user *src, size_t len)
{
return -EINVAL;
}
static const struct rpc_pipe_ops gssd_dummy_pipe_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = dummy_downcall,
};
/*
* Here we present a bogus "info" file to keep rpc.gssd happy. We don't expect
* that it will ever use this info to handle an upcall, but rpc.gssd expects
* that this file will be there and have a certain format.
*/
static int
rpc_show_dummy_info(struct seq_file *m, void *v)
{
seq_printf(m, "RPC server: %s\n", utsname()->nodename);
seq_printf(m, "service: foo (1) version 0\n");
seq_printf(m, "address: 127.0.0.1\n");
seq_printf(m, "protocol: tcp\n");
seq_printf(m, "port: 0\n");
return 0;
}
static int
rpc_dummy_info_open(struct inode *inode, struct file *file)
{
return single_open(file, rpc_show_dummy_info, NULL);
}
static const struct file_operations rpc_dummy_info_operations = {
.owner = THIS_MODULE,
.open = rpc_dummy_info_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
.i_fop = &rpc_dummy_info_operations,
.mode = S_IFREG | S_IRUSR,
},
};
/**
* rpc_gssd_dummy_populate - create a dummy gssd pipe
* @root: root of the rpc_pipefs filesystem
* @pipe_data: pipe data created when netns is initialized
*
* Create a dummy set of directories and a pipe that gssd can hold open to
* indicate that it is up and running.
*/
static struct dentry *
rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
{
int ret = 0;
struct dentry *gssd_dentry;
struct dentry *clnt_dentry = NULL;
struct dentry *pipe_dentry = NULL;
struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name,
strlen(files[RPCAUTH_gssd].name));
/* We should never get this far if "gssd" doesn't exist */
gssd_dentry = d_hash_and_lookup(root, &q);
if (!gssd_dentry)
return ERR_PTR(-ENOENT);
ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL);
if (ret) {
pipe_dentry = ERR_PTR(ret);
goto out;
}
q.name = gssd_dummy_clnt_dir[0].name;
q.len = strlen(gssd_dummy_clnt_dir[0].name);
clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
if (!clnt_dentry) {
pipe_dentry = ERR_PTR(-ENOENT);
goto out;
}
ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL);
if (ret) {
__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(ret);
goto out;
}
pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
if (IS_ERR(pipe_dentry)) {
__rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1);
__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
}
out:
dput(clnt_dentry);
dput(gssd_dentry);
return pipe_dentry;
}
static void
rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
{
struct dentry *clnt_dir = pipe_dentry->d_parent;
struct dentry *gssd_dir = clnt_dir->d_parent;
__rpc_rmpipe(clnt_dir->d_inode, pipe_dentry);
__rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
__rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
dput(pipe_dentry);
}
static int
rpc_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct dentry *root;
struct dentry *root, *gssd_dentry;
struct net *net = data;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
......@@ -1266,6 +1403,13 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
if (IS_ERR(gssd_dentry)) {
__rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
return PTR_ERR(gssd_dentry);
}
dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
net, NET_NAME(net));
mutex_lock(&sn->pipefs_sb_lock);
......@@ -1280,6 +1424,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return 0;
err_depopulate:
rpc_gssd_dummy_depopulate(gssd_dentry);
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
......@@ -1289,6 +1434,16 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
bool
gssd_running(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe = sn->gssd_dummy;
return pipe->nreaders || pipe->nwriters;
}
EXPORT_SYMBOL_GPL(gssd_running);
static struct dentry *
rpc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
......
......@@ -44,12 +44,17 @@ static __net_init int sunrpc_init_net(struct net *net)
if (err)
goto err_unixgid;
rpc_pipefs_init_net(net);
err = rpc_pipefs_init_net(net);
if (err)
goto err_pipefs;
INIT_LIST_HEAD(&sn->all_clients);
spin_lock_init(&sn->rpc_client_lock);
spin_lock_init(&sn->rpcb_clnt_lock);
return 0;
err_pipefs:
unix_gid_cache_destroy(net);
err_unixgid:
ip_map_cache_destroy(net);
err_ipmap:
......@@ -60,6 +65,7 @@ static __net_init int sunrpc_init_net(struct net *net)
static __net_exit void sunrpc_exit_net(struct net *net)
{
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
......
......@@ -749,6 +749,11 @@ static void xprt_connect_status(struct rpc_task *task)
}
switch (task->tk_status) {
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
......
......@@ -257,6 +257,7 @@ struct sock_xprt {
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
void (*old_write_space)(struct sock *);
void (*old_error_report)(struct sock *);
};
/*
......@@ -274,6 +275,11 @@ struct sock_xprt {
*/
#define TCP_RPC_REPLY (1UL << 6)
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
{
return (struct rpc_xprt *) sk->sk_user_data;
}
static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
{
return (struct sockaddr *) &xprt->addr;
......@@ -799,6 +805,7 @@ static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
transport->old_data_ready = sk->sk_data_ready;
transport->old_state_change = sk->sk_state_change;
transport->old_write_space = sk->sk_write_space;
transport->old_error_report = sk->sk_error_report;
}
static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
......@@ -806,6 +813,34 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_data_ready = transport->old_data_ready;
sk->sk_state_change = transport->old_state_change;
sk->sk_write_space = transport->old_write_space;
sk->sk_error_report = transport->old_error_report;
}
/**
* xs_error_report - callback to handle TCP socket state errors
* @sk: socket
*
* Note: we don't call sock_error() since there may be a rpc_task
* using the socket, and so we don't want to clear sk->sk_err.
*/
static void xs_error_report(struct sock *sk)
{
struct rpc_xprt *xprt;
int err;
read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
err = -sk->sk_err;
if (err == 0)
goto out;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
xprt_wake_pending_tasks(xprt, err);
out:
read_unlock_bh(&sk->sk_callback_lock);
}
static void xs_reset_transport(struct sock_xprt *transport)
......@@ -885,11 +920,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
{
return (struct rpc_xprt *) sk->sk_user_data;
}
static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
struct xdr_skb_reader desc = {
......@@ -1869,6 +1899,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_local_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
xprt_clear_connected(xprt);
......@@ -2146,6 +2177,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册