提交 759b2656 编写于 作者: L Linus Torvalds

Merge tag 'nfsd-4.10' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The one new feature is support for a new NFSv4.2 mode_umask attribute
  that makes ACL inheritance a little more useful in environments that
  default to restrictive umasks. Requires client-side support, also on
  its way for 4.10.

  Other than that, miscellaneous smaller fixes and cleanup, especially
  to the server rdma code"

[ The client side of the umask attribute was merged yesterday ]

* tag 'nfsd-4.10' of git://linux-nfs.org/~bfields/linux:
  nfsd: add support for the umask attribute
  sunrpc: use DEFINE_SPINLOCK()
  svcrdma: Further clean-up of svc_rdma_get_inv_rkey()
  svcrdma: Break up dprintk format in svc_rdma_accept()
  svcrdma: Remove unused variable in rdma_copy_tail()
  svcrdma: Remove unused variables in xprt_rdma_bc_allocate()
  svcrdma: Remove svc_rdma_op_ctxt::wc_status
  svcrdma: Remove DMA map accounting
  svcrdma: Remove BH-disabled spin locking in svc_rdma_send()
  svcrdma: Renovate sendto chunk list parsing
  svcauth_gss: Close connection when dropping an incoming message
  svcrdma: Clear xpt_bc_xps in xprt_setup_rdma_bc() error exit arm
  nfsd: constify reply_cache_stats_operations structure
  nfsd: update workqueue creation
  sunrpc: GFP_KERNEL should be GFP_NOFS in crypto code
  nfsd: catch errors in decode_fattr earlier
  nfsd: clean up supported attribute handling
  nfsd: fix error handling for clients that fail to return the layout
  nfsd: more robust allocation failure handling in nfsd_reply_cache_init
......@@ -1061,7 +1061,7 @@ static const struct rpc_call_ops nfsd4_cb_ops = {
int nfsd4_create_callback_queue(void)
{
callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
if (!callback_wq)
return -ENOMEM;
return 0;
......
......@@ -686,10 +686,6 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
return 0;
}
/* Fallthrough */
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
return 1;
default:
/*
* Unknown error or non-responding client, we'll need to fence.
......@@ -702,6 +698,10 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
else
nfsd4_cb_layout_fail(ls);
return -1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
return 1;
}
}
......
......@@ -96,33 +96,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
/*
* Check whether the requested attributes are supported by the NFSv4 server.
* According to the spec, unsupported attributes return ERR_ATTRNOTSUPP.
*/
if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) ||
(bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) ||
(bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
/*
* Check whether FATTR4_WORD0_ACL can be supported
* in the current environment.
*/
if (bmval[0] & FATTR4_WORD0_ACL) {
if (!IS_POSIXACL(d_inode(dentry)))
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
}
/*
* According to spec, read-only attributes return ERR_INVAL.
*/
if (writable) {
if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
(bmval[2] & ~writable[2]))
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
(bmval[1] & FATTR4_WORD1_MODE))
return nfserr_inval;
}
return nfs_ok;
}
......@@ -695,9 +677,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
return nfserr_inval;
getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
getattr->ga_bmval[0] &= nfsd_suppattrs[cstate->minorversion][0];
getattr->ga_bmval[1] &= nfsd_suppattrs[cstate->minorversion][1];
getattr->ga_bmval[2] &= nfsd_suppattrs[cstate->minorversion][2];
getattr->ga_fhp = &cstate->current_fh;
return nfs_ok;
......@@ -799,9 +781,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
return nfserr_inval;
readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
readdir->rd_bmval[0] &= nfsd_suppattrs[cstate->minorversion][0];
readdir->rd_bmval[1] &= nfsd_suppattrs[cstate->minorversion][1];
readdir->rd_bmval[2] &= nfsd_suppattrs[cstate->minorversion][2];
if ((cookie == 1) || (cookie == 2) ||
(cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
......
......@@ -33,6 +33,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
......@@ -57,6 +58,20 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
/*
 * Supported-attribute bitmaps, indexed as nfsd_suppattrs[minorversion][word]:
 * one row per NFSv4 minor version (0, 1, 2) and one column per 32-bit
 * bitmap word. The minor version 2 row reuses the 4.1 masks for words 0
 * and 1 and differs only in word 2.
 *
 * NOTE(review): this table is not const, and nfsd4_encode_fattr() takes a
 * pointer to a row ("u32 *supp = nfsd_suppattrs[minorversion]") and clears
 * bits through it, so those changes persist in this shared table. Confirm
 * whether a local copy of the row was intended there.
 */
u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
{NFSD4_1_SUPPORTED_ATTRS_WORD0,
NFSD4_1_SUPPORTED_ATTRS_WORD1,
NFSD4_1_SUPPORTED_ATTRS_WORD2},
{NFSD4_1_SUPPORTED_ATTRS_WORD0,
NFSD4_1_SUPPORTED_ATTRS_WORD1,
NFSD4_2_SUPPORTED_ATTRS_WORD2},
};
/*
* As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
* directory in order to indicate to the client that a filesystem boundary is present
......@@ -285,7 +300,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
static __be32
nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label)
struct xdr_netobj *label, int *umask)
{
int expected_len, len = 0;
u32 dummy32;
......@@ -296,6 +311,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if ((status = nfsd4_decode_bitmap(argp, bmval)))
return status;
if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
|| bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
|| bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) {
if (nfsd_attrs_supported(argp->minorversion, bmval))
return nfserr_inval;
return nfserr_attrnotsupp;
}
READ_BUF(4);
expected_len = be32_to_cpup(p++);
......@@ -435,12 +458,18 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return nfserr_jukebox;
}
#endif
if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
|| bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
|| bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
READ_BUF(expected_len - len);
else if (len != expected_len)
if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
if (!umask)
goto xdr_error;
READ_BUF(8);
len += 8;
dummy32 = be32_to_cpup(p++);
iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO);
dummy32 = be32_to_cpup(p++);
*umask = dummy32 & S_IRWXUGO;
iattr->ia_valid |= ATTR_MODE;
}
if (len != expected_len)
goto xdr_error;
DECODE_TAIL;
......@@ -634,7 +663,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
return status;
status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
&create->cr_acl, &create->cr_label);
&create->cr_acl, &create->cr_label,
&current->fs->umask);
if (status)
goto out;
......@@ -879,13 +909,15 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
case NFS4_OPEN_NOCREATE:
break;
case NFS4_OPEN_CREATE:
current->fs->umask = 0;
READ_BUF(4);
open->op_createmode = be32_to_cpup(p++);
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
case NFS4_CREATE_GUARDED:
status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label);
&open->op_iattr, &open->op_acl, &open->op_label,
&current->fs->umask);
if (status)
goto out;
break;
......@@ -899,7 +931,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
READ_BUF(NFS4_VERIFIER_SIZE);
COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
status = nfsd4_decode_fattr(argp, open->op_bmval,
&open->op_iattr, &open->op_acl, &open->op_label);
&open->op_iattr, &open->op_acl, &open->op_label,
&current->fs->umask);
if (status)
goto out;
break;
......@@ -1136,7 +1169,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
if (status)
return status;
return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
&setattr->sa_acl, &setattr->sa_label);
&setattr->sa_acl, &setattr->sa_label, NULL);
}
static __be32
......@@ -2340,9 +2373,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion));
BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion));
BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
if (exp->ex_fslocs.migrated) {
status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err);
......@@ -2409,29 +2440,27 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
u32 word0 = nfsd_suppattrs0(minorversion);
u32 word1 = nfsd_suppattrs1(minorversion);
u32 word2 = nfsd_suppattrs2(minorversion);
u32 *supp = nfsd_suppattrs[minorversion];
if (!IS_POSIXACL(dentry->d_inode))
word0 &= ~FATTR4_WORD0_ACL;
supp[0] &= ~FATTR4_WORD0_ACL;
if (!contextsupport)
word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
if (!word2) {
supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
if (!supp[2]) {
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(2);
*p++ = cpu_to_be32(word0);
*p++ = cpu_to_be32(word1);
*p++ = cpu_to_be32(supp[0]);
*p++ = cpu_to_be32(supp[1]);
} else {
p = xdr_reserve_space(xdr, 16);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(3);
*p++ = cpu_to_be32(word0);
*p++ = cpu_to_be32(word1);
*p++ = cpu_to_be32(word2);
*p++ = cpu_to_be32(supp[0]);
*p++ = cpu_to_be32(supp[1]);
*p++ = cpu_to_be32(supp[2]);
}
}
if (bmval0 & FATTR4_WORD0_TYPE) {
......
......@@ -9,6 +9,7 @@
*/
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
#include <linux/highmem.h>
#include <linux/log2.h>
......@@ -174,8 +175,12 @@ int nfsd_reply_cache_init(void)
goto out_nomem;
drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
if (!drc_hashtbl) {
drc_hashtbl = vzalloc(hashsize * sizeof(*drc_hashtbl));
if (!drc_hashtbl)
goto out_nomem;
}
for (i = 0; i < hashsize; i++) {
INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
spin_lock_init(&drc_hashtbl[i].cache_lock);
......@@ -204,7 +209,7 @@ void nfsd_reply_cache_shutdown(void)
}
}
kfree (drc_hashtbl);
kvfree(drc_hashtbl);
drc_hashtbl = NULL;
drc_hashsize = 0;
......
......@@ -217,7 +217,7 @@ static const struct file_operations pool_stats_operations = {
.release = nfsd_pool_stats_release,
};
static struct file_operations reply_cache_stats_operations = {
static const struct file_operations reply_cache_stats_operations = {
.open = nfsd_reply_cache_stats_open,
.read = seq_read,
.llseek = seq_lseek,
......
......@@ -359,44 +359,46 @@ void nfsd_lockd_shutdown(void);
#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
static inline u32 nfsd_suppattrs0(u32 minorversion)
{
return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
: NFSD4_SUPPORTED_ATTRS_WORD0;
}
extern u32 nfsd_suppattrs[3][3];
static inline u32 nfsd_suppattrs1(u32 minorversion)
static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
{
return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
: NFSD4_SUPPORTED_ATTRS_WORD1;
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
static inline u32 nfsd_suppattrs2(u32 minorversion)
static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
{
switch (minorversion) {
default: return NFSD4_2_SUPPORTED_ATTRS_WORD2;
case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2;
case 0: return NFSD4_SUPPORTED_ATTRS_WORD2;
}
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
#define NFSD_WRITEONLY_ATTRS_WORD1 \
(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
/*
* These are the only attrs allowed in CREATE/OPEN/SETATTR. Don't add
* a writeable attribute here without also adding code to parse it to
* nfsd4_decode_fattr().
*/
#define NFSD_WRITEABLE_ATTRS_WORD0 \
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
FATTR4_WORD2_SECURITY_LABEL
#else
#define NFSD_WRITEABLE_ATTRS_WORD2 0
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL 0
#endif
#define NFSD_WRITEABLE_ATTRS_WORD2 \
(FATTR4_WORD2_MODE_UMASK \
| MAYBE_FATTR4_WORD2_SECURITY_LABEL)
#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
NFSD_WRITEABLE_ATTRS_WORD0
......
......@@ -661,8 +661,8 @@ nfsd(void *vrqstp)
mutex_lock(&nfsd_mutex);
/* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
* with the init process. We need to create files with the
* umask as defined by the client instead of init's umask. */
if (unshare_fs_struct() < 0) {
printk("Unable to start nfsd thread: out of memory\n");
goto out;
......
......@@ -79,7 +79,6 @@ struct svc_rdma_op_ctxt {
struct ib_cqe reg_cqe;
struct ib_cqe inv_cqe;
struct list_head dto_q;
enum ib_wc_status wc_status;
u32 byte_len;
u32 position;
struct svcxprt_rdma *xprt;
......@@ -139,7 +138,7 @@ struct svcxprt_rdma {
int sc_max_sge_rd; /* max sge for read target */
bool sc_snd_w_inv; /* OK to use Send With Invalidate */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
atomic_t sc_sq_avail; /* SQEs ready to be consumed */
unsigned int sc_sq_depth; /* Depth of SQ */
unsigned int sc_rq_depth; /* Depth of RQ */
u32 sc_max_requests; /* Forward credits */
......@@ -148,7 +147,6 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
atomic_t sc_dma_used;
spinlock_t sc_ctxt_lock;
struct list_head sc_ctxts;
int sc_ctxt_used;
......@@ -200,7 +198,6 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
struct svc_rdma_op_ctxt *ctxt)
{
ctxt->mapped_sges++;
atomic_inc(&rdma->sc_dma_used);
}
/* svc_rdma_backchannel.c */
......@@ -236,8 +233,6 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
struct svc_rdma_req_map *, bool);
extern int svc_rdma_sendto(struct svc_rqst *);
extern struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
int);
......
......@@ -200,7 +200,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(hmac_md5))
goto out_free_md5;
req = ahash_request_alloc(md5, GFP_KERNEL);
req = ahash_request_alloc(md5, GFP_NOFS);
if (!req)
goto out_free_hmac_md5;
......@@ -230,7 +230,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
goto out;
ahash_request_free(req);
req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
req = ahash_request_alloc(hmac_md5, GFP_NOFS);
if (!req)
goto out_free_hmac_md5;
......@@ -299,7 +299,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
req = ahash_request_alloc(tfm, GFP_KERNEL);
req = ahash_request_alloc(tfm, GFP_NOFS);
if (!req)
goto out_free_ahash;
......@@ -397,7 +397,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
goto out_free_cksum;
checksumlen = crypto_ahash_digestsize(tfm);
req = ahash_request_alloc(tfm, GFP_KERNEL);
req = ahash_request_alloc(tfm, GFP_NOFS);
if (!req)
goto out_free_ahash;
......@@ -963,7 +963,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
}
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
GFP_KERNEL);
GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate shash descriptor for '%s'\n",
__func__, kctx->gk5e->cksum_name);
......@@ -1030,7 +1030,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
}
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
GFP_KERNEL);
GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate shash descriptor for '%s'\n",
__func__, kctx->gk5e->cksum_name);
......
......@@ -451,8 +451,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
goto out_err_free_hmac;
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
GFP_KERNEL);
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_NOFS);
if (!desc) {
dprintk("%s: failed to allocate hash descriptor for '%s'\n",
__func__, ctx->gk5e->cksum_name);
......
......@@ -1548,7 +1548,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
ret = SVC_COMPLETE;
goto out;
drop:
ret = SVC_DROP;
ret = SVC_CLOSE;
out:
if (rsci)
cache_put(&rsci->h, sn->rsc_cache);
......
......@@ -1155,8 +1155,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
goto close;
case SVC_DROP:
goto dropit;
case SVC_COMPLETE:
......@@ -1246,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
sendit:
if (svc_authorise(rqstp))
goto dropit;
goto close;
return 1; /* Caller can now send it */
dropit:
......@@ -1254,11 +1253,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
dprintk("svc: svc_process dropit\n");
return 0;
close:
if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
err_short_len:
svc_printk(rqstp, "short len %Zd, dropping request\n",
argv->iov_len);
goto dropit; /* drop request */
goto close;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
......
......@@ -124,8 +124,7 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
#define DN_HASHMAX (1<<DN_HASHBITS)
static struct hlist_head auth_domain_table[DN_HASHMAX];
static spinlock_t auth_domain_lock =
__SPIN_LOCK_UNLOCKED(auth_domain_lock);
static DEFINE_SPINLOCK(auth_domain_lock);
void auth_domain_put(struct auth_domain *dom)
{
......
......@@ -164,13 +164,9 @@ static int
xprt_rdma_bc_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
size_t size = rqst->rq_callsize;
struct svcxprt_rdma *rdma;
struct page *page;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
if (size > PAGE_SIZE) {
WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
size);
......@@ -359,6 +355,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
out_fail:
xprt_rdma_free_addresses(xprt);
args->bc_xprt->xpt_bc_xprt = NULL;
args->bc_xprt->xpt_bc_xps = NULL;
xprt_put(xprt);
xprt_free(xprt);
return ERR_PTR(-EINVAL);
......
......@@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
frmr->sg);
return -ENOMEM;
}
atomic_inc(&xprt->sc_dma_used);
n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
if (unlikely(n != frmr->sg_nents)) {
......@@ -374,9 +373,7 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
u32 position, u32 byte_count, u32 page_offset, int page_no)
{
char *srcp, *destp;
int ret;
ret = 0;
srcp = head->arg.head[0].iov_base + position;
byte_count = head->arg.head[0].iov_len - position;
if (byte_count > PAGE_SIZE) {
......@@ -415,6 +412,20 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
return 1;
}
/* Return a pointer to the first read chunk in the transport header
 * @rmsgp, or NULL when that first entry's discriminator is xdr_zero
 * (i.e. no read chunk is present).
 */
static struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *first;

	first = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	return first->rc_discrim != xdr_zero ? first : NULL;
}
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
......@@ -627,8 +638,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto defer;
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
......
......@@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
return dma_addr;
}
/* Returns the address of the first read chunk or <nul> if no read chunk
* is present
/* Parse the RPC Call's transport header.
*/
struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
struct rpcrdma_write_array **write,
struct rpcrdma_write_array **reply)
{
struct rpcrdma_read_chunk *ch =
(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
__be32 *p;
if (ch->rc_discrim == xdr_zero)
return NULL;
return ch;
}
/* Returns the address of the first write array element or NULL
* if no write array list is present
*/
static struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
rmsgp->rm_body.rm_chunks[1] == xdr_zero)
return NULL;
return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}
/* Returns the address of the first reply array element or <nul> if no
* reply array is present
*/
static struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
struct rpcrdma_write_array *wr_ary)
{
struct rpcrdma_read_chunk *rch;
struct rpcrdma_write_array *rp_ary;
/* XXX: Need to fix when reply chunk may occur with read list
* and/or write list.
*/
if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
rmsgp->rm_body.rm_chunks[1] != xdr_zero)
return NULL;
rch = svc_rdma_get_read_chunk(rmsgp);
if (rch) {
while (rch->rc_discrim != xdr_zero)
rch++;
/* The reply chunk follows an empty write array located
* at 'rc_position' here. The reply array is at rc_target.
*/
rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
goto found_it;
}
p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
if (wr_ary) {
int chunk = be32_to_cpu(wr_ary->wc_nchunks);
/* Read list */
while (*p++ != xdr_zero)
p += 5;
rp_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[chunk].wc_target.rs_length;
goto found_it;
/* Write list */
if (*p != xdr_zero) {
*write = (struct rpcrdma_write_array *)p;
while (*p++ != xdr_zero)
p += 1 + be32_to_cpu(*p) * 4;
} else {
*write = NULL;
p++;
}
/* No read list, no write list */
rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
found_it:
if (rp_ary->wc_discrim == xdr_zero)
return NULL;
return rp_ary;
/* Reply chunk */
if (*p != xdr_zero)
*reply = (struct rpcrdma_write_array *)p;
else
*reply = NULL;
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
......@@ -240,31 +199,22 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
{
struct rpcrdma_read_chunk *rd_ary;
struct rpcrdma_segment *arg_ch;
u32 inv_rkey;
inv_rkey = 0;
rd_ary = svc_rdma_get_read_chunk(rdma_argp);
if (rd_ary) {
inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
goto out;
}
rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
if (rd_ary->rc_discrim != xdr_zero)
return be32_to_cpu(rd_ary->rc_target.rs_handle);
if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
arg_ch = &wr_ary->wc_array[0].wc_target;
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
goto out;
return be32_to_cpu(arg_ch->rs_handle);
}
if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
arg_ch = &rp_ary->wc_array[0].wc_target;
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
goto out;
return be32_to_cpu(arg_ch->rs_handle);
}
out:
dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
return inv_rkey;
return 0;
}
/* Assumptions:
......@@ -622,8 +572,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
* places this at the start of page 0.
*/
rdma_argp = page_address(rqstp->rq_pages[0]);
wr_ary = svc_rdma_get_write_array(rdma_argp);
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
inv_rkey = 0;
if (rdma->sc_snd_w_inv)
......@@ -636,7 +585,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
/* Create the RDMA response header. xprt->xpt_mutex,
* acquired in svc_send(), serializes RPC replies. The
* code path below that inserts the credit grant value
* into each transport header runs only inside this
* critical section.
*/
ret = -ENOMEM;
res_page = alloc_page(GFP_KERNEL);
if (!res_page)
......
......@@ -41,6 +41,7 @@
*/
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/interrupt.h>
......@@ -226,25 +227,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
struct svcxprt_rdma *xprt = ctxt->xprt;
struct ib_device *device = xprt->sc_cm_id->device;
u32 lkey = xprt->sc_pd->local_dma_lkey;
unsigned int i, count;
unsigned int i;
for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
for (i = 0; i < ctxt->mapped_sges; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
* the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
if (ctxt->sge[i].lkey == lkey) {
count++;
if (ctxt->sge[i].lkey == lkey)
ib_dma_unmap_page(device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
}
}
ctxt->mapped_sges = 0;
atomic_sub(count, &xprt->sc_dma_used);
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
......@@ -398,7 +396,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
ctxt->wc_status = wc->status;
svc_rdma_unmap_dma(ctxt);
if (wc->status != IB_WC_SUCCESS)
......@@ -436,7 +433,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
goto err;
out:
atomic_dec(&xprt->sc_sq_count);
atomic_inc(&xprt->sc_sq_avail);
wake_up(&xprt->sc_send_wait);
return;
......@@ -946,7 +943,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
ib_dma_unmap_sg(rdma->sc_cm_id->device,
frmr->sg, frmr->sg_nents, frmr->direction);
atomic_dec(&rdma->sc_dma_used);
spin_lock_bh(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
......@@ -973,6 +969,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
struct sockaddr *sap;
unsigned int i;
int ret = 0;
......@@ -1010,6 +1007,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
......@@ -1052,18 +1050,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
qp_attr.recv_cq = newxprt->sc_rq_cq;
dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n"
" cm_id->device=%p, sc_pd->device=%p\n"
" cap.max_send_wr = %d\n"
" cap.max_recv_wr = %d\n"
" cap.max_send_sge = %d\n"
" cap.max_recv_sge = %d\n",
newxprt->sc_cm_id, newxprt->sc_pd,
dev, newxprt->sc_pd->device,
qp_attr.cap.max_send_wr,
qp_attr.cap.max_recv_wr,
qp_attr.cap.max_send_sge,
qp_attr.cap.max_recv_sge);
dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
newxprt->sc_cm_id, newxprt->sc_pd);
dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
......@@ -1146,31 +1138,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
}
dprintk("svcrdma: new connection %p accepted with the following "
"attributes:\n"
" local_ip : %pI4\n"
" local_port : %d\n"
" remote_ip : %pI4\n"
" remote_port : %d\n"
" max_sge : %d\n"
" max_sge_rd : %d\n"
" sq_depth : %d\n"
" max_requests : %d\n"
" ord : %d\n",
newxprt,
&((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.src_addr)->sin_addr.s_addr,
ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.src_addr)->sin_port),
&((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.dst_addr)->sin_addr.s_addr,
ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.dst_addr)->sin_port),
newxprt->sc_max_sge,
newxprt->sc_max_sge_rd,
newxprt->sc_sq_depth,
newxprt->sc_max_requests,
newxprt->sc_ord);
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
dprintk(" max_sge_rd : %d\n", newxprt->sc_max_sge_rd);
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", newxprt->sc_ord);
return &newxprt->sc_xprt;
......@@ -1257,9 +1234,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used));
/* Final put of backchannel client transport */
if (xprt->xpt_bc_xprt) {
......@@ -1339,15 +1313,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
/* If the SQ is full, wait until an SQ entry is available */
while (1) {
spin_lock_bh(&xprt->sc_lock);
if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
spin_unlock_bh(&xprt->sc_lock);
if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
atomic_inc(&rdma_stat_sq_starve);
/* Wait until SQ WR available if SQ still full */
atomic_add(wr_count, &xprt->sc_sq_avail);
wait_event(xprt->sc_send_wait,
atomic_read(&xprt->sc_sq_count) <
xprt->sc_sq_depth);
atomic_read(&xprt->sc_sq_avail) > wr_count);
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
return -ENOTCONN;
continue;
......@@ -1357,21 +1329,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
svc_xprt_get(&xprt->sc_xprt);
/* Bump used SQ WR count and post */
atomic_add(wr_count, &xprt->sc_sq_count);
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
if (ret) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
atomic_sub(wr_count, &xprt->sc_sq_count);
for (i = 0; i < wr_count; i ++)
svc_xprt_put(&xprt->sc_xprt);
dprintk("svcrdma: failed to post SQ WR rc=%d, "
"sc_sq_count=%d, sc_sq_depth=%d\n",
ret, atomic_read(&xprt->sc_sq_count),
dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n",
atomic_read(&xprt->sc_sq_avail),
xprt->sc_sq_depth);
}
spin_unlock_bh(&xprt->sc_lock);
if (ret)
wake_up(&xprt->sc_send_wait);
}
break;
}
return ret;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册