提交 a87e84b5 编写于 作者: L Linus Torvalds

Merge branch 'for-2.6.32' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.32' of git://linux-nfs.org/~bfields/linux: (68 commits)
  nfsd4: nfsv4 clients should cross mountpoints
  nfsd: revise 4.1 status documentation
  sunrpc/cache: avoid variable over-loading in cache_defer_req
  sunrpc/cache: use list_del_init for the list_head entries in cache_deferred_req
  nfsd: return success for non-NFS4 nfs4_state_start
  nfsd41: Refactor create_client()
  nfsd41: modify nfsd4.1 backchannel to use new xprt class
  nfsd41: Backchannel: Implement cb_recall over NFSv4.1
  nfsd41: Backchannel: cb_sequence callback
  nfsd41: Backchannel: Setup sequence information
  nfsd41: Backchannel: Server backchannel RPC wait queue
  nfsd41: Backchannel: Add sequence arguments to callback RPC arguments
  nfsd41: Backchannel: callback infrastructure
  nfsd4: use common rpc_cred for all callbacks
  nfsd4: allow nfs4 state startup to fail
  SUNRPC: Defer the auth_gss upcall when the RPC call is asynchronous
  nfsd4: fix null dereference creating nfsv4 callback client
  nfsd4: fix whitespace in NFSPROC4_CLNT_CB_NULL definition
  nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel
  sunrpc/cache: simplify cache_fresh_locked and cache_fresh_unlocked.
  ...
......@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file. Note that to write this
control file, the nfsd service must be taken down. Use your user-mode
nfs-utils to set this up; see rpc.nfsd(8)
(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
"-4", respectively. Therefore, code meant to work on both new and old
kernels must turn 4.1 on or off *before* turning support for version 4
on or off; rpc.nfsd does this correctly.)
The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
on the latest NFSv4.1 Internet Draft:
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
......@@ -25,6 +30,49 @@ are still under development out of tree.
See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
for more information.
The current implementation is intended for developers only: while it
does support ordinary file operations on clients we have tested against
(including the linux client), it is incomplete in ways which may limit
features unexpectedly, cause known bugs in rare cases, or cause
interoperability problems with future clients. Known issues:
- gss support is questionable: currently mounts with kerberos
from a linux client are possible, but we aren't really
conformant with the spec (for example, we don't use kerberos
on the backchannel correctly).
- no trunking support: no clients currently take advantage of
trunking, but this is a mandatory failure, and its use is
recommended to clients in a number of places. (E.g. to ensure
timely renewal in case an existing connection's retry timeouts
have gotten too long; see section 8.3 of the draft.)
Therefore, lack of this feature may cause future clients to
fail.
- Incomplete backchannel support: incomplete backchannel gss
support and no support for BACKCHANNEL_CTL mean that
callbacks (hence delegations and layouts) may not be
available and clients confused by the incomplete
implementation may fail.
- Server reboot recovery is unsupported; if the server reboots,
clients may fail.
- We do not support SSV, which provides security for shared
client-server state (thus preventing unauthorized tampering
with locks and opens, for example). It is mandatory for
servers to support this, though no clients use it yet.
- Mandatory operations which we do not support, such as
DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
TEST_STATEID, are not currently used by clients, but will be
(and the spec recommends their uses in common cases), and
clients should not be expected to know how to recover from the
case where they are not supported. This will eventually cause
interoperability failures.
In addition, some limitations are inherited from the current NFSv4
implementation:
- Incomplete delegation enforcement: if a file is renamed or
unlinked, a client holding a delegation may continue to
indefinitely allow opens of the file under the old name.
The table below, taken from the NFSv4.1 document, lists
the operations that are mandatory to implement (REQ), optional
(OPT), and NFSv4.0 operations that are required not to implement (MNI)
......@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
Implementation notes:
DELEGPURGE:
* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
persist across client reboots). Thus we need not implement this for
now.
EXCHANGE_ID:
* only SP4_NONE state protection supported
* implementation ids are ignored
......
......@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
*/
if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
continue;
if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
continue;
......
......@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
*/
chain = &nlm_hosts[nlm_hash_address(ni->sap)];
hlist_for_each_entry(host, pos, chain, h_hash) {
if (!nlm_cmp_addr(nlm_addr(host), ni->sap))
if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
continue;
/* See if we have an NSM handle for this client */
......@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
if (host->h_server != ni->server)
continue;
if (ni->server &&
!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
!rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
continue;
/* Move to head of hash chain. */
......
......@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
struct nsm_handle *nsm;
list_for_each_entry(nsm, &nsm_handles, sm_link)
if (nlm_cmp_addr(nsm_addr(nsm), sap))
if (rpc_cmp_addr(nsm_addr(nsm), sap))
return nsm;
return NULL;
}
......
......@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
static int
nlmsvc_match_ip(void *datap, struct nlm_host *host)
{
return nlm_cmp_addr(nlm_srcaddr(host), datap);
return rpc_cmp_addr(nlm_srcaddr(host), datap);
}
/**
......
......@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (rv)
goto out;
rv = check_nfsd_access(exp, rqstp);
if (rv)
fh_put(fhp);
out:
exp_put(exp);
return rv;
......
......@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
return p;
}
static __be32 *
encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
struct svc_fh *fhp)
{
p = encode_post_op_attr(cd->rqstp, p, fhp);
*p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, fhp);
fh_put(fhp);
return p;
}
static int
compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
const char *name, int namlen)
......@@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
dparent = cd->fh.fh_dentry;
exp = cd->fh.fh_export;
fh_init(fhp, NFS3_FHSIZE);
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
if (dchild == dparent) {
/* filesystem root - cannot return filehandle for ".." */
dput(dchild);
return 1;
return -ENOENT;
}
} else
dchild = dget(dparent);
} else
dchild = lookup_one_len(name, dparent, namlen);
if (IS_ERR(dchild))
return 1;
if (d_mountpoint(dchild) ||
fh_compose(fhp, exp, dchild, &cd->fh) != 0 ||
!dchild->d_inode)
rv = 1;
return -ENOENT;
rv = -ENOENT;
if (d_mountpoint(dchild))
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
if (rv)
goto out;
if (!dchild->d_inode)
goto out;
rv = 0;
out:
dput(dchild);
return rv;
}
__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
{
struct svc_fh fh;
int err;
fh_init(&fh, NFS3_FHSIZE);
err = compose_entry_fh(cd, &fh, name, namlen);
if (err) {
*p++ = 0;
*p++ = 0;
goto out;
}
p = encode_post_op_attr(cd->rqstp, p, &fh);
*p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, &fh);
out:
fh_put(&fh);
return p;
}
/*
* Encode a directory entry. This one works for both normal readdir
* and readdirplus.
......@@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p = encode_entry_baggage(cd, p, name, namlen, ino);
/* throw in readdirplus baggage */
if (plus) {
struct svc_fh fh;
if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
*p++ = 0;
*p++ = 0;
} else
p = encode_entryplus_baggage(cd, p, &fh);
}
if (plus)
p = encode_entryplus_baggage(cd, p, name, namlen);
num_entry_words = p - cd->buffer;
} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
/* temporarily encode entry into next page, then move back to
......@@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
/* throw in readdirplus baggage */
if (plus) {
struct svc_fh fh;
if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
/* zero out the filehandle */
*p1++ = 0;
*p1++ = 0;
} else
p1 = encode_entryplus_baggage(cd, p1, &fh);
}
if (plus)
p = encode_entryplus_baggage(cd, p1, name, namlen);
/* determine entry word length and lengths to go in pages */
num_entry_words = p1 - tmp;
......
......@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
deny = ~pas.group & pas.other;
if (deny) {
ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
ace->flag = eflag;
ace->access_mask = deny_mask_from_posix(deny, flags);
ace->whotype = NFS4_ACL_WHO_GROUP;
ace++;
......@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
if (deny) {
ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
ace->access_mask = mask_from_posix(deny, flags);
ace->access_mask = deny_mask_from_posix(deny, flags);
ace->whotype = NFS4_ACL_WHO_NAMED;
ace->who = pa->e_id;
ace++;
......
......@@ -43,25 +43,30 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/state.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs4.h>
#include <linux/sunrpc/xprtsock.h>
#define NFSDDBG_FACILITY NFSDDBG_PROC
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
#define NFS4_STATEID_SIZE 16
/* Index of predefined Linux callback client operations */
enum {
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_SEQUENCE,
};
enum nfs_cb_opnum4 {
OP_CB_RECALL = 4,
OP_CB_SEQUENCE = 11,
};
#define NFS4_MAXTAGLEN 20
......@@ -70,17 +75,29 @@ enum nfs_cb_opnum4 {
#define NFS4_dec_cb_null_sz 0
#define cb_compound_enc_hdr_sz 4
#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
#define cb_sequence_enc_sz (sessionid_sz + 4 + \
1 /* no referring calls list yet */)
#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
#define op_enc_sz 1
#define op_dec_sz 2
#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
cb_sequence_enc_sz + \
1 + enc_stateid_sz + \
enc_nfs4_fh_sz)
#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
struct nfs4_rpc_args {
void *args_op;
struct nfsd4_cb_sequence args_seq;
};
/*
* Generic encode routines from fs/nfs/nfs4xdr.c
*/
......@@ -137,11 +154,13 @@ xdr_error: \
} while (0)
struct nfs4_cb_compound_hdr {
int status;
u32 ident;
/* args */
u32 ident; /* minorversion 0 only */
u32 nops;
__be32 *nops_p;
u32 minorversion;
/* res */
int status;
u32 taglen;
char *tag;
};
......@@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
hdr->nops++;
}
static void
encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
if (hdr->minorversion == 0)
return;
RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
WRITE32(OP_CB_SEQUENCE);
WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
WRITE32(args->cbs_clp->cl_cb_seq_nr);
WRITE32(0); /* slotid, always 0 */
WRITE32(0); /* highest slotid always 0 */
WRITE32(0); /* cachethis always 0 */
WRITE32(0); /* FIXME: support referring_call_lists */
hdr->nops++;
}
static int
nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
{
......@@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
}
static int
nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
struct nfs4_rpc_args *rpc_args)
{
struct xdr_stream xdr;
struct nfs4_delegation *args = rpc_args->args_op;
struct nfs4_cb_compound_hdr hdr = {
.ident = args->dl_ident,
.minorversion = rpc_args->args_seq.cbs_minorversion,
};
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
encode_cb_compound_hdr(&xdr, &hdr);
encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
encode_cb_recall(&xdr, args, &hdr);
encode_cb_nops(&hdr);
return 0;
......@@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
return 0;
}
/*
* Our current back channel implmentation supports a single backchannel
* with a single slot.
*/
static int
decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
struct rpc_rqst *rqstp)
{
struct nfs4_sessionid id;
int status;
u32 dummy;
__be32 *p;
if (res->cbs_minorversion == 0)
return 0;
status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
if (status)
return status;
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
status = -ESERVERFAULT;
READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
NFS4_MAX_SESSIONID_LEN)) {
dprintk("%s Invalid session id\n", __func__);
goto out;
}
READ32(dummy);
if (dummy != res->cbs_clp->cl_cb_seq_nr) {
dprintk("%s Invalid sequence number\n", __func__);
goto out;
}
READ32(dummy); /* slotid must be 0 */
if (dummy != 0) {
dprintk("%s Invalid slotid\n", __func__);
goto out;
}
/* FIXME: process highest slotid and target highest slotid */
status = 0;
out:
return status;
}
static int
nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
{
......@@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
}
static int
nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
struct nfsd4_cb_sequence *seq)
{
struct xdr_stream xdr;
struct nfs4_cb_compound_hdr hdr;
......@@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
status = decode_cb_compound_hdr(&xdr, &hdr);
if (status)
goto out;
if (seq) {
status = decode_cb_sequence(&xdr, seq, rqstp);
if (status)
goto out;
}
status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
out:
return status;
......@@ -377,16 +478,15 @@ static int max_cb_time(void)
int setup_callback_client(struct nfs4_client *clp)
{
struct sockaddr_in addr;
struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
struct rpc_timeout timeparms = {
.to_initval = max_cb_time(),
.to_retries = 0,
};
struct rpc_create_args args = {
.protocol = IPPROTO_TCP,
.address = (struct sockaddr *)&addr,
.addrsize = sizeof(addr),
.protocol = XPRT_TRANSPORT_TCP,
.address = (struct sockaddr *) &cb->cb_addr,
.addrsize = cb->cb_addrlen,
.timeout = &timeparms,
.program = &cb_program,
.prognumber = cb->cb_prog,
......@@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp)
if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
/* Initialize address */
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_port = htons(cb->cb_port);
addr.sin_addr.s_addr = htonl(cb->cb_addr);
if (cb->cb_minorversion) {
args.bc_xprt = clp->cl_cb_xprt;
args.protocol = XPRT_TRANSPORT_BC_TCP;
}
/* Create RPC client */
client = rpc_create(&args);
if (IS_ERR(client)) {
......@@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
.rpc_call_done = nfsd4_cb_probe_done,
};
static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
{
struct auth_cred acred = {
.machine_cred = 1
};
static struct rpc_cred *callback_cred;
/*
* Note in the gss case this doesn't actually have to wait for a
* gss upcall (or any calls to the client); this just creates a
* non-uptodate cred which the rpc state machine will fill in with
* a refresh_upcall later.
*/
return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
RPCAUTH_LOOKUP_NEW);
int set_callback_cred(void)
{
callback_cred = rpc_lookup_machine_cred();
if (!callback_cred)
return -ENOMEM;
return 0;
}
void do_probe_callback(struct nfs4_client *clp)
{
struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
.rpc_cred = callback_cred
};
struct rpc_cred *cred;
int status;
cred = lookup_cb_cred(cb);
if (IS_ERR(cred)) {
status = PTR_ERR(cred);
goto out;
}
cb->cb_cred = cred;
msg.rpc_cred = cb->cb_cred;
status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
&nfsd4_cb_probe_ops, (void *)clp);
out:
if (status) {
warn_no_callback_path(clp, status);
put_nfs4_client(clp);
......@@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp)
do_probe_callback(clp);
}
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
struct rpc_task *task)
{
struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
u32 *ptr = (u32 *)clp->cl_sessionid.data;
int status = 0;
dprintk("%s: %u:%u:%u:%u\n", __func__,
ptr[0], ptr[1], ptr[2], ptr[3]);
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
dprintk("%s slot is busy\n", __func__);
status = -EAGAIN;
goto out;
}
/*
* We'll need the clp during XDR encoding and decoding,
* and the sequence during decoding to verify the reply
*/
args->args_seq.cbs_clp = clp;
task->tk_msg.rpc_resp = &args->args_seq;
out:
dprintk("%s status=%d\n", __func__, status);
return status;
}
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
*/
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_delegation *dp = calldata;
struct nfs4_client *clp = dp->dl_client;
struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
u32 minorversion = clp->cl_cb_conn.cb_minorversion;
int status = 0;
args->args_seq.cbs_minorversion = minorversion;
if (minorversion) {
status = nfsd41_cb_setup_sequence(clp, task);
if (status) {
if (status != -EAGAIN) {
/* terminate rpc task */
task->tk_status = status;
task->tk_action = NULL;
}
return;
}
}
rpc_call_start(task);
}
static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegation *dp = calldata;
struct nfs4_client *clp = dp->dl_client;
dprintk("%s: minorversion=%d\n", __func__,
clp->cl_cb_conn.cb_minorversion);
if (clp->cl_cb_conn.cb_minorversion) {
/* No need for lock, access serialized in nfsd4_cb_prepare */
++clp->cl_cb_seq_nr;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_seq_nr);
/* We're done looking into the sequence information */
task->tk_msg.rpc_resp = NULL;
}
}
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegation *dp = calldata;
struct nfs4_client *clp = dp->dl_client;
nfsd4_cb_done(task, calldata);
switch (task->tk_status) {
case -EIO:
/* Network partition? */
......@@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
break;
default:
/* success, or error we can't handle */
return;
goto done;
}
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
task->tk_status = 0;
rpc_restart_call(task);
return;
} else {
atomic_set(&clp->cl_cb_conn.cb_set, 0);
warn_no_callback_path(clp, task->tk_status);
}
done:
kfree(task->tk_msg.rpc_argp);
}
static void nfsd4_cb_recall_release(void *calldata)
......@@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata)
}
static const struct rpc_call_ops nfsd4_cb_recall_ops = {
.rpc_call_prepare = nfsd4_cb_prepare,
.rpc_call_done = nfsd4_cb_recall_done,
.rpc_release = nfsd4_cb_recall_release,
};
......@@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
{
struct nfs4_client *clp = dp->dl_client;
struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
struct nfs4_rpc_args *args;
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
.rpc_argp = dp,
.rpc_cred = clp->cl_cb_conn.cb_cred
.rpc_cred = callback_cred
};
int status;
int status = -ENOMEM;
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args)
goto out;
args->args_op = dp;
msg.rpc_argp = args;
dp->dl_retries = 1;
status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
&nfsd4_cb_recall_ops, dp);
out:
if (status) {
kfree(args);
put_nfs4_client(clp);
nfs4_put_delegation(dp);
}
......
......@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
struct svc_export *exp = cstate->current_fh.fh_export;
/*
* Check about attributes are supported by the NFSv4 server or not.
......@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_attrnotsupp;
/*
* Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported
* Check FATTR4_WORD0_ACL can be supported
* in current environment or not.
*/
if (bmval[0] & FATTR4_WORD0_ACL) {
if (!IS_POSIXACL(dentry->d_inode))
return nfserr_attrnotsupp;
}
if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
if (exp->ex_fslocs.locations == NULL)
return nfserr_attrnotsupp;
}
/*
* According to spec, read-only attributes return ERR_INVAL.
......@@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp,
return status;
}
static int
is_create_with_attrs(struct nfsd4_open *open)
{
return open->op_create == NFS4_OPEN_CREATE
&& (open->op_createmode == NFS4_CREATE_UNCHECKED
|| open->op_createmode == NFS4_CREATE_GUARDED
|| open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
}
/*
* if error occurs when setting the acl, just clear the acl bit
* in the returned attr bitmap.
*/
static void
do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfs4_acl *acl, u32 *bmval)
{
__be32 status;
status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
if (status)
/*
* We should probably fail the whole open at this point,
* but we've already created the file, so it's too late;
* So this seems the least of evils:
*/
bmval[0] &= ~FATTR4_WORD0_ACL;
}
static inline void
fh_dup2(struct svc_fh *dst, struct svc_fh *src)
{
......@@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
if (status)
goto out;
if (is_create_with_attrs(open) && open->op_acl != NULL)
do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
set_change_info(&open->op_cinfo, current_fh);
fh_dup2(current_fh, &resfh);
......@@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_badtype;
}
if (!status) {
fh_unlock(&cstate->current_fh);
set_change_info(&create->cr_cinfo, &cstate->current_fh);
fh_dup2(&cstate->current_fh, &resfh);
}
if (status)
goto out;
if (create->cr_acl != NULL)
do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
create->cr_bmval);
fh_unlock(&cstate->current_fh);
set_change_info(&create->cr_cinfo, &cstate->current_fh);
fh_dup2(&cstate->current_fh, &resfh);
out:
fh_put(&resfh);
return status;
}
......@@ -946,34 +978,6 @@ static struct nfsd4_operation nfsd4_ops[];
static const char *nfsd4_op_name(unsigned opnum);
/*
* This is a replay of a compound for which no cache entry pages
* were used. Encode the sequence operation, and if cachethis is FALSE
* encode the uncache rep error on the next operation.
*/
static __be32
nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
struct nfsd4_compoundres *resp)
{
struct nfsd4_op *op;
dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
/* Encode the replayed sequence operation */
BUG_ON(resp->opcnt != 1);
op = &args->ops[resp->opcnt - 1];
nfsd4_encode_operation(resp, op);
/*return nfserr_retry_uncached_rep in next operation. */
if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
op = &args->ops[resp->opcnt++];
op->status = nfserr_retry_uncached_rep;
nfsd4_encode_operation(resp, op);
}
return op->status;
}
/*
* Enforce NFSv4.1 COMPOUND ordering rules.
*
......@@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
BUG_ON(op->status == nfs_ok);
encode_op:
/* Only from SEQUENCE or CREATE_SESSION */
/* Only from SEQUENCE */
if (resp->cstate.status == nfserr_replay_cache) {
dprintk("%s NFS4.1 replay from cache\n", __func__);
if (nfsd4_not_cached(resp))
status = nfsd4_enc_uncached_replay(args, resp);
else
status = op->status;
status = op->status;
goto out;
}
if (op->status == nfserr_replay_me) {
......
此差异已折叠。
......@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
{
struct svc_fh tmp_fh;
char *path, *rootpath;
char *path = NULL, *rootpath;
size_t rootlen;
fh_init(&tmp_fh, NFS4_FHSIZE);
*stat = exp_pseudoroot(rqstp, &tmp_fh);
......@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
path = exp->ex_pathname;
if (strncmp(path, rootpath, strlen(rootpath))) {
rootlen = strlen(rootpath);
if (strncmp(path, rootpath, rootlen)) {
dprintk("nfsd: fs_locations failed;"
"%s is not contained in %s\n", path, rootpath);
*stat = nfserr_notsupp;
return NULL;
path = NULL;
goto out;
}
return path + strlen(rootpath);
path += rootlen;
out:
fh_put(&tmp_fh);
return path;
}
/*
......@@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_nfserr;
}
}
if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
if (exp->ex_fslocs.locations == NULL) {
bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
}
}
if ((buflen -= 16) < 0)
goto out_resource;
......@@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_resource;
if (!aclsupport)
word0 &= ~FATTR4_WORD0_ACL;
if (!exp->ex_fslocs.locations)
word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
if (!word2) {
WRITE32(2);
WRITE32(word0);
......@@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
WRITE32(0);
ADJUST_ARGS();
resp->cstate.datap = p; /* DRC cache data pointer */
return 0;
}
......@@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
return status;
session = resp->cstate.session;
if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0)
if (session == NULL || slot->sl_cachethis == 0)
return status;
if (resp->opcnt >= args->opcnt)
......@@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
/*
* All that remains is to write the tag and operation count...
*/
struct nfsd4_compound_state *cs = &resp->cstate;
struct kvec *iov;
p = resp->tagp;
*p++ = htonl(resp->taglen);
......@@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
iov = &rqstp->rq_res.head[0];
iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
BUG_ON(iov->iov_len > PAGE_SIZE);
if (nfsd4_has_session(&resp->cstate)) {
if (resp->cstate.status == nfserr_replay_cache &&
!nfsd4_not_cached(resp)) {
iov->iov_len = resp->cstate.iovlen;
} else {
nfsd4_store_cache_entry(resp);
dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
resp->cstate.slot->sl_inuse = 0;
}
if (resp->cstate.session)
nfsd4_put_session(resp->cstate.session);
if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
resp->cstate.slot->sl_inuse = false;
nfsd4_put_session(resp->cstate.session);
}
return 1;
}
......
......@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = {
};
extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
static struct file_operations pool_stats_operations = {
.open = nfsd_pool_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.release = nfsd_pool_stats_release,
.owner = THIS_MODULE,
};
......@@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
size -= len;
mesg += len;
}
mutex_unlock(&nfsd_mutex);
return (mesg-buf);
rv = mesg - buf;
out_free:
kfree(nthreads);
mutex_unlock(&nfsd_mutex);
......
......@@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry,
fh->ofh_dirino = 0;
}
__be32
fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
struct svc_fh *ref_fh)
static bool is_root_export(struct svc_export *exp)
{
/* ref_fh is a reference file handle.
* if it is non-null and for the same filesystem, then we should compose
* a filehandle which is of the same version, where possible.
* Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
* Then create a 32byte filehandle using nfs_fhbase_old
*
*/
return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
}
u8 version;
u8 fsid_type = 0;
struct inode * inode = dentry->d_inode;
struct dentry *parent = dentry->d_parent;
__u32 *datap;
dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
static struct super_block *exp_sb(struct svc_export *exp)
{
return exp->ex_path.dentry->d_inode->i_sb;
}
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
MAJOR(ex_dev), MINOR(ex_dev),
(long) exp->ex_path.dentry->d_inode->i_ino,
parent->d_name.name, dentry->d_name.name,
(inode ? inode->i_ino : 0));
static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
{
switch (fsid_type) {
case FSID_DEV:
if (!old_valid_dev(exp_sb(exp)->s_dev))
return 0;
/* FALL THROUGH */
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
case FSID_NUM:
return exp->ex_flags & NFSEXP_FSID;
case FSID_UUID8:
case FSID_UUID16:
if (!is_root_export(exp))
return 0;
/* fall through */
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
return exp->ex_uuid != NULL;
}
return 1;
}
/* Choose filehandle version and fsid type based on
* the reference filehandle (if it is in the same export)
* or the export options.
*/
retry:
static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
{
u8 version;
u8 fsid_type;
retry:
version = 1;
if (ref_fh && ref_fh->fh_export == exp) {
version = ref_fh->fh_handle.fh_version;
fsid_type = ref_fh->fh_handle.fh_fsid_type;
if (ref_fh == fhp)
fh_put(ref_fh);
ref_fh = NULL;
switch (version) {
......@@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
goto retry;
}
/* Need to check that this type works for this
* export point. As the fsid -> filesystem mapping
* was guided by user-space, there is no guarantee
* that the filesystem actually supports that fsid
* type. If it doesn't we loop around again without
* ref_fh set.
/*
* As the fsid -> filesystem mapping was guided by
* user-space, there is no guarantee that the filesystem
* actually supports that fsid type. If it doesn't we
* loop around again without ref_fh set.
*/
switch(fsid_type) {
case FSID_DEV:
if (!old_valid_dev(ex_dev))
goto retry;
/* FALL THROUGH */
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
& FS_REQUIRES_DEV))
goto retry;
break;
case FSID_NUM:
if (! (exp->ex_flags & NFSEXP_FSID))
goto retry;
break;
case FSID_UUID8:
case FSID_UUID16:
if (!root_export)
goto retry;
/* fall through */
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
if (exp->ex_uuid == NULL)
goto retry;
break;
}
if (!fsid_type_ok_for_exp(fsid_type, exp))
goto retry;
} else if (exp->ex_flags & NFSEXP_FSID) {
fsid_type = FSID_NUM;
} else if (exp->ex_uuid) {
if (fhp->fh_maxsize >= 64) {
if (root_export)
if (is_root_export(exp))
fsid_type = FSID_UUID16;
else
fsid_type = FSID_UUID16_INUM;
} else {
if (root_export)
if (is_root_export(exp))
fsid_type = FSID_UUID8;
else
fsid_type = FSID_UUID4_INUM;
}
} else if (!old_valid_dev(ex_dev))
} else if (!old_valid_dev(exp_sb(exp)->s_dev))
/* for newer device numbers, we must use a newer fsid format */
fsid_type = FSID_ENCODE_DEV;
else
fsid_type = FSID_DEV;
fhp->fh_handle.fh_version = version;
if (version)
fhp->fh_handle.fh_fsid_type = fsid_type;
}
__be32
fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
struct svc_fh *ref_fh)
{
/* ref_fh is a reference file handle.
* if it is non-null and for the same filesystem, then we should compose
* a filehandle which is of the same version, where possible.
* Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
* Then create a 32byte filehandle using nfs_fhbase_old
*
*/
struct inode * inode = dentry->d_inode;
struct dentry *parent = dentry->d_parent;
__u32 *datap;
dev_t ex_dev = exp_sb(exp)->s_dev;
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
MAJOR(ex_dev), MINOR(ex_dev),
(long) exp->ex_path.dentry->d_inode->i_ino,
parent->d_name.name, dentry->d_name.name,
(inode ? inode->i_ino : 0));
/* Choose filehandle version and fsid type based on
* the reference filehandle (if it is in the same export)
* or the export options.
*/
set_version_and_fsid_type(fhp, exp, ref_fh);
if (ref_fh == fhp)
fh_put(ref_fh);
......@@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
fhp->fh_export = exp;
cache_get(&exp->h);
if (version == 0xca) {
if (fhp->fh_handle.fh_version == 0xca) {
/* old style filehandle please */
memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
fhp->fh_handle.fh_size = NFS_FHSIZE;
......@@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
_fh_update_old(dentry, exp, &fhp->fh_handle);
} else {
int len;
fhp->fh_handle.fh_version = 1;
fhp->fh_handle.fh_auth_type = 0;
datap = fhp->fh_handle.fh_auth+0;
fhp->fh_handle.fh_fsid_type = fsid_type;
mk_fsid(fsid_type, datap, ex_dev,
mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
exp->ex_path.dentry->d_inode->i_ino,
exp->ex_fsid, exp->ex_uuid);
len = key_len(fsid_type);
len = key_len(fhp->fh_handle.fh_fsid_type);
datap += len/4;
fhp->fh_handle.fh_size = 4 + len;
if (inode)
_fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == 255)
if (fhp->fh_handle.fh_fileid_type == 255) {
fh_put(fhp);
return nfserr_opnotsupp;
}
}
return 0;
......@@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
case FSID_DEV:
case FSID_ENCODE_DEV:
case FSID_MAJOR_MINOR:
if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
& FS_REQUIRES_DEV)
if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
return FSIDSOURCE_DEV;
break;
case FSID_NUM:
......
......@@ -34,6 +34,7 @@
#include <linux/nfsd/syscall.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
#include <linux/seq_file.h>
#define NFSDDBG_FACILITY NFSDDBG_SVC
......@@ -66,6 +67,16 @@ struct timeval nfssvc_boot;
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv *nfsd_serv;
/*
* nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
* nfsd_drc_max_pages limits the total amount of memory available for
* version 4.1 DRC caches.
* nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
*/
spinlock_t nfsd_drc_lock;
unsigned int nfsd_drc_max_mem;
unsigned int nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
......@@ -235,13 +246,12 @@ void nfsd_reset_versions(void)
*/
static void set_max_drc(void)
{
/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */
#define NFSD_DRC_SIZE_SHIFT 7
nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
>> NFSD_DRC_SIZE_SHIFT;
nfsd_serv->sv_drc_pages_used = 0;
dprintk("%s svc_drc_max_pages %u\n", __func__,
nfsd_serv->sv_drc_max_pages);
#define NFSD_DRC_SIZE_SHIFT 10
nfsd_drc_max_mem = (nr_free_buffer_pages()
>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
nfsd_drc_mem_used = 0;
spin_lock_init(&nfsd_drc_lock);
dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
}
int nfsd_create_serv(void)
......@@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs)
error = nfsd_racache_init(2*nrservs);
if (error<0)
goto out;
nfs4_state_start();
error = nfs4_state_start();
if (error)
goto out;
nfsd_reset_versions();
......@@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ rqstp->rq_res.head[0].iov_len;
rqstp->rq_res.head[0].iov_len += sizeof(__be32);
/* NFSv4.1 DRC requires statp */
if (rqstp->rq_vers == 4)
nfsd4_set_statp(rqstp, statp);
/* Now call the procedure handler, and encode NFS status. */
nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
nfserr = map_new_errors(rqstp->rq_vers, nfserr);
......@@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
int nfsd_pool_stats_open(struct inode *inode, struct file *file)
{
if (nfsd_serv == NULL)
int ret;
mutex_lock(&nfsd_mutex);
if (nfsd_serv == NULL) {
mutex_unlock(&nfsd_mutex);
return -ENODEV;
return svc_pool_stats_open(nfsd_serv, file);
}
/* bump up the psudo refcount while traversing */
svc_get(nfsd_serv);
ret = svc_pool_stats_open(nfsd_serv, file);
mutex_unlock(&nfsd_mutex);
return ret;
}
int nfsd_pool_stats_release(struct inode *inode, struct file *file)
{
int ret = seq_release(inode, file);
mutex_lock(&nfsd_mutex);
/* this function really, really should have been called svc_put() */
svc_destroy(nfsd_serv);
mutex_unlock(&nfsd_mutex);
return ret;
}
......@@ -89,6 +89,12 @@ struct raparm_hbucket {
#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
static inline int
nfsd_v4client(struct svc_rqst *rq)
{
return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
}
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
......@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
path_put(&path);
goto out;
}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
if (nfsd_v4client(rqstp) ||
(exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
/*
* This is subtle: path.dentry is *not* on path.mnt
......
......@@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
}
}
static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
}
#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
return 0;
}
#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
/*
* Compare two host addresses
*
* Return TRUE if the addresses are the same; otherwise FALSE.
*/
static inline int nlm_cmp_addr(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
if (sap1->sa_family == sap2->sa_family) {
switch (sap1->sa_family) {
case AF_INET:
return __nlm_cmp_addr4(sap1, sap2);
case AF_INET6:
return __nlm_cmp_addr6(sap1, sap2);
}
}
return 0;
}
/*
* Compare two NLM locks.
* When the second lock is of type F_UNLCK, this acts like a wildcard.
......
......@@ -234,7 +234,7 @@ enum nfs_opnum4 {
Needs to be updated if more operations are defined in future.*/
#define FIRST_NFS4_OP OP_ACCESS
#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER
#define LAST_NFS4_OP OP_RECLAIM_COMPLETE
enum nfsstat4 {
NFS4_OK = 0,
......
......@@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3,
extern u32 nfsd_supported_minorversion;
extern struct mutex nfsd_mutex;
extern struct svc_serv *nfsd_serv;
extern spinlock_t nfsd_drc_lock;
extern unsigned int nfsd_drc_max_mem;
extern unsigned int nfsd_drc_mem_used;
extern struct seq_operations nfs_exports_op;
......@@ -163,7 +166,7 @@ extern int nfsd_max_blksize;
extern unsigned int max_delegations;
int nfs4_state_init(void);
void nfsd4_free_slabs(void);
void nfs4_state_start(void);
int nfs4_state_start(void);
void nfs4_state_shutdown(void);
time_t nfs4_lease_time(void);
void nfs4_reset_lease(time_t leasetime);
......@@ -171,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir);
#else
static inline int nfs4_state_init(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
static inline void nfs4_state_start(void) { }
static inline int nfs4_state_start(void) { return 0; }
static inline void nfs4_state_shutdown(void) { }
static inline time_t nfs4_lease_time(void) { return 0; }
static inline void nfs4_reset_lease(time_t leasetime) { }
......
......@@ -60,6 +60,12 @@ typedef struct {
#define si_stateownerid si_opaque.so_stateownerid
#define si_fileid si_opaque.so_fileid
struct nfsd4_cb_sequence {
/* args/res */
u32 cbs_minorversion;
struct nfs4_client *cbs_clp;
};
struct nfs4_delegation {
struct list_head dl_perfile;
struct list_head dl_perclnt;
......@@ -81,38 +87,35 @@ struct nfs4_delegation {
/* client delegation callback info */
struct nfs4_cb_conn {
/* SETCLIENTID info */
u32 cb_addr;
unsigned short cb_port;
struct sockaddr_storage cb_addr;
size_t cb_addrlen;
u32 cb_prog;
u32 cb_minorversion;
u32 cb_ident; /* minorversion 0 only */
/* RPC client info */
atomic_t cb_set; /* successful CB_NULL call */
struct rpc_clnt * cb_client;
struct rpc_cred * cb_cred;
};
/* Maximum number of slots per session. 128 is useful for long haul TCP */
#define NFSD_MAX_SLOTS_PER_SESSION 128
/* Maximum number of pages per slot cache entry */
#define NFSD_PAGES_PER_SLOT 1
/* Maximum number of slots per session. 160 is useful for long haul TCP */
#define NFSD_MAX_SLOTS_PER_SESSION 160
/* Maximum number of operations per session compound */
#define NFSD_MAX_OPS_PER_COMPOUND 16
struct nfsd4_cache_entry {
__be32 ce_status;
struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */
struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1];
int ce_cachethis;
short ce_resused;
int ce_opcnt;
int ce_rpchdrlen;
};
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 1024
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
#define NFSD_MAX_MEM_PER_SESSION \
(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
struct nfsd4_slot {
bool sl_inuse;
u32 sl_seqid;
struct nfsd4_cache_entry sl_cache_entry;
bool sl_inuse;
bool sl_cachethis;
u16 sl_opcnt;
u32 sl_seqid;
__be32 sl_status;
u32 sl_datalen;
char sl_data[];
};
struct nfsd4_channel_attrs {
......@@ -126,6 +129,25 @@ struct nfsd4_channel_attrs {
u32 rdma_attrs;
};
struct nfsd4_create_session {
clientid_t clientid;
struct nfs4_sessionid sessionid;
u32 seqid;
u32 flags;
struct nfsd4_channel_attrs fore_channel;
struct nfsd4_channel_attrs back_channel;
u32 callback_prog;
u32 uid;
u32 gid;
};
/* The single slot clientid cache structure */
struct nfsd4_clid_slot {
u32 sl_seqid;
__be32 sl_status;
struct nfsd4_create_session sl_cr_ses;
};
struct nfsd4_session {
struct kref se_ref;
struct list_head se_hash; /* hash by sessionid */
......@@ -135,7 +157,7 @@ struct nfsd4_session {
struct nfs4_sessionid se_sessionid;
struct nfsd4_channel_attrs se_fchannel;
struct nfsd4_channel_attrs se_bchannel;
struct nfsd4_slot se_slots[]; /* forward channel slots */
struct nfsd4_slot *se_slots[]; /* forward channel slots */
};
static inline void
......@@ -180,7 +202,7 @@ struct nfs4_client {
char cl_recdir[HEXDIR_LEN]; /* recovery dir */
nfs4_verifier cl_verifier; /* generated by client */
time_t cl_time; /* time of last lease renewal */
__be32 cl_addr; /* client ipaddress */
struct sockaddr_storage cl_addr; /* client ipaddress */
u32 cl_flavor; /* setclientid pseudoflavor */
char *cl_principal; /* setclientid principal name */
struct svc_cred cl_cred; /* setclientid principal */
......@@ -192,9 +214,17 @@ struct nfs4_client {
/* for nfs41 */
struct list_head cl_sessions;
struct nfsd4_slot cl_slot; /* create_session slot */
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
struct nfs4_sessionid cl_sessionid;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
u32 cl_cb_seq_nr;
struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
};
/* struct nfs4_client_reset
......@@ -345,6 +375,7 @@ extern int nfs4_in_grace(void);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
extern void put_nfs4_client(struct nfs4_client *clp);
extern void nfs4_free_stateowner(struct kref *kref);
extern int set_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
extern void nfs4_put_delegation(struct nfs4_delegation *dp);
......
......@@ -51,7 +51,7 @@ struct nfsd4_compound_state {
/* For sessions DRC */
struct nfsd4_session *session;
struct nfsd4_slot *slot;
__be32 *statp;
__be32 *datap;
size_t iovlen;
u32 minorversion;
u32 status;
......@@ -366,18 +366,6 @@ struct nfsd4_exchange_id {
int spa_how;
};
struct nfsd4_create_session {
clientid_t clientid;
struct nfs4_sessionid sessionid;
u32 seqid;
u32 flags;
struct nfsd4_channel_attrs fore_channel;
struct nfsd4_channel_attrs back_channel;
u32 callback_prog;
u32 uid;
u32 gid;
};
struct nfsd4_sequence {
struct nfs4_sessionid sessionid; /* request/response */
u32 seqid; /* request/response */
......@@ -479,13 +467,12 @@ struct nfsd4_compoundres {
static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
{
struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
return args->opcnt == 1;
return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
}
static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
{
return !resp->cstate.slot->sl_cache_entry.ce_cachethis ||
nfsd4_is_solo_sequence(resp);
return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
}
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
......
......@@ -111,7 +111,7 @@ struct rpc_credops {
void (*crdestroy)(struct rpc_cred *);
int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
void (*crbind)(struct rpc_task *, struct rpc_cred *);
void (*crbind)(struct rpc_task *, struct rpc_cred *, int);
__be32 * (*crmarshal)(struct rpc_task *, __be32 *);
int (*crrefresh)(struct rpc_task *);
__be32 * (*crvalidate)(struct rpc_task *, __be32 *);
......@@ -140,7 +140,7 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *);
void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
void put_rpccred(struct rpc_cred *);
void rpcauth_unbindcred(struct rpc_task *);
__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
......
......@@ -22,6 +22,7 @@
#include <linux/sunrpc/timer.h>
#include <asm/signal.h>
#include <linux/path.h>
#include <net/ipv6.h>
struct rpc_inode;
......@@ -113,6 +114,7 @@ struct rpc_create_args {
rpc_authflavor_t authflavor;
unsigned long flags;
char *client_name;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
};
/* Values for "flags" field */
......@@ -188,5 +190,117 @@ static inline void rpc_set_port(struct sockaddr *sap,
#define IPV6_SCOPE_DELIMITER '%'
#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn")
static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
}
static inline bool __rpc_copy_addr4(struct sockaddr *dst,
const struct sockaddr *src)
{
const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
dsin->sin_family = ssin->sin_family;
dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
return true;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
}
static inline bool __rpc_copy_addr6(struct sockaddr *dst,
const struct sockaddr *src)
{
const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
dsin6->sin6_family = ssin6->sin6_family;
ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
return true;
}
#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
return false;
}
static inline bool __rpc_copy_addr6(struct sockaddr *dst,
const struct sockaddr *src)
{
return false;
}
#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
/**
* rpc_cmp_addr - compare the address portion of two sockaddrs.
* @sap1: first sockaddr
* @sap2: second sockaddr
*
* Just compares the family and address portion. Ignores port, scope, etc.
* Returns true if the addrs are equal, false if they aren't.
*/
static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
const struct sockaddr *sap2)
{
if (sap1->sa_family == sap2->sa_family) {
switch (sap1->sa_family) {
case AF_INET:
return __rpc_cmp_addr4(sap1, sap2);
case AF_INET6:
return __rpc_cmp_addr6(sap1, sap2);
}
}
return false;
}
/**
* rpc_copy_addr - copy the address portion of one sockaddr to another
* @dst: destination sockaddr
* @src: source sockaddr
*
* Just copies the address portion and family. Ignores port, scope, etc.
* Caller is responsible for making certain that dst is large enough to hold
* the address in src. Returns true if address family is supported. Returns
* false otherwise.
*/
static inline bool rpc_copy_addr(struct sockaddr *dst,
const struct sockaddr *src)
{
switch (src->sa_family) {
case AF_INET:
return __rpc_copy_addr4(dst, src);
case AF_INET6:
return __rpc_copy_addr6(dst, src);
}
return false;
}
/**
* rpc_get_scope_id - return scopeid for a given sockaddr
* @sa: sockaddr to get scopeid from
*
* Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
* not an AF_INET6 address.
*/
static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
{
if (sa->sa_family != AF_INET6)
return 0;
return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
}
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_CLNT_H */
......@@ -94,8 +94,6 @@ struct svc_serv {
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
unsigned int sv_drc_max_pages; /* Total pages for DRC */
unsigned int sv_drc_pages_used;/* DRC pages used */
#if defined(CONFIG_NFS_V4_1)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
......
......@@ -65,6 +65,7 @@ struct svc_xprt {
size_t xpt_locallen; /* length of address */
struct sockaddr_storage xpt_remote; /* remote peer's address */
size_t xpt_remotelen; /* length of address */
struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
......
......@@ -28,6 +28,7 @@ struct svc_sock {
/* private TCP part */
u32 sk_reclen; /* length of record */
u32 sk_tcplen; /* current read length */
struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */
};
/*
......
......@@ -124,6 +124,23 @@ struct rpc_xprt_ops {
void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
};
/*
* RPC transport identifiers
*
* To preserve compatibility with the historical use of raw IP protocol
* id's for transport selection, UDP and TCP identifiers are specified
* with the previous values. No such restriction exists for new transports,
* except that they may not collide with these values (17 and 6,
* respectively).
*/
#define XPRT_TRANSPORT_BC (1 << 31)
enum xprt_transports {
XPRT_TRANSPORT_UDP = IPPROTO_UDP,
XPRT_TRANSPORT_TCP = IPPROTO_TCP,
XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC,
XPRT_TRANSPORT_RDMA = 256
};
struct rpc_xprt {
struct kref kref; /* Reference count */
struct rpc_xprt_ops * ops; /* transport methods */
......@@ -179,6 +196,7 @@ struct rpc_xprt {
spinlock_t reserve_lock; /* lock slot table */
u32 xid; /* Next XID value to use */
struct rpc_task * snd_task; /* Task blocked in send */
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_NFS_V4_1)
struct svc_serv *bc_serv; /* The RPC service which will */
/* process the callback */
......@@ -231,6 +249,7 @@ struct xprt_create {
struct sockaddr * srcaddr; /* optional local address */
struct sockaddr * dstaddr; /* remote peer address */
size_t addrlen;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
};
struct xprt_class {
......
......@@ -40,11 +40,6 @@
#ifndef _LINUX_SUNRPC_XPRTRDMA_H
#define _LINUX_SUNRPC_XPRTRDMA_H
/*
* RPC transport identifier for RDMA
*/
#define XPRT_TRANSPORT_RDMA 256
/*
* rpcbind (v3+) RDMA netid.
*/
......
......@@ -12,17 +12,6 @@
int init_socket_xprt(void);
void cleanup_socket_xprt(void);
/*
* RPC transport identifiers for UDP, TCP
*
* To preserve compatibility with the historical use of raw IP protocol
* id's for transport selection, these are specified with the previous
* values. No such restriction exists for new transports, except that
* they may not collide with these values (17 and 6, respectively).
*/
#define XPRT_TRANSPORT_UDP IPPROTO_UDP
#define XPRT_TRANSPORT_TCP IPPROTO_TCP
/*
* RPC slot table sizes for UDP, TCP transports
*/
......
......@@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
void
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
{
task->tk_msg.rpc_cred = get_rpccred(cred);
dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
......@@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
static void
rpcauth_bind_root_cred(struct rpc_task *task)
rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
......@@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task)
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
ret = auth->au_ops->lookup_cred(auth, &acred, 0);
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
......@@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task)
}
static void
rpcauth_bind_new_cred(struct rpc_task *task)
rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct rpc_cred *ret;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, auth->au_ops->au_name);
ret = rpcauth_lookupcred(auth, 0);
ret = rpcauth_lookupcred(auth, lookupflags);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
......@@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task)
void
rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
{
int lookupflags = 0;
if (flags & RPC_TASK_ASYNC)
lookupflags |= RPCAUTH_LOOKUP_NEW;
if (cred != NULL)
cred->cr_ops->crbind(task, cred);
cred->cr_ops->crbind(task, cred, lookupflags);
else if (flags & RPC_TASK_ROOTCREDS)
rpcauth_bind_root_cred(task);
rpcauth_bind_root_cred(task, lookupflags);
else
rpcauth_bind_new_cred(task);
rpcauth_bind_new_cred(task, lookupflags);
}
void
......
......@@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
static void
generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
struct rpc_cred *ret;
ret = auth->au_ops->lookup_cred(auth, acred, 0);
ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
......
......@@ -1374,8 +1374,10 @@ svcauth_gss_release(struct svc_rqst *rqstp)
if (stat)
goto out_err;
break;
default:
goto out_err;
/*
* For any other gc_svc value, svcauth_gss_accept() already set
* the auth_error appropriately; just fall through:
*/
}
out:
......
......@@ -103,23 +103,21 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
static int cache_fresh_locked(struct cache_head *head, time_t expiry)
static void cache_fresh_locked(struct cache_head *head, time_t expiry)
{
head->expiry_time = expiry;
head->last_refresh = get_seconds();
return !test_and_set_bit(CACHE_VALID, &head->flags);
set_bit(CACHE_VALID, &head->flags);
}
static void cache_fresh_unlocked(struct cache_head *head,
struct cache_detail *detail, int new)
struct cache_detail *detail)
{
if (new)
cache_revisit_request(head);
if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
cache_revisit_request(head);
queue_loose(detail, head);
cache_dequeue(detail, head);
}
}
......@@ -132,7 +130,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
*/
struct cache_head **head;
struct cache_head *tmp;
int is_new;
if (!test_bit(CACHE_VALID, &old->flags)) {
write_lock(&detail->hash_lock);
......@@ -141,9 +138,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
is_new = cache_fresh_locked(old, new->expiry_time);
cache_fresh_locked(old, new->expiry_time);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail, is_new);
cache_fresh_unlocked(old, detail);
return old;
}
write_unlock(&detail->hash_lock);
......@@ -167,11 +164,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
*head = tmp;
detail->entries++;
cache_get(tmp);
is_new = cache_fresh_locked(tmp, new->expiry_time);
cache_fresh_locked(tmp, new->expiry_time);
cache_fresh_locked(old, 0);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail, is_new);
cache_fresh_unlocked(old, detail, 0);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
cache_put(old, detail);
return tmp;
}
......@@ -184,6 +181,22 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
return cd->cache_upcall(cd, h);
}
static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
{
if (!test_bit(CACHE_VALID, &h->flags) ||
h->expiry_time < get_seconds())
return -EAGAIN;
else if (detail->flush_time > h->last_refresh)
return -EAGAIN;
else {
/* entry is valid */
if (test_bit(CACHE_NEGATIVE, &h->flags))
return -ENOENT;
else
return 0;
}
}
/*
* This is the generic cache management routine for all
* the authentication caches.
......@@ -192,8 +205,10 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
*
*
* Returns 0 if the cache_head can be used, or cache_puts it and returns
* -EAGAIN if upcall is pending,
* -ETIMEDOUT if upcall failed and should be retried,
* -EAGAIN if upcall is pending and request has been queued
* -ETIMEDOUT if upcall failed or request could not be queue or
* upcall completed but item is still invalid (implying that
* the cache item has been replaced with a newer one).
* -ENOENT if cache entry was negative
*/
int cache_check(struct cache_detail *detail,
......@@ -203,17 +218,7 @@ int cache_check(struct cache_detail *detail,
long refresh_age, age;
/* First decide return status as best we can */
if (!test_bit(CACHE_VALID, &h->flags) ||
h->expiry_time < get_seconds())
rv = -EAGAIN;
else if (detail->flush_time > h->last_refresh)
rv = -EAGAIN;
else {
/* entry is valid */
if (test_bit(CACHE_NEGATIVE, &h->flags))
rv = -ENOENT;
else rv = 0;
}
rv = cache_is_valid(detail, h);
/* now see if we want to start an upcall */
refresh_age = (h->expiry_time - h->last_refresh);
......@@ -229,10 +234,11 @@ int cache_check(struct cache_detail *detail,
switch (cache_make_upcall(detail, h)) {
case -EINVAL:
clear_bit(CACHE_PENDING, &h->flags);
cache_revisit_request(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
cache_fresh_unlocked(h, detail,
cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
cache_fresh_unlocked(h, detail);
rv = -ENOENT;
}
break;
......@@ -245,10 +251,14 @@ int cache_check(struct cache_detail *detail,
}
}
if (rv == -EAGAIN)
if (cache_defer_req(rqstp, h) != 0)
rv = -ETIMEDOUT;
if (rv == -EAGAIN) {
if (cache_defer_req(rqstp, h) < 0) {
/* Request is not deferred */
rv = cache_is_valid(detail, h);
if (rv == -EAGAIN)
rv = -ETIMEDOUT;
}
}
if (rv)
cache_put(h, detail);
return rv;
......@@ -396,7 +406,7 @@ static int cache_clean(void)
)
continue;
if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
queue_loose(current_detail, ch);
cache_dequeue(current_detail, ch);
if (atomic_read(&ch->ref.refcount) == 1)
break;
......@@ -412,8 +422,10 @@ static int cache_clean(void)
if (!ch)
current_index ++;
spin_unlock(&cache_list_lock);
if (ch)
if (ch) {
cache_revisit_request(ch);
cache_put(ch, d);
}
} else
spin_unlock(&cache_list_lock);
......@@ -488,7 +500,7 @@ static int cache_defer_cnt;
static int cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq;
struct cache_deferred_req *dreq, *discard;
int hash = DFR_HASH(item);
if (cache_defer_cnt >= DFR_MAX) {
......@@ -496,11 +508,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
* or continue and drop the oldest below
*/
if (net_random()&1)
return -ETIMEDOUT;
return -ENOMEM;
}
dreq = req->defer(req);
if (dreq == NULL)
return -ETIMEDOUT;
return -ENOMEM;
dreq->item = item;
......@@ -513,23 +525,24 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
list_add(&dreq->hash, &cache_defer_hash[hash]);
/* it is in, now maybe clean up */
dreq = NULL;
discard = NULL;
if (++cache_defer_cnt > DFR_MAX) {
dreq = list_entry(cache_defer_list.prev,
struct cache_deferred_req, recent);
list_del(&dreq->recent);
list_del(&dreq->hash);
discard = list_entry(cache_defer_list.prev,
struct cache_deferred_req, recent);
list_del_init(&discard->recent);
list_del_init(&discard->hash);
cache_defer_cnt--;
}
spin_unlock(&cache_defer_lock);
if (dreq) {
if (discard)
/* there was one too many */
dreq->revisit(dreq, 1);
}
discard->revisit(discard, 1);
if (!test_bit(CACHE_PENDING, &item->flags)) {
/* must have just been validated... */
cache_revisit_request(item);
return -EAGAIN;
}
return 0;
}
......@@ -551,7 +564,7 @@ static void cache_revisit_request(struct cache_head *item)
dreq = list_entry(lp, struct cache_deferred_req, hash);
lp = lp->next;
if (dreq->item == item) {
list_del(&dreq->hash);
list_del_init(&dreq->hash);
list_move(&dreq->recent, &pending);
cache_defer_cnt--;
}
......@@ -577,7 +590,7 @@ void cache_clean_deferred(void *owner)
list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
if (dreq->owner == owner) {
list_del(&dreq->hash);
list_del_init(&dreq->hash);
list_move(&dreq->recent, &pending);
cache_defer_cnt--;
}
......@@ -887,7 +900,7 @@ static int cache_release(struct inode *inode, struct file *filp,
static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
{
struct cache_queue *cq;
spin_lock(&queue_lock);
......
......@@ -288,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
.bc_xprt = args->bc_xprt,
};
char servername[48];
......
......@@ -21,6 +21,8 @@
#include <linux/sunrpc/clnt.h>
#include "sunrpc.h"
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
#define RPC_TASK_MAGIC_ID 0xf00baa
......@@ -711,11 +713,6 @@ static void rpc_async_schedule(struct work_struct *work)
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
}
struct rpc_buffer {
size_t len;
char data[];
};
/**
* rpc_malloc - allocate an RPC buffer
* @task: RPC task that will use this buffer
......
......@@ -27,11 +27,25 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef _NET_SUNRPC_SUNRPC_H
#define _NET_SUNRPC_SUNRPC_H
#include <linux/net.h>
/*
* Header for dynamically allocated rpc buffers.
*/
struct rpc_buffer {
size_t len;
char data[];
};
static inline int rpc_reply_expected(struct rpc_task *task)
{
return (task->tk_msg.rpc_proc != NULL) &&
(task->tk_msg.rpc_proc->p_decode != NULL);
}
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
#endif /* _NET_SUNRPC_SUNRPC_H */
......@@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
......@@ -710,10 +711,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
spin_unlock_bh(&pool->sp_lock);
len = 0;
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc_recv: found XPT_CLOSE\n");
svc_delete_xprt(xprt);
} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
struct svc_xprt *newxpt;
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt) {
......@@ -739,7 +737,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
svc_xprt_received(newxpt);
}
svc_xprt_received(xprt);
} else {
} else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, pool->sp_id, xprt,
atomic_read(&xprt->xpt_ref.refcount));
......@@ -752,6 +750,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
dprintk("svc: got len=%d\n", len);
}
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc_recv: found XPT_CLOSE\n");
svc_delete_xprt(xprt);
}
/* No data, incomplete (TCP) read, or accept() */
if (len == 0 || len == -EAGAIN) {
rqstp->rq_res.len = 0;
......@@ -808,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp)
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
rpc_wake_up(&xprt->xpt_bc_pending);
svc_xprt_release(rqstp);
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
......@@ -1166,11 +1170,6 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
lock_kernel();
/* bump up the pseudo refcount while traversing */
svc_get(serv);
unlock_kernel();
if (!pidx)
return SEQ_START_TOKEN;
return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
......@@ -1198,12 +1197,6 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
struct svc_serv *serv = m->private;
lock_kernel();
/* this function really, really should have been called svc_put() */
svc_destroy(serv);
unlock_kernel();
}
static int svc_pool_stats_show(struct seq_file *m, void *p)
......
......@@ -668,6 +668,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
case 0:
*gip = ug->gi;
get_group_info(*gip);
cache_put(&ug->h, &unix_gid_cache);
return 0;
default:
return -EAGAIN;
......
......@@ -49,6 +49,7 @@
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
......@@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
}
/*
* Generic sendto routine
* send routine intended to be shared by the fore- and back-channel
*/
static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct socket *sock = svsk->sk_sock;
int slen;
union {
struct cmsghdr hdr;
long all[SVC_PKTINFO_SPACE / sizeof(long)];
} buffer;
struct cmsghdr *cmh = &buffer.hdr;
int len = 0;
int result;
int size;
struct page **ppage = xdr->pages;
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
unsigned int flags = MSG_MORE;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
int slen;
int len = 0;
slen = xdr->len;
if (rqstp->rq_prot == IPPROTO_UDP) {
struct msghdr msg = {
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
.msg_flags = MSG_MORE,
};
svc_set_cmsg_data(rqstp, cmh);
if (sock_sendmsg(sock, &msg, 0) < 0)
goto out;
}
/* send head */
if (slen == xdr->head[0].iov_len)
flags = 0;
len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
len = kernel_sendpage(sock, headpage, headoffset,
xdr->head[0].iov_len, flags);
if (len != xdr->head[0].iov_len)
goto out;
......@@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
base = 0;
ppage++;
}
/* send tail */
if (xdr->tail[0].iov_len) {
result = kernel_sendpage(sock, rqstp->rq_respages[0],
((unsigned long)xdr->tail[0].iov_base)
& (PAGE_SIZE-1),
xdr->tail[0].iov_len, 0);
result = kernel_sendpage(sock, tailpage, tailoffset,
xdr->tail[0].iov_len, 0);
if (result > 0)
len += result;
}
out:
return len;
}
/*
* Generic sendto routine
*/
static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct socket *sock = svsk->sk_sock;
union {
struct cmsghdr hdr;
long all[SVC_PKTINFO_SPACE / sizeof(long)];
} buffer;
struct cmsghdr *cmh = &buffer.hdr;
int len = 0;
unsigned long tailoff;
unsigned long headoff;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
if (rqstp->rq_prot == IPPROTO_UDP) {
struct msghdr msg = {
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
.msg_controllen = sizeof(buffer),
.msg_flags = MSG_MORE,
};
svc_set_cmsg_data(rqstp, cmh);
if (sock_sendmsg(sock, &msg, 0) < 0)
goto out;
}
tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
headoff = 0;
len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
rqstp->rq_respages[0], tailoff);
out:
dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
......@@ -431,6 +452,32 @@ static void svc_tcp_write_space(struct sock *sk)
svc_write_space(sk);
}
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
struct cmsghdr *cmh)
{
struct in_pktinfo *pki = CMSG_DATA(cmh);
if (cmh->cmsg_type != IP_PKTINFO)
return 0;
rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
return 1;
}
/*
* See net/ipv6/datagram.c : datagram_recv_ctl
*/
static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
struct cmsghdr *cmh)
{
struct in6_pktinfo *pki = CMSG_DATA(cmh);
if (cmh->cmsg_type != IPV6_PKTINFO)
return 0;
ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
return 1;
}
/*
* Copy the UDP datagram's destination address to the rqstp structure.
* The 'destination' address in this case is the address to which the
......@@ -438,23 +485,17 @@ static void svc_tcp_write_space(struct sock *sk)
* hosts, this can change from msg to msg. Note that only the IP
* address changes, the port number should remain the same.
*/
static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
struct cmsghdr *cmh)
static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
struct cmsghdr *cmh)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
switch (svsk->sk_sk->sk_family) {
case AF_INET: {
struct in_pktinfo *pki = CMSG_DATA(cmh);
rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
}
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
}
switch (cmh->cmsg_level) {
case SOL_IP:
return svc_udp_get_dest_address4(rqstp, cmh);
case SOL_IPV6:
return svc_udp_get_dest_address6(rqstp, cmh);
}
return 0;
}
/*
......@@ -531,16 +572,15 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_prot = IPPROTO_UDP;
if (cmh->cmsg_level != IPPROTO_IP ||
cmh->cmsg_type != IP_PKTINFO) {
if (!svc_udp_get_dest_address(rqstp, cmh)) {
if (net_ratelimit())
printk("rpcsvc: received unknown control message:"
"%d/%d\n",
cmh->cmsg_level, cmh->cmsg_type);
printk(KERN_WARNING
"svc: received unknown control message %d/%d; "
"dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
skb_free_datagram(svsk->sk_sk, skb);
return 0;
}
svc_udp_get_dest_address(rqstp, cmh);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
......@@ -651,8 +691,7 @@ static struct svc_xprt_class svc_udp_class = {
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
int one = 1;
mm_segment_t oldfs;
int err, level, optname, one = 1;
svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
......@@ -671,12 +710,22 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
oldfs = get_fs();
set_fs(KERNEL_DS);
/* make sure we get destination address info */
svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
(char __user *)&one, sizeof(one));
set_fs(oldfs);
switch (svsk->sk_sk->sk_family) {
case AF_INET:
level = SOL_IP;
optname = IP_PKTINFO;
break;
case AF_INET6:
level = SOL_IPV6;
optname = IPV6_RECVPKTINFO;
break;
default:
BUG();
}
err = kernel_setsockopt(svsk->sk_sock, level, optname,
(char *)&one, sizeof(one));
dprintk("svc: kernel_setsockopt returned %d\n", err);
}
/*
......@@ -826,21 +875,15 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
/*
* Receive data from a TCP socket.
* Receive data.
* If we haven't gotten the record length yet, get the next four bytes.
* Otherwise try to gobble up as much as possible up to the complete
* record length.
*/
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int len;
struct kvec *vec;
int pnum, vlen;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
int len;
if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
/* sndbuf needs to have room for one request
......@@ -861,10 +904,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
/* Receive data. If we haven't got the record length yet, get
* the next four bytes. Otherwise try to gobble up as much as
* possible up to the complete record length.
*/
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
struct kvec iov;
......@@ -879,7 +918,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
dprintk("svc: short recvfrom while reading record "
"length (%d of %d)\n", len, want);
svc_xprt_received(&svsk->sk_xprt);
return -EAGAIN; /* record header not complete */
goto err_again; /* record header not complete */
}
svsk->sk_reclen = ntohl(svsk->sk_reclen);
......@@ -894,6 +933,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
"per record not supported\n");
goto err_delete;
}
svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
if (svsk->sk_reclen > serv->sv_max_mesg) {
......@@ -914,17 +954,121 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
dprintk("svc: incomplete TCP record (%d of %d)\n",
len, svsk->sk_reclen);
svc_xprt_received(&svsk->sk_xprt);
return -EAGAIN; /* record not complete */
goto err_again; /* record not complete */
}
len = svsk->sk_reclen;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
return len;
error:
if (len == -EAGAIN) {
dprintk("RPC: TCP recv_record got EAGAIN\n");
svc_xprt_received(&svsk->sk_xprt);
}
return len;
err_delete:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_again:
return -EAGAIN;
}
static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
struct rpc_rqst **reqpp, struct kvec *vec)
{
struct rpc_rqst *req = NULL;
u32 *p;
u32 xid;
u32 calldir;
int len;
len = svc_recvfrom(rqstp, vec, 1, 8);
if (len < 0)
goto error;
p = (u32 *)rqstp->rq_arg.head[0].iov_base;
xid = *p++;
calldir = *p;
if (calldir == 0) {
/* REQUEST is the most common case */
vec[0] = rqstp->rq_arg.head[0];
} else {
/* REPLY */
if (svsk->sk_bc_xprt)
req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
if (!req) {
printk(KERN_NOTICE
"%s: Got unrecognized reply: "
"calldir 0x%x sk_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
svsk->sk_bc_xprt, xid);
vec[0] = rqstp->rq_arg.head[0];
goto out;
}
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(struct xdr_buf));
/* copy the xid and call direction */
memcpy(req->rq_private_buf.head[0].iov_base,
rqstp->rq_arg.head[0].iov_base, 8);
vec[0] = req->rq_private_buf.head[0];
}
out:
vec[0].iov_base += 8;
vec[0].iov_len -= 8;
len = svsk->sk_reclen - 8;
error:
*reqpp = req;
return len;
}
/*
* Receive data from a TCP socket.
*/
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int len;
struct kvec *vec;
int pnum, vlen;
struct rpc_rqst *req = NULL;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
len = svc_tcp_recv_record(svsk, rqstp);
if (len < 0)
goto error;
vec = rqstp->rq_vec;
vec[0] = rqstp->rq_arg.head[0];
vlen = PAGE_SIZE;
/*
* We have enough data for the whole tcp record. Let's try and read the
* first 8 bytes to get the xid and the call direction. We can use this
* to figure out if this is a call or a reply to a callback. If
* sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
* In that case, don't bother with the calldir and just read the data.
* It will be rejected in svc_process.
*/
if (len >= 8) {
len = svc_process_calldir(svsk, rqstp, &req, vec);
if (len < 0)
goto err_again;
vlen -= 8;
}
pnum = 1;
while (vlen < len) {
vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
vec[pnum].iov_base = (req) ?
page_address(req->rq_private_buf.pages[pnum - 1]) :
page_address(rqstp->rq_pages[pnum]);
vec[pnum].iov_len = PAGE_SIZE;
pnum++;
vlen += PAGE_SIZE;
......@@ -934,8 +1078,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
/* Now receive data */
len = svc_recvfrom(rqstp, vec, pnum, len);
if (len < 0)
goto error;
goto err_again;
/*
* Account for the 8 bytes we read earlier
*/
len += 8;
if (req) {
xprt_complete_rqst(req->rq_task, len);
len = 0;
goto out;
}
dprintk("svc: TCP complete record (%d bytes)\n", len);
rqstp->rq_arg.len = len;
rqstp->rq_arg.page_base = 0;
......@@ -949,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
out:
/* Reset TCP read info */
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
......@@ -960,21 +1115,19 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
return len;
err_delete:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
return -EAGAIN;
error:
err_again:
if (len == -EAGAIN) {
dprintk("RPC: TCP recvfrom got EAGAIN\n");
svc_xprt_received(&svsk->sk_xprt);
} else {
return len;
}
error:
if (len != -EAGAIN) {
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_xprt.xpt_server->sv_name, -len);
goto err_delete;
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
}
return len;
return -EAGAIN;
}
/*
......
......@@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
}
static inline int xprt_has_timer(struct rpc_xprt *xprt)
{
return xprt->idle_timeout != 0;
}
/**
* xprt_prepare_transmit - reserve the transport before sending a request
* @task: RPC task about to send a request
......@@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task)
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
xprt->last_used = jiffies;
if (list_empty(&xprt->recv))
if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
mod_timer(&xprt->timer,
xprt->last_used + xprt->idle_timeout);
spin_unlock_bh(&xprt->transport_lock);
......@@ -1082,8 +1087,11 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
#endif /* CONFIG_NFS_V4_1 */
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
setup_timer(&xprt->timer, xprt_init_autodisconnect,
(unsigned long)xprt);
if (xprt_has_timer(xprt))
setup_timer(&xprt->timer, xprt_init_autodisconnect,
(unsigned long)xprt);
else
init_timer(&xprt->timer);
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
xprt->bind_index = 0;
......@@ -1102,7 +1110,6 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
dprintk("RPC: created transport %p with %u slots\n", xprt,
xprt->max_reqs);
return xprt;
}
......
......@@ -730,12 +730,12 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
goto err;
mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
if (!mr)
if (IS_ERR(mr))
goto err_free_frmr;
pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
RPCSVC_MAXPAGES);
if (!pl)
if (IS_ERR(pl))
goto err_free_mr;
frmr->mr = mr;
......
......@@ -32,6 +32,7 @@
#include <linux/tcp.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>
#ifdef CONFIG_NFS_V4_1
......@@ -43,6 +44,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include "sunrpc.h"
/*
* xprtsock tunables
*/
......@@ -2098,6 +2100,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
xprt->stat.bklog_u);
}
/*
* Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
* we allocate pages instead doing a kmalloc like rpc_malloc is because we want
* to use the server side send routines.
*/
void *bc_malloc(struct rpc_task *task, size_t size)
{
struct page *page;
struct rpc_buffer *buf;
BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
page = alloc_page(GFP_KERNEL);
if (!page)
return NULL;
buf = page_address(page);
buf->len = PAGE_SIZE;
return buf->data;
}
/*
* Free the space allocated in the bc_alloc routine
*/
void bc_free(void *buffer)
{
struct rpc_buffer *buf;
if (!buffer)
return;
buf = container_of(buffer, struct rpc_buffer, data);
free_page((unsigned long)buf);
}
/*
* Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
* held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
*/
static int bc_sendto(struct rpc_rqst *req)
{
int len;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
unsigned long headoff;
unsigned long tailoff;
/*
* Set up the rpc header and record marker stuff
*/
xs_encode_tcp_record_marker(xbufp);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
len = svc_send_common(sock, xbufp,
virt_to_page(xbufp->head[0].iov_base), headoff,
xbufp->tail[0].iov_base, tailoff);
if (len != xbufp->len) {
printk(KERN_NOTICE "Error sending entire callback!\n");
len = -EAGAIN;
}
return len;
}
/*
* The send routine. Borrows from svc_send
*/
static int bc_send_request(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct svc_xprt *xprt;
struct svc_sock *svsk;
u32 len;
dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
/*
* Get the server socket associated with this callback xprt
*/
xprt = req->rq_xprt->bc_xprt;
svsk = container_of(xprt, struct svc_sock, sk_xprt);
/*
* Grab the mutex to serialize data as the connection is shared
* with the fore channel
*/
if (!mutex_trylock(&xprt->xpt_mutex)) {
rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
if (!mutex_trylock(&xprt->xpt_mutex))
return -EAGAIN;
rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
}
if (test_bit(XPT_DEAD, &xprt->xpt_flags))
len = -ENOTCONN;
else
len = bc_sendto(req);
mutex_unlock(&xprt->xpt_mutex);
if (len > 0)
len = 0;
return len;
}
/*
* The close routine. Since this is client initiated, we do nothing
*/
static void bc_close(struct rpc_xprt *xprt)
{
return;
}
/*
* The xprt destroy routine. Again, because this connection is client
* initiated, we do nothing
*/
static void bc_destroy(struct rpc_xprt *xprt)
{
return;
}
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
......@@ -2134,6 +2264,22 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.print_stats = xs_tcp_print_stats,
};
/*
* The rpc_xprt_ops for the server backchannel
*/
static struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt,
.buf_alloc = bc_malloc,
.buf_free = bc_free,
.send_request = bc_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = bc_close,
.destroy = bc_destroy,
.print_stats = xs_tcp_print_stats,
};
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
unsigned int slot_table_size)
{
......@@ -2322,11 +2468,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
return ERR_PTR(-EINVAL);
}
/**
* xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
* @args: rpc transport creation arguments
*
*/
static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
{
struct sockaddr *addr = args->dstaddr;
struct rpc_xprt *xprt;
struct sock_xprt *transport;
struct svc_sock *bc_sock;
if (!args->bc_xprt)
ERR_PTR(-EINVAL);
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->timeout = &xs_tcp_default_timeout;
/* backchannel */
xprt_set_bound(xprt);
xprt->bind_timeout = 0;
xprt->connect_timeout = 0;
xprt->reestablish_timeout = 0;
xprt->idle_timeout = 0;
/*
* The backchannel uses the same socket connection as the
* forechannel
*/
xprt->bc_xprt = args->bc_xprt;
bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
bc_sock->sk_bc_xprt = xprt;
transport->sock = bc_sock->sk_sock;
transport->inet = bc_sock->sk_sk;
xprt->ops = &bc_tcp_ops;
switch (addr->sa_family) {
case AF_INET:
xs_format_peer_addresses(xprt, "tcp",
RPCBIND_NETID_TCP);
break;
case AF_INET6:
xs_format_peer_addresses(xprt, "tcp",
RPCBIND_NETID_TCP6);
break;
default:
kfree(xprt);
return ERR_PTR(-EAFNOSUPPORT);
}
if (xprt_bound(xprt))
dprintk("RPC: set up xprt to %s (port %s) via %s\n",
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT],
xprt->address_strings[RPC_DISPLAY_PROTO]);
else
dprintk("RPC: set up xprt to %s (autobind) via %s\n",
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PROTO]);
/*
* Since we don't want connections for the backchannel, we set
* the xprt status to connected
*/
xprt_set_connected(xprt);
if (try_module_get(THIS_MODULE))
return xprt;
kfree(xprt->slot);
kfree(xprt);
return ERR_PTR(-EINVAL);
}
static struct xprt_class xs_udp_transport = {
.list = LIST_HEAD_INIT(xs_udp_transport.list),
.name = "udp",
.owner = THIS_MODULE,
.ident = IPPROTO_UDP,
.ident = XPRT_TRANSPORT_UDP,
.setup = xs_setup_udp,
};
......@@ -2334,10 +2562,18 @@ static struct xprt_class xs_tcp_transport = {
.list = LIST_HEAD_INIT(xs_tcp_transport.list),
.name = "tcp",
.owner = THIS_MODULE,
.ident = IPPROTO_TCP,
.ident = XPRT_TRANSPORT_TCP,
.setup = xs_setup_tcp,
};
static struct xprt_class xs_bc_tcp_transport = {
.list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
.name = "tcp NFSv4.1 backchannel",
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_BC_TCP,
.setup = xs_setup_bc_tcp,
};
/**
* init_socket_xprt - set up xprtsock's sysctls, register with RPC client
*
......@@ -2351,6 +2587,7 @@ int init_socket_xprt(void)
xprt_register_transport(&xs_udp_transport);
xprt_register_transport(&xs_tcp_transport);
xprt_register_transport(&xs_bc_tcp_transport);
return 0;
}
......@@ -2370,6 +2607,7 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_udp_transport);
xprt_unregister_transport(&xs_tcp_transport);
xprt_unregister_transport(&xs_bc_tcp_transport);
}
static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册