Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
cloud-kernel
提交
6925bac1
cloud-kernel
项目概览
openanolis
/
cloud-kernel
大约 1 年 前同步成功
通知
158
Star
36
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
2
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
cloud-kernel
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
2
合并请求
2
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6925bac1
编写于
10月 15, 2008
作者:
T
Trond Myklebust
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'next'
上级
04ab5918
011935a0
变更
26
显示空白变更内容
内联
并排
Showing
26 changed files
with
955 additions
and
500 deletions
+955
-500
fs/nfs/client.c
fs/nfs/client.c
+2
-3
fs/nfs/dir.c
fs/nfs/dir.c
+15
-5
fs/nfs/file.c
fs/nfs/file.c
+7
-11
fs/nfs/inode.c
fs/nfs/inode.c
+109
-74
fs/nfs/internal.h
fs/nfs/internal.h
+23
-2
fs/nfs/mount_clnt.c
fs/nfs/mount_clnt.c
+2
-1
fs/nfs/namespace.c
fs/nfs/namespace.c
+5
-2
fs/nfs/nfs3acl.c
fs/nfs/nfs3acl.c
+2
-0
fs/nfs/nfs3proc.c
fs/nfs/nfs3proc.c
+18
-2
fs/nfs/nfs4namespace.c
fs/nfs/nfs4namespace.c
+49
-56
fs/nfs/proc.c
fs/nfs/proc.c
+8
-2
fs/nfs/super.c
fs/nfs/super.c
+76
-50
fs/nfs/unlink.c
fs/nfs/unlink.c
+3
-2
fs/nfs/write.c
fs/nfs/write.c
+2
-1
include/linux/nfs_fs.h
include/linux/nfs_fs.h
+7
-12
include/linux/nfs_fs_sb.h
include/linux/nfs_fs_sb.h
+0
-1
include/linux/nfs_mount.h
include/linux/nfs_mount.h
+4
-0
include/linux/nfs_xdr.h
include/linux/nfs_xdr.h
+6
-5
include/linux/sunrpc/xprtrdma.h
include/linux/sunrpc/xprtrdma.h
+1
-3
net/sunrpc/clnt.c
net/sunrpc/clnt.c
+2
-2
net/sunrpc/rpcb_clnt.c
net/sunrpc/rpcb_clnt.c
+32
-8
net/sunrpc/xprt.c
net/sunrpc/xprt.c
+4
-8
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/rpc_rdma.c
+24
-5
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/transport.c
+31
-10
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/verbs.c
+507
-234
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtrdma/xprt_rdma.h
+16
-1
未找到文件。
fs/nfs/client.c
浏览文件 @
6925bac1
...
...
@@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server,
server
->
nfs_client
=
clp
;
/* Initialise the client representation from the mount data */
server
->
flags
=
data
->
flags
&
NFS_MOUNT_FLAGMASK
;
server
->
flags
=
data
->
flags
;
if
(
data
->
rsize
)
server
->
rsize
=
nfs_block_size
(
data
->
rsize
,
NULL
);
...
...
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD
(
&
server
->
client_link
);
INIT_LIST_HEAD
(
&
server
->
master_link
);
init_waitqueue_head
(
&
server
->
active_wq
);
atomic_set
(
&
server
->
active
,
0
);
server
->
io_stats
=
nfs_alloc_iostats
();
...
...
@@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server,
goto
error
;
/* Initialise the client representation from the mount data */
server
->
flags
=
data
->
flags
&
NFS_MOUNT_FLAGMASK
;
server
->
flags
=
data
->
flags
;
server
->
caps
|=
NFS_CAP_ATOMIC_OPEN
;
if
(
data
->
rsize
)
...
...
fs/nfs/dir.c
浏览文件 @
6925bac1
...
...
@@ -156,6 +156,7 @@ typedef struct {
decode_dirent_t
decode
;
int
plus
;
unsigned
long
timestamp
;
unsigned
long
gencount
;
int
timestamp_valid
;
}
nfs_readdir_descriptor_t
;
...
...
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
struct
file
*
file
=
desc
->
file
;
struct
inode
*
inode
=
file
->
f_path
.
dentry
->
d_inode
;
struct
rpc_cred
*
cred
=
nfs_file_cred
(
file
);
unsigned
long
timestamp
;
unsigned
long
timestamp
,
gencount
;
int
error
;
dfprintk
(
DIRCACHE
,
"NFS: %s: reading cookie %Lu into page %lu
\n
"
,
...
...
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
again:
timestamp
=
jiffies
;
gencount
=
nfs_inc_attr_generation_counter
();
error
=
NFS_PROTO
(
inode
)
->
readdir
(
file
->
f_path
.
dentry
,
cred
,
desc
->
entry
->
cookie
,
page
,
NFS_SERVER
(
inode
)
->
dtsize
,
desc
->
plus
);
if
(
error
<
0
)
{
...
...
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
goto
error
;
}
desc
->
timestamp
=
timestamp
;
desc
->
gencount
=
gencount
;
desc
->
timestamp_valid
=
1
;
SetPageUptodate
(
page
);
/* Ensure consistent page alignment of the data.
...
...
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
if
(
IS_ERR
(
p
))
return
PTR_ERR
(
p
);
desc
->
ptr
=
p
;
if
(
desc
->
timestamp_valid
)
if
(
desc
->
timestamp_valid
)
{
desc
->
entry
->
fattr
->
time_start
=
desc
->
timestamp
;
else
desc
->
entry
->
fattr
->
gencount
=
desc
->
gencount
;
}
else
desc
->
entry
->
fattr
->
valid
&=
~
NFS_ATTR_FATTR
;
return
0
;
}
...
...
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
struct
rpc_cred
*
cred
=
nfs_file_cred
(
file
);
struct
page
*
page
=
NULL
;
int
status
;
unsigned
long
timestamp
;
unsigned
long
timestamp
,
gencount
;
dfprintk
(
DIRCACHE
,
"NFS: uncached_readdir() searching for cookie %Lu
\n
"
,
(
unsigned
long
long
)
*
desc
->
dir_cookie
);
...
...
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
goto
out
;
}
timestamp
=
jiffies
;
gencount
=
nfs_inc_attr_generation_counter
();
status
=
NFS_PROTO
(
inode
)
->
readdir
(
file
->
f_path
.
dentry
,
cred
,
*
desc
->
dir_cookie
,
page
,
NFS_SERVER
(
inode
)
->
dtsize
,
...
...
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
desc
->
ptr
=
kmap
(
page
);
/* matching kunmap in nfs_do_filldir */
if
(
status
>=
0
)
{
desc
->
timestamp
=
timestamp
;
desc
->
gencount
=
gencount
;
desc
->
timestamp_valid
=
1
;
if
((
status
=
dir_decode
(
desc
))
==
0
)
desc
->
entry
->
prev_cookie
=
*
desc
->
dir_cookie
;
...
...
@@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
*/
void
nfs_force_lookup_revalidate
(
struct
inode
*
dir
)
{
NFS_I
(
dir
)
->
cache_change_attribute
=
jiffies
;
NFS_I
(
dir
)
->
cache_change_attribute
++
;
}
/*
...
...
@@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
{
if
(
IS_ROOT
(
dentry
))
return
1
;
if
(
NFS_SERVER
(
dir
)
->
flags
&
NFS_MOUNT_LOOKUP_CACHE_NONE
)
return
0
;
if
(
!
nfs_verify_change_attribute
(
dir
,
dentry
->
d_time
))
return
0
;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
...
...
@@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
/* Don't revalidate a negative dentry if we're creating a new file */
if
(
nd
!=
NULL
&&
nfs_lookup_check_intent
(
nd
,
LOOKUP_CREATE
)
!=
0
)
return
0
;
if
(
NFS_SERVER
(
dir
)
->
flags
&
NFS_MOUNT_LOOKUP_CACHE_NONEG
)
return
1
;
return
!
nfs_check_verifier
(
dir
,
dentry
);
}
...
...
fs/nfs/file.c
浏览文件 @
6925bac1
...
...
@@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
/* origin == SEEK_END => we must revalidate the cached file length */
if
(
origin
==
SEEK_END
)
{
struct
inode
*
inode
=
filp
->
f_mapping
->
host
;
int
retval
=
nfs_revalidate_file_size
(
inode
,
filp
);
if
(
retval
<
0
)
return
(
loff_t
)
retval
;
}
lock_kernel
();
/* BKL needed? */
spin_lock
(
&
inode
->
i_lock
);
loff
=
generic_file_llseek_unlocked
(
filp
,
offset
,
origin
);
spin_unlock
(
&
inode
->
i_lock
);
}
else
loff
=
generic_file_llseek_unlocked
(
filp
,
offset
,
origin
);
unlock_kernel
();
return
loff
;
}
...
...
@@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
filp
->
f_path
.
dentry
->
d_name
.
name
,
fl
->
fl_type
,
fl
->
fl_flags
);
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
* using ((u32) filp | 0x80000000) or some such as the pid.
* Not sure whether that would be unique, though, or whether
* that would break in other places.
*/
if
(
!
(
fl
->
fl_flags
&
FL_FLOCK
))
return
-
ENOLCK
;
...
...
fs/nfs/inode.c
浏览文件 @
6925bac1
...
...
@@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
init_special_inode
(
inode
,
inode
->
i_mode
,
fattr
->
rdev
);
nfsi
->
read_cache_jiffies
=
fattr
->
time_start
;
nfsi
->
last_updated
=
now
;
nfsi
->
cache_change_attribute
=
now
;
nfsi
->
attr_gencount
=
fattr
->
gencount
;
inode
->
i_atime
=
fattr
->
atime
;
inode
->
i_mtime
=
fattr
->
mtime
;
inode
->
i_ctime
=
fattr
->
ctime
;
...
...
@@ -453,6 +452,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
void
nfs_setattr_update_inode
(
struct
inode
*
inode
,
struct
iattr
*
attr
)
{
if
((
attr
->
ia_valid
&
(
ATTR_MODE
|
ATTR_UID
|
ATTR_GID
))
!=
0
)
{
spin_lock
(
&
inode
->
i_lock
);
if
((
attr
->
ia_valid
&
ATTR_MODE
)
!=
0
)
{
int
mode
=
attr
->
ia_mode
&
S_IALLUGO
;
mode
|=
inode
->
i_mode
&
~
S_IALLUGO
;
...
...
@@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode
->
i_uid
=
attr
->
ia_uid
;
if
((
attr
->
ia_valid
&
ATTR_GID
)
!=
0
)
inode
->
i_gid
=
attr
->
ia_gid
;
spin_lock
(
&
inode
->
i_lock
);
NFS_I
(
inode
)
->
cache_validity
|=
NFS_INO_INVALID_ACCESS
|
NFS_INO_INVALID_ACL
;
spin_unlock
(
&
inode
->
i_lock
);
}
...
...
@@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
}
}
/*
 * Wait action: give up with -ERESTARTSYS when the current task has a
 * signal pending, otherwise call schedule() and return 0.
 *
 * @word is not used by this function.
 */
static int nfs_wait_schedule(void *word)
{
	if (!signal_pending(current)) {
		schedule();
		return 0;
	}
	return -ERESTARTSYS;
}
/*
* Wait for the inode to get unlocked.
*/
static
int
nfs_wait_on_inode
(
struct
inode
*
inode
)
{
struct
nfs_inode
*
nfsi
=
NFS_I
(
inode
);
int
error
;
error
=
wait_on_bit_lock
(
&
nfsi
->
flags
,
NFS_INO_REVALIDATING
,
nfs_wait_schedule
,
TASK_KILLABLE
);
return
error
;
}
/*
 * Clear the NFS_INO_REVALIDATING bit in the nfs_inode flags word and wake
 * anyone waiting on that bit. The barrier sits between the clear and the
 * wake-up; keep the three steps in this order.
 */
static void nfs_wake_up_inode(struct inode *inode)
{
	struct nfs_inode *ni = NFS_I(inode);

	clear_bit(NFS_INO_REVALIDATING, &ni->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&ni->flags, NFS_INO_REVALIDATING);
}
int
nfs_getattr
(
struct
vfsmount
*
mnt
,
struct
dentry
*
dentry
,
struct
kstat
*
stat
)
{
struct
inode
*
inode
=
dentry
->
d_inode
;
...
...
@@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
dfprintk
(
PAGECACHE
,
"NFS: revalidating (%s/%Ld)
\n
"
,
inode
->
i_sb
->
s_id
,
(
long
long
)
NFS_FILEID
(
inode
));
nfs_inc_stats
(
inode
,
NFSIOS_INODEREVALIDATE
);
if
(
is_bad_inode
(
inode
))
goto
out_nowai
t
;
goto
ou
t
;
if
(
NFS_STALE
(
inode
))
goto
out_nowait
;
status
=
nfs_wait_on_inode
(
inode
);
if
(
status
<
0
)
goto
out
;
status
=
-
ESTALE
;
if
(
NFS_STALE
(
inode
))
goto
out
;
nfs_inc_stats
(
inode
,
NFSIOS_INODEREVALIDATE
);
status
=
NFS_PROTO
(
inode
)
->
getattr
(
server
,
NFS_FH
(
inode
),
&
fattr
);
if
(
status
!=
0
)
{
dfprintk
(
PAGECACHE
,
"nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d
\n
"
,
...
...
@@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
goto
out
;
}
spin_lock
(
&
inode
->
i_lock
);
status
=
nfs_update_inode
(
inode
,
&
fattr
);
status
=
nfs_refresh_inode
(
inode
,
&
fattr
);
if
(
status
)
{
spin_unlock
(
&
inode
->
i_lock
);
dfprintk
(
PAGECACHE
,
"nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d
\n
"
,
inode
->
i_sb
->
s_id
,
(
long
long
)
NFS_FILEID
(
inode
),
status
);
goto
out
;
}
spin_unlock
(
&
inode
->
i_lock
);
if
(
nfsi
->
cache_validity
&
NFS_INO_INVALID_ACL
)
nfs_zap_acl_cache
(
inode
);
...
...
@@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
(
long
long
)
NFS_FILEID
(
inode
));
out:
nfs_wake_up_inode
(
inode
);
out_nowait:
return
status
;
}
...
...
@@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return
-
EIO
;
}
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode
(
inode
,
fattr
);
if
((
fattr
->
valid
&
NFS_ATTR_FATTR_V4
)
!=
0
&&
nfsi
->
change_attr
!=
fattr
->
change_attr
)
invalid
|=
NFS_INO_INVALID_ATTR
|
NFS_INO_REVAL_PAGECACHE
;
...
...
@@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if
(
invalid
!=
0
)
nfsi
->
cache_validity
|=
invalid
;
else
nfsi
->
cache_validity
&=
~
(
NFS_INO_INVALID_ATTR
|
NFS_INO_INVALID_ATIME
|
NFS_INO_REVAL_PAGECACHE
);
nfsi
->
read_cache_jiffies
=
fattr
->
time_start
;
return
0
;
}
/*
 * Return 1 when timespec_compare() ranks the ctime in @fattr above the
 * ctime stored in @inode, and 0 otherwise.
 */
static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
	if (timespec_compare(&fattr->ctime, &inode->i_ctime) > 0)
		return 1;
	return 0;
}
/*
 * Return 1 when the size in @fattr, converted with nfs_size_to_loff_t(),
 * exceeds the size i_size_read() reports for @inode, and 0 otherwise.
 */
static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
	if (nfs_size_to_loff_t(fattr->size) > i_size_read(inode))
		return 1;
	return 0;
}
/*
 * File-local attribute generation counter. It is advanced by
 * nfs_inc_attr_generation_counter() and sampled by
 * nfs_read_attr_generation_counter().
 */
static
unsigned
long
nfs_attr_generation_counter
;
/*
 * Return the current value of nfs_attr_generation_counter. A read barrier
 * is issued before the counter is loaded.
 */
static unsigned long nfs_read_attr_generation_counter(void)
{
	unsigned long snapshot;

	smp_rmb();
	snapshot = nfs_attr_generation_counter;
	return snapshot;
}
/*
 * Advance nfs_attr_generation_counter by one and return the new value.
 * A read barrier precedes the update and a write barrier follows it.
 *
 * NOTE(review): the update is a plain read-modify-write on an ordinary
 * unsigned long, not an atomic operation - confirm that callers tolerate
 * two concurrent invocations observing the same value.
 */
unsigned long nfs_inc_attr_generation_counter(void)
{
	unsigned long next;

	smp_rmb();
	next = nfs_attr_generation_counter + 1;
	nfs_attr_generation_counter = next;
	smp_wmb();
	return next;
}
/*
 * Prepare @fattr before it is handed to an RPC call: clear the valid mask,
 * stamp time_start with the current jiffies value, and tag it with a fresh
 * value from nfs_inc_attr_generation_counter().
 */
void nfs_fattr_init(struct nfs_fattr *fattr)
{
	unsigned long generation;

	fattr->valid = 0;
	fattr->time_start = jiffies;
	generation = nfs_inc_attr_generation_counter();
	fattr->gencount = generation;
}
/**
 * nfs_inode_attrs_need_update - check if the inode attributes need updating
 * @inode - pointer to inode
 * @fattr - attributes
 *
 * Attempts to divine whether or not an RPC call reply carrying stale
 * attributes got scheduled after another call carrying updated ones.
 *
 * A more recent ctime is taken to mean that the attributes in @fattr are
 * newer. Because ctime may not have changed, or may have gone backwards
 * (if someone reset the clock on the server), the function also compares
 * generation counts to judge whether this RPC call was started after the
 * inode was last updated.
 *
 * @fattr is considered more recent when any of the following holds,
 * checked in this order:
 *  - fattr->gencount is ahead of the inode's attr_gencount
 *  - nfs_ctime_need_update() reports a newer ctime
 *  - nfs_size_need_update() reports a larger size
 *  - the inode's attr_gencount is ahead of the global generation counter,
 *    which is the check for wraparound of 'attr_gencount'
 *
 * The function returns 'true' (1) if it thinks the attributes in 'fattr'
 * are more recent than the ones cached in the inode, and 0 otherwise.
 */
static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
	const struct nfs_inode *nfsi = NFS_I(inode);

	if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
		return 1;
	if (nfs_ctime_need_update(inode, fattr))
		return 1;
	if (nfs_size_need_update(inode, fattr))
		return 1;
	/* Wraparound check: cached generation ahead of the global counter. */
	return (long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0;
}
/*
 * Apply @fattr to @inode: when nfs_inode_attrs_need_update() judges the
 * attributes to be newer they are installed with nfs_update_inode(),
 * otherwise they are only compared against the cache with
 * nfs_check_inode_attributes(). The chosen helper's status is returned.
 *
 * The callers visible in this file invoke this with inode->i_lock held.
 */
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
	int status;

	if (!nfs_inode_attrs_need_update(inode, fattr))
		status = nfs_check_inode_attributes(inode, fattr);
	else
		status = nfs_update_inode(inode, fattr);
	return status;
}
/**
* nfs_refresh_inode - try to update the inode attribute cache
* @inode - pointer to inode
...
...
@@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
*/
int
nfs_refresh_inode
(
struct
inode
*
inode
,
struct
nfs_fattr
*
fattr
)
{
struct
nfs_inode
*
nfsi
=
NFS_I
(
inode
);
int
status
;
if
((
fattr
->
valid
&
NFS_ATTR_FATTR
)
==
0
)
return
0
;
spin_lock
(
&
inode
->
i_lock
);
if
(
time_after
(
fattr
->
time_start
,
nfsi
->
last_updated
))
status
=
nfs_update_inode
(
inode
,
fattr
);
else
status
=
nfs_check_inode_attributes
(
inode
,
fattr
);
status
=
nfs_refresh_inode_locked
(
inode
,
fattr
);
spin_unlock
(
&
inode
->
i_lock
);
return
status
;
}
/*
 * Post-operation attribute update.
 *
 * Always marks the cached attributes as invalid and the page cache as
 * needing revalidation; for directories the cached data is marked invalid
 * as well. If @fattr carries attributes (NFS_ATTR_FATTR set in
 * fattr->valid) they are then applied through nfs_refresh_inode_locked()
 * and its status is returned; otherwise the function returns 0.
 */
static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
	if (S_ISDIR(inode->i_mode))
		nfsi->cache_validity |= NFS_INO_INVALID_DATA;

	if (fattr->valid & NFS_ATTR_FATTR)
		return nfs_refresh_inode_locked(inode, fattr);
	return 0;
}
/**
* nfs_post_op_update_inode - try to update the inode attribute cache
* @inode - pointer to inode
...
...
@@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
int
nfs_post_op_update_inode
(
struct
inode
*
inode
,
struct
nfs_fattr
*
fattr
)
{
struct
nfs_inode
*
nfsi
=
NFS_I
(
inode
)
;
int
status
;
spin_lock
(
&
inode
->
i_lock
);
nfsi
->
cache_validity
|=
NFS_INO_INVALID_ATTR
|
NFS_INO_REVAL_PAGECACHE
;
if
(
S_ISDIR
(
inode
->
i_mode
))
nfsi
->
cache_validity
|=
NFS_INO_INVALID_DATA
;
status
=
nfs_post_op_update_inode_locked
(
inode
,
fattr
);
spin_unlock
(
&
inode
->
i_lock
);
return
nfs_refresh_inode
(
inode
,
fattr
)
;
return
status
;
}
/**
...
...
@@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
int
nfs_post_op_update_inode_force_wcc
(
struct
inode
*
inode
,
struct
nfs_fattr
*
fattr
)
{
int
status
;
spin_lock
(
&
inode
->
i_lock
);
/* Don't do a WCC update if these attributes are already stale */
if
((
fattr
->
valid
&
NFS_ATTR_FATTR
)
==
0
||
!
nfs_inode_attrs_need_update
(
inode
,
fattr
))
{
fattr
->
valid
&=
~
(
NFS_ATTR_WCC_V4
|
NFS_ATTR_WCC
);
goto
out_noforce
;
}
if
((
fattr
->
valid
&
NFS_ATTR_FATTR_V4
)
!=
0
&&
(
fattr
->
valid
&
NFS_ATTR_WCC_V4
)
==
0
)
{
fattr
->
pre_change_attr
=
NFS_I
(
inode
)
->
change_attr
;
...
...
@@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
fattr
->
pre_size
=
i_size_read
(
inode
);
fattr
->
valid
|=
NFS_ATTR_WCC
;
}
return
nfs_post_op_update_inode
(
inode
,
fattr
);
out_noforce:
status
=
nfs_post_op_update_inode_locked
(
inode
,
fattr
);
spin_unlock
(
&
inode
->
i_lock
);
return
status
;
}
/*
...
...
@@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
/* If ctime has changed we should definitely clear access+acl caches */
if
(
!
timespec_equal
(
&
inode
->
i_ctime
,
&
fattr
->
ctime
))
invalid
|=
NFS_INO_INVALID_ACCESS
|
NFS_INO_INVALID_ACL
;
invalid
|=
NFS_INO_INVALID_A
TTR
|
NFS_INO_INVALID_A
CCESS
|
NFS_INO_INVALID_ACL
;
}
else
if
(
nfsi
->
change_attr
!=
fattr
->
change_attr
)
{
dprintk
(
"NFS: change_attr change on server for file %s/%ld
\n
"
,
inode
->
i_sb
->
s_id
,
inode
->
i_ino
);
...
...
@@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode
->
i_gid
!=
fattr
->
gid
)
invalid
|=
NFS_INO_INVALID_ATTR
|
NFS_INO_INVALID_ACCESS
|
NFS_INO_INVALID_ACL
;
if
(
inode
->
i_nlink
!=
fattr
->
nlink
)
invalid
|=
NFS_INO_INVALID_ATTR
;
inode
->
i_mode
=
fattr
->
mode
;
inode
->
i_nlink
=
fattr
->
nlink
;
inode
->
i_uid
=
fattr
->
uid
;
...
...
@@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_inc_stats
(
inode
,
NFSIOS_ATTRINVALIDATE
);
nfsi
->
attrtimeo
=
NFS_MINATTRTIMEO
(
inode
);
nfsi
->
attrtimeo_timestamp
=
now
;
nfsi
->
last_updated
=
now
;
nfsi
->
attr_gencount
=
nfs_inc_attr_generation_counter
()
;
}
else
{
if
(
!
time_in_range
(
now
,
nfsi
->
attrtimeo_timestamp
,
nfsi
->
attrtimeo_timestamp
+
nfsi
->
attrtimeo
))
{
if
((
nfsi
->
attrtimeo
<<=
1
)
>
NFS_MAXATTRTIMEO
(
inode
))
nfsi
->
attrtimeo
=
NFS_MAXATTRTIMEO
(
inode
);
nfsi
->
attrtimeo_timestamp
=
now
;
}
/*
* Avoid jiffy wraparound issues with nfsi->last_updated
*/
if
(
!
time_in_range
(
nfsi
->
last_updated
,
nfsi
->
read_cache_jiffies
,
now
))
nfsi
->
last_updated
=
nfsi
->
read_cache_jiffies
;
}
invalid
&=
~
NFS_INO_INVALID_ATTR
;
/* Don't invalidate the data if we were to blame */
...
...
fs/nfs/internal.h
浏览文件 @
6925bac1
...
...
@@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *);
void
nfs_zap_acl_cache
(
struct
inode
*
inode
);
/* super.c */
void
nfs_parse_ip_address
(
char
*
,
size_t
,
struct
sockaddr
*
,
size_t
*
);
extern
struct
file_system_type
nfs_xdev_fs_type
;
#ifdef CONFIG_NFS_V4
extern
struct
file_system_type
nfs4_xdev_fs_type
;
...
...
@@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat;
extern
int
__init
register_nfs_fs
(
void
);
extern
void
__exit
unregister_nfs_fs
(
void
);
extern
void
nfs_sb_active
(
struct
nfs_server
*
server
);
extern
void
nfs_sb_deactive
(
struct
nfs_server
*
server
);
extern
void
nfs_sb_active
(
struct
super_block
*
sb
);
extern
void
nfs_sb_deactive
(
struct
super_block
*
sb
);
/* namespace.c */
extern
char
*
nfs_path
(
const
char
*
base
,
...
...
@@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
PAGE_SIZE
-
1
)
>>
PAGE_SHIFT
;
}
#define IPV6_SCOPE_DELIMITER '%'
/*
 * Set the port number in an address.  Be agnostic about the address
 * family.
 *
 * @port is given in host byte order and stored with htons(). Only AF_INET
 * and AF_INET6 addresses are modified; any other family is left untouched.
 */
static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
{
	switch (sap->sa_family) {
	case AF_INET:
		((struct sockaddr_in *)sap)->sin_port = htons(port);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
		break;
	default:
		/* Unrecognised family: nothing to set. */
		break;
	}
}
fs/nfs/mount_clnt.c
浏览文件 @
6925bac1
...
...
@@ -14,6 +14,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#ifdef RPC_DEBUG
# define NFSDBG_FACILITY NFSDBG_MOUNT
...
...
@@ -98,7 +99,7 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
out_mnt_err:
dprintk
(
"NFS: MNT server returned result %d
\n
"
,
result
.
status
);
status
=
-
EACCES
;
status
=
nfs_stat_to_errno
(
result
.
status
)
;
goto
out
;
}
...
...
fs/nfs/namespace.c
浏览文件 @
6925bac1
...
...
@@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
dprintk
(
"--> nfs_follow_mountpoint()
\n
"
);
BUG_ON
(
IS_ROOT
(
dentry
));
err
=
-
ESTALE
;
if
(
IS_ROOT
(
dentry
))
goto
out_err
;
dprintk
(
"%s: enter
\n
"
,
__func__
);
dput
(
nd
->
path
.
dentry
);
nd
->
path
.
dentry
=
dget
(
dentry
);
...
...
@@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
struct
nfs_clone_mount
*
mountdata
)
{
#ifdef CONFIG_NFS_V4
struct
vfsmount
*
mnt
=
NULL
;
struct
vfsmount
*
mnt
=
ERR_PTR
(
-
EINVAL
)
;
switch
(
server
->
nfs_client
->
rpc_ops
->
version
)
{
case
2
:
case
3
:
...
...
fs/nfs/nfs3acl.c
浏览文件 @
6925bac1
...
...
@@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
dprintk
(
"NFS call getacl
\n
"
);
msg
.
rpc_proc
=
&
server
->
client_acl
->
cl_procinfo
[
ACLPROC3_GETACL
];
nfs_fattr_init
(
&
fattr
);
status
=
rpc_call_sync
(
server
->
client_acl
,
&
msg
,
0
);
dprintk
(
"NFS reply getacl: %d
\n
"
,
status
);
...
...
@@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk
(
"NFS call setacl
\n
"
);
msg
.
rpc_proc
=
&
server
->
client_acl
->
cl_procinfo
[
ACLPROC3_SETACL
];
nfs_fattr_init
(
&
fattr
);
status
=
rpc_call_sync
(
server
->
client_acl
,
&
msg
,
0
);
nfs_access_zap_cache
(
inode
);
nfs_zap_acl_cache
(
inode
);
...
...
fs/nfs/nfs3proc.c
浏览文件 @
6925bac1
...
...
@@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
}
static
int
nfs3_proc_fsinfo
(
struct
nfs_server
*
server
,
struct
nfs_fh
*
fhandle
,
do_proc_fsinfo
(
struct
rpc_clnt
*
client
,
struct
nfs_fh
*
fhandle
,
struct
nfs_fsinfo
*
info
)
{
struct
rpc_message
msg
=
{
...
...
@@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk
(
"NFS call fsinfo
\n
"
);
nfs_fattr_init
(
info
->
fattr
);
status
=
rpc_call_sync
(
server
->
nfs_client
->
cl_rpc
client
,
&
msg
,
0
);
status
=
rpc_call_sync
(
client
,
&
msg
,
0
);
dprintk
(
"NFS reply fsinfo: %d
\n
"
,
status
);
return
status
;
}
/*
 * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via
 * nfs_create_server
 *
 * The request is first issued on server->client. If that fails and the
 * nfs_client's cl_rpcclient is a different RPC client, the request is
 * retried once on cl_rpcclient and the status of the retry is returned.
 */
static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
		 struct nfs_fsinfo *info)
{
	int status = do_proc_fsinfo(server->client, fhandle, info);

	if (status == 0)
		return 0;
	if (server->nfs_client->cl_rpcclient == server->client)
		return status;
	return do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info);
}
static
int
nfs3_proc_pathconf
(
struct
nfs_server
*
server
,
struct
nfs_fh
*
fhandle
,
struct
nfs_pathconf
*
info
)
...
...
fs/nfs/nfs4namespace.c
浏览文件 @
6925bac1
...
...
@@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
return
0
;
}
/*
* Check if the string represents a "valid" IPv4 address
*/
static
inline
int
valid_ipaddr4
(
const
char
*
buf
)
static
struct
vfsmount
*
try_location
(
struct
nfs_clone_mount
*
mountdata
,
char
*
page
,
char
*
page2
,
const
struct
nfs4_fs_location
*
location
)
{
int
rc
,
count
,
in
[
4
];
rc
=
sscanf
(
buf
,
"%d.%d.%d.%d"
,
&
in
[
0
],
&
in
[
1
],
&
in
[
2
],
&
in
[
3
]);
if
(
rc
!=
4
)
return
-
EINVAL
;
for
(
count
=
0
;
count
<
4
;
count
++
)
{
if
(
in
[
count
]
>
255
)
return
-
EINVAL
;
struct
vfsmount
*
mnt
=
ERR_PTR
(
-
ENOENT
);
char
*
mnt_path
;
int
page2len
;
unsigned
int
s
;
mnt_path
=
nfs4_pathname_string
(
&
location
->
rootpath
,
page2
,
PAGE_SIZE
);
if
(
IS_ERR
(
mnt_path
))
return
mnt
;
mountdata
->
mnt_path
=
mnt_path
;
page2
+=
strlen
(
mnt_path
)
+
1
;
page2len
=
PAGE_SIZE
-
strlen
(
mnt_path
)
-
1
;
for
(
s
=
0
;
s
<
location
->
nservers
;
s
++
)
{
const
struct
nfs4_string
*
buf
=
&
location
->
servers
[
s
];
struct
sockaddr_storage
addr
;
if
(
buf
->
len
<=
0
||
buf
->
len
>=
PAGE_SIZE
)
continue
;
mountdata
->
addr
=
(
struct
sockaddr
*
)
&
addr
;
if
(
memchr
(
buf
->
data
,
IPV6_SCOPE_DELIMITER
,
buf
->
len
))
continue
;
nfs_parse_ip_address
(
buf
->
data
,
buf
->
len
,
mountdata
->
addr
,
&
mountdata
->
addrlen
);
if
(
mountdata
->
addr
->
sa_family
==
AF_UNSPEC
)
continue
;
nfs_set_port
(
mountdata
->
addr
,
NFS_PORT
);
strncpy
(
page2
,
buf
->
data
,
page2len
);
page2
[
page2len
]
=
'\0'
;
mountdata
->
hostname
=
page2
;
snprintf
(
page
,
PAGE_SIZE
,
"%s:%s"
,
mountdata
->
hostname
,
mountdata
->
mnt_path
);
mnt
=
vfs_kern_mount
(
&
nfs4_referral_fs_type
,
0
,
page
,
mountdata
);
if
(
!
IS_ERR
(
mnt
))
break
;
}
return
0
;
return
mnt
;
}
/**
...
...
@@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
.
authflavor
=
NFS_SB
(
mnt_parent
->
mnt_sb
)
->
client
->
cl_auth
->
au_flavor
,
};
char
*
page
=
NULL
,
*
page2
=
NULL
;
unsigned
int
s
;
int
loc
,
error
;
if
(
locations
==
NULL
||
locations
->
nlocations
<=
0
)
...
...
@@ -152,54 +182,17 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
goto
out
;
}
loc
=
0
;
while
(
loc
<
locations
->
nlocations
&&
IS_ERR
(
mnt
))
{
for
(
loc
=
0
;
loc
<
locations
->
nlocations
;
loc
++
)
{
const
struct
nfs4_fs_location
*
location
=
&
locations
->
locations
[
loc
];
char
*
mnt_path
;
if
(
location
==
NULL
||
location
->
nservers
<=
0
||
location
->
rootpath
.
ncomponents
==
0
)
{
loc
++
;
continue
;
}
mnt_path
=
nfs4_pathname_string
(
&
location
->
rootpath
,
page2
,
PAGE_SIZE
);
if
(
IS_ERR
(
mnt_path
))
{
loc
++
;
location
->
rootpath
.
ncomponents
==
0
)
continue
;
}
mountdata
.
mnt_path
=
mnt_path
;
s
=
0
;
while
(
s
<
location
->
nservers
)
{
struct
sockaddr_in
addr
=
{
.
sin_family
=
AF_INET
,
.
sin_port
=
htons
(
NFS_PORT
),
};
if
(
location
->
servers
[
s
].
len
<=
0
||
valid_ipaddr4
(
location
->
servers
[
s
].
data
)
<
0
)
{
s
++
;
continue
;
}
mountdata
.
hostname
=
location
->
servers
[
s
].
data
;
addr
.
sin_addr
.
s_addr
=
in_aton
(
mountdata
.
hostname
),
mountdata
.
addr
=
(
struct
sockaddr
*
)
&
addr
;
mountdata
.
addrlen
=
sizeof
(
addr
);
snprintf
(
page
,
PAGE_SIZE
,
"%s:%s"
,
mountdata
.
hostname
,
mountdata
.
mnt_path
);
mnt
=
vfs_kern_mount
(
&
nfs4_referral_fs_type
,
0
,
page
,
&
mountdata
);
if
(
!
IS_ERR
(
mnt
))
{
mnt
=
try_location
(
&
mountdata
,
page
,
page2
,
location
);
if
(
!
IS_ERR
(
mnt
))
break
;
}
s
++
;
}
loc
++
;
}
out:
free_page
((
unsigned
long
)
page
);
...
...
fs/nfs/proc.c
浏览文件 @
6925bac1
...
...
@@ -65,6 +65,9 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk
(
"%s: call getattr
\n
"
,
__func__
);
nfs_fattr_init
(
fattr
);
status
=
rpc_call_sync
(
server
->
client
,
&
msg
,
0
);
/* Retry with default authentication if different */
if
(
status
&&
server
->
nfs_client
->
cl_rpcclient
!=
server
->
client
)
status
=
rpc_call_sync
(
server
->
nfs_client
->
cl_rpcclient
,
&
msg
,
0
);
dprintk
(
"%s: reply getattr: %d
\n
"
,
__func__
,
status
);
if
(
status
)
...
...
@@ -72,6 +75,9 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk
(
"%s: call statfs
\n
"
,
__func__
);
msg
.
rpc_proc
=
&
nfs_procedures
[
NFSPROC_STATFS
];
msg
.
rpc_resp
=
&
fsinfo
;
status
=
rpc_call_sync
(
server
->
client
,
&
msg
,
0
);
/* Retry with default authentication if different */
if
(
status
&&
server
->
nfs_client
->
cl_rpcclient
!=
server
->
client
)
status
=
rpc_call_sync
(
server
->
nfs_client
->
cl_rpcclient
,
&
msg
,
0
);
dprintk
(
"%s: reply statfs: %d
\n
"
,
__func__
,
status
);
if
(
status
)
...
...
fs/nfs/super.c
浏览文件 @
6925bac1
...
...
@@ -91,6 +91,7 @@ enum {
/* Mount options that take string arguments */
Opt_sec
,
Opt_proto
,
Opt_mountproto
,
Opt_mounthost
,
Opt_addr
,
Opt_mountaddr
,
Opt_clientaddr
,
Opt_lookupcache
,
/* Special mount options */
Opt_userspace
,
Opt_deprecated
,
Opt_sloppy
,
...
...
@@ -154,6 +155,8 @@ static const match_table_t nfs_mount_option_tokens = {
{
Opt_mounthost
,
"mounthost=%s"
},
{
Opt_mountaddr
,
"mountaddr=%s"
},
{
Opt_lookupcache
,
"lookupcache=%s"
},
{
Opt_err
,
NULL
}
};
...
...
@@ -200,6 +203,22 @@ static const match_table_t nfs_secflavor_tokens = {
{
Opt_sec_err
,
NULL
}
};
/*
 * Tokens for the string argument of the "lookupcache=" mount option.
 * Opt_lookupcache_err is the value paired with the NULL pattern that
 * closes the token table.
 */
enum {
	Opt_lookupcache_all,
	Opt_lookupcache_positive,
	Opt_lookupcache_none,

	Opt_lookupcache_err
};
/*
 * Pattern table for the "lookupcache=" argument. "pos" and "positive"
 * both map to Opt_lookupcache_positive. The entry with a NULL pattern
 * closes the table and carries Opt_lookupcache_err.
 *
 * Declared const like the other token tables in this file
 * (nfs_mount_option_tokens, nfs_secflavor_tokens): the table is never
 * written to.
 */
static const match_table_t nfs_lookupcache_tokens = {
	{ Opt_lookupcache_all, "all" },
	{ Opt_lookupcache_positive, "pos" },
	{ Opt_lookupcache_positive, "positive" },
	{ Opt_lookupcache_none, "none" },

	{ Opt_lookupcache_err, NULL }
};
static
void
nfs_umount_begin
(
struct
super_block
*
);
static
int
nfs_statfs
(
struct
dentry
*
,
struct
kstatfs
*
);
...
...
@@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru
static
int
nfs_xdev_get_sb
(
struct
file_system_type
*
fs_type
,
int
flags
,
const
char
*
dev_name
,
void
*
raw_data
,
struct
vfsmount
*
mnt
);
static
void
nfs_kill_super
(
struct
super_block
*
);
static
void
nfs_put_super
(
struct
super_block
*
);
static
int
nfs_remount
(
struct
super_block
*
sb
,
int
*
flags
,
char
*
raw_data
);
static
struct
file_system_type
nfs_fs_type
=
{
...
...
@@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = {
.
alloc_inode
=
nfs_alloc_inode
,
.
destroy_inode
=
nfs_destroy_inode
,
.
write_inode
=
nfs_write_inode
,
.
put_super
=
nfs_put_super
,
.
statfs
=
nfs_statfs
,
.
clear_inode
=
nfs_clear_inode
,
.
umount_begin
=
nfs_umount_begin
,
...
...
@@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void)
unregister_filesystem
(
&
nfs_fs_type
);
}
void
nfs_sb_active
(
struct
nfs_server
*
server
)
void
nfs_sb_active
(
struct
super_block
*
sb
)
{
atomic_inc
(
&
server
->
active
);
}
struct
nfs_server
*
server
=
NFS_SB
(
sb
);
void
nfs_sb_deactive
(
struct
nfs_server
*
server
)
{
if
(
atomic_dec_and_test
(
&
server
->
active
))
wake_up
(
&
server
->
active_wq
);
if
(
atomic_inc_return
(
&
server
->
active
)
==
1
)
atomic_inc
(
&
sb
->
s_active
);
}
static
void
nfs_put_super
(
struct
super_block
*
sb
)
void
nfs_sb_deactive
(
struct
super_block
*
sb
)
{
struct
nfs_server
*
server
=
NFS_SB
(
sb
);
/*
* Make sure there are no outstanding ops to this server.
* If so, wait for them to finish before allowing the
* unmount to continue.
*/
wait_event
(
server
->
active_wq
,
atomic_read
(
&
server
->
active
)
==
0
);
if
(
atomic_dec_and_test
(
&
server
->
active
))
deactivate_super
(
sb
);
}
/*
...
...
@@ -663,25 +674,6 @@ static void nfs_umount_begin(struct super_block *sb)
rpc_killall_tasks
(
rpc
);
}
/*
 * Set the port number in an address.  Be agnostic about the address family.
 *
 * @port is given in host byte order and stored with htons(). Addresses
 * that are neither AF_INET nor AF_INET6 are left unchanged.
 */
static void nfs_set_port(struct sockaddr *sap, unsigned short port)
{
	if (sap->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)sap;

		sin->sin_port = htons(port);
	} else if (sap->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;

		sin6->sin6_port = htons(port);
	}
}
/*
* Sanity-check a server address provided by the mount command.
*
...
...
@@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len,
*
addr_len
=
0
;
}
#define IPV6_SCOPE_DELIMITER '%'
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static
void
nfs_parse_ipv6_scope_id
(
const
char
*
string
,
const
size_t
str_len
,
static
int
nfs_parse_ipv6_scope_id
(
const
char
*
string
,
const
size_t
str_len
,
const
char
*
delim
,
struct
sockaddr_in6
*
sin6
)
{
char
*
p
;
size_t
len
;
if
(
!
(
ipv6_addr_type
(
&
sin6
->
sin6_addr
)
&
IPV6_ADDR_LINKLOCAL
))
return
;
if
((
string
+
str_len
)
==
delim
)
return
1
;
if
(
*
delim
!=
IPV6_SCOPE_DELIMITER
)
return
;
return
0
;
if
(
!
(
ipv6_addr_type
(
&
sin6
->
sin6_addr
)
&
IPV6_ADDR_LINKLOCAL
))
return
0
;
len
=
(
string
+
str_len
)
-
delim
-
1
;
p
=
kstrndup
(
delim
+
1
,
len
,
GFP_KERNEL
);
...
...
@@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
scope_id
=
dev
->
ifindex
;
dev_put
(
dev
);
}
else
{
/* scope_id is set to zero on error */
strict_strtoul
(
p
,
10
,
&
scope_id
);
if
(
strict_strtoul
(
p
,
10
,
&
scope_id
)
==
0
)
{
kfree
(
p
);
return
0
;
}
}
kfree
(
p
);
sin6
->
sin6_scope_id
=
scope_id
;
dfprintk
(
MOUNT
,
"NFS: IPv6 scope ID = %lu
\n
"
,
scope_id
);
return
1
;
}
return
0
;
}
static
void
nfs_parse_ipv6_address
(
char
*
string
,
size_t
str_len
,
...
...
@@ -773,8 +773,10 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
sin6
->
sin6_family
=
AF_INET6
;
*
addr_len
=
sizeof
(
*
sin6
);
if
(
in6_pton
(
string
,
str_len
,
addr
,
IPV6_SCOPE_DELIMITER
,
&
delim
))
{
nfs_parse_ipv6_scope_id
(
string
,
str_len
,
delim
,
sin6
);
if
(
in6_pton
(
string
,
str_len
,
addr
,
IPV6_SCOPE_DELIMITER
,
&
delim
)
!=
0
)
{
if
(
nfs_parse_ipv6_scope_id
(
string
,
str_len
,
delim
,
sin6
)
!=
0
)
return
;
}
}
...
...
@@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
* If there is a problem constructing the new sockaddr, set the address
* family to AF_UNSPEC.
*/
static
void
nfs_parse_ip_address
(
char
*
string
,
size_t
str_len
,
void
nfs_parse_ip_address
(
char
*
string
,
size_t
str_len
,
struct
sockaddr
*
sap
,
size_t
*
addr_len
)
{
unsigned
int
i
,
colons
;
...
...
@@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw,
&
mnt
->
mount_server
.
addrlen
);
kfree
(
string
);
break
;
case
Opt_lookupcache
:
string
=
match_strdup
(
args
);
if
(
string
==
NULL
)
goto
out_nomem
;
token
=
match_token
(
string
,
nfs_lookupcache_tokens
,
args
);
kfree
(
string
);
switch
(
token
)
{
case
Opt_lookupcache_all
:
mnt
->
flags
&=
~
(
NFS_MOUNT_LOOKUP_CACHE_NONEG
|
NFS_MOUNT_LOOKUP_CACHE_NONE
);
break
;
case
Opt_lookupcache_positive
:
mnt
->
flags
&=
~
NFS_MOUNT_LOOKUP_CACHE_NONE
;
mnt
->
flags
|=
NFS_MOUNT_LOOKUP_CACHE_NONEG
;
break
;
case
Opt_lookupcache_none
:
mnt
->
flags
|=
NFS_MOUNT_LOOKUP_CACHE_NONEG
|
NFS_MOUNT_LOOKUP_CACHE_NONE
;
break
;
default:
errors
++
;
dfprintk
(
MOUNT
,
"NFS: invalid "
"lookupcache argument
\n
"
);
};
break
;
/*
* Special options
...
...
@@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options,
* Translate to nfs_parsed_mount_data, which nfs_fill_super
* can deal with.
*/
args
->
flags
=
data
->
flags
;
args
->
flags
=
data
->
flags
&
NFS_MOUNT_FLAGMASK
;
args
->
rsize
=
data
->
rsize
;
args
->
wsize
=
data
->
wsize
;
args
->
timeo
=
data
->
timeo
;
...
...
fs/nfs/unlink.c
浏览文件 @
6925bac1
...
...
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata)
nfs_dec_sillycount
(
data
->
dir
);
nfs_free_unlinkdata
(
data
);
nfs_sb_deactive
(
NFS_SB
(
sb
)
);
nfs_sb_deactive
(
sb
);
}
static
const
struct
rpc_call_ops
nfs_unlink_ops
=
{
...
...
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
.
rpc_message
=
&
msg
,
.
callback_ops
=
&
nfs_unlink_ops
,
.
callback_data
=
data
,
.
workqueue
=
nfsiod_workqueue
,
.
flags
=
RPC_TASK_ASYNC
,
};
struct
rpc_task
*
task
;
...
...
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
nfs_dec_sillycount
(
dir
);
return
0
;
}
nfs_sb_active
(
NFS_SERVER
(
dir
)
);
nfs_sb_active
(
dir
->
i_sb
);
data
->
args
.
fh
=
NFS_FH
(
dir
);
nfs_fattr_init
(
&
data
->
res
.
dir_attr
);
...
...
fs/nfs/write.c
浏览文件 @
6925bac1
...
...
@@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
.
bdi
=
mapping
->
backing_dev_info
,
.
sync_mode
=
WB_SYNC_NONE
,
.
nr_to_write
=
LONG_MAX
,
.
range_start
=
0
,
.
range_end
=
LLONG_MAX
,
.
for_writepages
=
1
,
.
range_cyclic
=
1
,
};
int
ret
;
...
...
include/linux/nfs_fs.h
浏览文件 @
6925bac1
...
...
@@ -137,7 +137,7 @@ struct nfs_inode {
unsigned
long
attrtimeo_timestamp
;
__u64
change_attr
;
/* v4 only */
unsigned
long
last_updated
;
unsigned
long
attr_gencount
;
/* "Generation counter" for the attribute cache. This is
* bumped whenever we update the metadata on the
* server.
...
...
@@ -200,11 +200,10 @@ struct nfs_inode {
/*
* Bit offsets in flags field
*/
#define NFS_INO_REVALIDATING (0)
/* revalidating attrs */
#define NFS_INO_ADVISE_RDPLUS (1)
/* advise readdirplus */
#define NFS_INO_STALE (2)
/* possible stale inode */
#define NFS_INO_ACL_LRU_SET (3)
/* Inode is on the LRU list */
#define NFS_INO_MOUNTPOINT (4)
/* inode is remote mountpoint */
#define NFS_INO_ADVISE_RDPLUS (0)
/* advise readdirplus */
#define NFS_INO_STALE (1)
/* possible stale inode */
#define NFS_INO_ACL_LRU_SET (2)
/* Inode is on the LRU list */
#define NFS_INO_MOUNTPOINT (3)
/* inode is remote mountpoint */
static
inline
struct
nfs_inode
*
NFS_I
(
const
struct
inode
*
inode
)
{
...
...
@@ -345,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct
extern
void
put_nfs_open_context
(
struct
nfs_open_context
*
ctx
);
extern
struct
nfs_open_context
*
nfs_find_open_context
(
struct
inode
*
inode
,
struct
rpc_cred
*
cred
,
int
mode
);
extern
u64
nfs_compat_user_ino64
(
u64
fileid
);
extern
void
nfs_fattr_init
(
struct
nfs_fattr
*
fattr
);
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
extern
__be32
root_nfs_parse_addr
(
char
*
name
);
/*__init*/
static
inline
void
nfs_fattr_init
(
struct
nfs_fattr
*
fattr
)
{
fattr
->
valid
=
0
;
fattr
->
time_start
=
jiffies
;
}
extern
unsigned
long
nfs_inc_attr_generation_counter
(
void
);
/*
* linux/fs/nfs/file.c
...
...
include/linux/nfs_fs_sb.h
浏览文件 @
6925bac1
...
...
@@ -119,7 +119,6 @@ struct nfs_server {
void
(
*
destroy
)(
struct
nfs_server
*
);
atomic_t
active
;
/* Keep trace of any activity to this server */
wait_queue_head_t
active_wq
;
/* Wait for any activity to stop */
/* mountd-related mount options */
struct
sockaddr_storage
mountd_address
;
...
...
include/linux/nfs_mount.h
浏览文件 @
6925bac1
...
...
@@ -65,4 +65,8 @@ struct nfs_mount_data {
#define NFS_MOUNT_UNSHARED 0x8000
/* 5 */
#define NFS_MOUNT_FLAGMASK 0xFFFF
/* The following are for internal use only */
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
#endif
include/linux/nfs_xdr.h
浏览文件 @
6925bac1
...
...
@@ -36,6 +36,7 @@ struct nfs_fattr {
__u32
nlink
;
__u32
uid
;
__u32
gid
;
dev_t
rdev
;
__u64
size
;
union
{
struct
{
...
...
@@ -46,7 +47,6 @@ struct nfs_fattr {
__u64
used
;
}
nfs3
;
}
du
;
dev_t
rdev
;
struct
nfs_fsid
fsid
;
__u64
fileid
;
struct
timespec
atime
;
...
...
@@ -56,6 +56,7 @@ struct nfs_fattr {
__u64
change_attr
;
/* NFSv4 change attribute */
__u64
pre_change_attr
;
/* pre-op NFSv4 change attribute */
unsigned
long
time_start
;
unsigned
long
gencount
;
};
#define NFS_ATTR_WCC 0x0001
/* pre-op WCC data */
...
...
@@ -672,16 +673,16 @@ struct nfs4_rename_res {
struct
nfs_fattr
*
new_fattr
;
};
#define NFS4_SETCLIENTID_NAMELEN (
56
)
#define NFS4_SETCLIENTID_NAMELEN (
127
)
struct
nfs4_setclientid
{
const
nfs4_verifier
*
sc_verifier
;
unsigned
int
sc_name_len
;
char
sc_name
[
NFS4_SETCLIENTID_NAMELEN
];
char
sc_name
[
NFS4_SETCLIENTID_NAMELEN
+
1
];
u32
sc_prog
;
unsigned
int
sc_netid_len
;
char
sc_netid
[
RPCBIND_MAXNETIDLEN
];
char
sc_netid
[
RPCBIND_MAXNETIDLEN
+
1
];
unsigned
int
sc_uaddr_len
;
char
sc_uaddr
[
RPCBIND_MAXUADDRLEN
];
char
sc_uaddr
[
RPCBIND_MAXUADDRLEN
+
1
];
u32
sc_cb_ident
;
};
...
...
include/linux/sunrpc/xprtrdma.h
浏览文件 @
6925bac1
...
...
@@ -66,9 +66,6 @@
#define RPCRDMA_INLINE_PAD_THRESH (512)
/* payload threshold to pad (bytes) */
#define RDMA_RESOLVE_TIMEOUT (5*HZ)
/* TBD 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2)
/* retries if no listener backlog */
/* memory registration strategies */
#define RPCRDMA_PERSISTENT_REGISTRATION (1)
...
...
@@ -78,6 +75,7 @@ enum rpcrdma_memreg {
RPCRDMA_MEMWINDOWS
,
RPCRDMA_MEMWINDOWS_ASYNC
,
RPCRDMA_MTHCAFMR
,
RPCRDMA_FRMR
,
RPCRDMA_ALLPHYSICAL
,
RPCRDMA_LAST
};
...
...
net/sunrpc/clnt.c
浏览文件 @
6925bac1
...
...
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
}
/* save the nodename */
clnt
->
cl_nodelen
=
strlen
(
utsname
()
->
nodename
);
clnt
->
cl_nodelen
=
strlen
(
init_
utsname
()
->
nodename
);
if
(
clnt
->
cl_nodelen
>
UNX_MAXNODENAME
)
clnt
->
cl_nodelen
=
UNX_MAXNODENAME
;
memcpy
(
clnt
->
cl_nodename
,
utsname
()
->
nodename
,
clnt
->
cl_nodelen
);
memcpy
(
clnt
->
cl_nodename
,
init_
utsname
()
->
nodename
,
clnt
->
cl_nodelen
);
rpc_register_client
(
clnt
);
return
clnt
;
...
...
net/sunrpc/rpcb_clnt.c
浏览文件 @
6925bac1
...
...
@@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
return
rpc_run_task
(
&
task_setup_data
);
}
/*
* In the case where rpc clients have been cloned, we want to make
* sure that we use the program number/version etc of the actual
* owner of the xprt. To do so, we walk back up the tree of parents
* to find whoever created the transport and/or whoever has the
* autobind flag set.
*/
static
struct
rpc_clnt
*
rpcb_find_transport_owner
(
struct
rpc_clnt
*
clnt
)
{
struct
rpc_clnt
*
parent
=
clnt
->
cl_parent
;
while
(
parent
!=
clnt
)
{
if
(
parent
->
cl_xprt
!=
clnt
->
cl_xprt
)
break
;
if
(
clnt
->
cl_autobind
)
break
;
clnt
=
parent
;
parent
=
parent
->
cl_parent
;
}
return
clnt
;
}
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
...
...
@@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
*/
void
rpcb_getport_async
(
struct
rpc_task
*
task
)
{
struct
rpc_clnt
*
clnt
=
task
->
tk_client
;
struct
rpc_clnt
*
clnt
;
struct
rpc_procinfo
*
proc
;
u32
bind_version
;
struct
rpc_xprt
*
xprt
=
task
->
tk_xprt
;
struct
rpc_xprt
*
xprt
;
struct
rpc_clnt
*
rpcb_clnt
;
static
struct
rpcbind_args
*
map
;
struct
rpc_task
*
child
;
...
...
@@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
size_t
salen
;
int
status
;
clnt
=
rpcb_find_transport_owner
(
task
->
tk_client
);
xprt
=
clnt
->
cl_xprt
;
dprintk
(
"RPC: %5u %s(%s, %u, %u, %d)
\n
"
,
task
->
tk_pid
,
__func__
,
clnt
->
cl_server
,
clnt
->
cl_prog
,
clnt
->
cl_vers
,
xprt
->
prot
);
/* Autobind on cloned rpc clients is discouraged */
BUG_ON
(
clnt
->
cl_parent
!=
clnt
);
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on
(
&
xprt
->
binding
,
task
,
NULL
);
...
...
@@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
status
=
-
ENOMEM
;
dprintk
(
"RPC: %5u %s: no memory available
\n
"
,
task
->
tk_pid
,
__func__
);
goto
bailout_
nofree
;
goto
bailout_
release_client
;
}
map
->
r_prog
=
clnt
->
cl_prog
;
map
->
r_vers
=
clnt
->
cl_vers
;
...
...
@@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
task
->
tk_pid
,
__func__
);
return
;
}
rpc_put_task
(
child
);
task
->
tk_xprt
->
stat
.
bind_count
++
;
xprt
->
stat
.
bind_count
++
;
rpc_put_task
(
child
);
return
;
bailout_release_client:
rpc_release_client
(
rpcb_clnt
);
bailout_nofree:
rpcb_wake_rpcbind_waiters
(
xprt
,
status
);
task
->
tk_status
=
status
;
...
...
net/sunrpc/xprt.c
浏览文件 @
6925bac1
...
...
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
goto
out
;
}
result
=
-
EINVAL
;
if
(
try_module_get
(
THIS_MODULE
))
{
list_add_tail
(
&
transport
->
list
,
&
xprt_list
);
printk
(
KERN_INFO
"RPC: Registered %s transport module.
\n
"
,
transport
->
name
);
result
=
0
;
}
out:
spin_unlock
(
&
xprt_list_lock
);
...
...
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
"RPC: Unregistered %s transport module.
\n
"
,
transport
->
name
);
list_del_init
(
&
transport
->
list
);
module_put
(
THIS_MODULE
);
goto
out
;
}
}
...
...
net/sunrpc/xprtrdma/rpc_rdma.c
浏览文件 @
6925bac1
...
...
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
}
if
(
xdrbuf
->
tail
[
0
].
iov_len
)
{
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if
(
xdrbuf
->
tail
[
0
].
iov_len
<
4
&&
xprt_rdma_pad_optimize
)
return
n
;
if
(
n
==
nsegs
)
return
0
;
seg
[
n
].
mr_page
=
NULL
;
...
...
@@ -508,7 +512,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if
(
hdrlen
==
0
)
return
-
1
;
dprintk
(
"RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd
\n
"
dprintk
(
"RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x
\n
"
,
__func__
,
transfertypes
[
wtype
],
hdrlen
,
rpclen
,
padlen
,
headerp
,
base
,
req
->
rl_iov
.
lkey
);
...
...
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's.
*/
static
void
rpcrdma_inline_fixup
(
struct
rpc_rqst
*
rqst
,
char
*
srcp
,
int
copy_len
)
rpcrdma_inline_fixup
(
struct
rpc_rqst
*
rqst
,
char
*
srcp
,
int
copy_len
,
int
pad
)
{
int
i
,
npages
,
curlen
,
olen
;
char
*
destp
;
...
...
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
}
else
rqst
->
rq_rcv_buf
.
tail
[
0
].
iov_len
=
0
;
if
(
pad
)
{
/* implicit padding on terminal chunk */
unsigned
char
*
p
=
rqst
->
rq_rcv_buf
.
tail
[
0
].
iov_base
;
while
(
pad
--
)
p
[
rqst
->
rq_rcv_buf
.
tail
[
0
].
iov_len
++
]
=
0
;
}
if
(
copy_len
)
dprintk
(
"RPC: %s: %d bytes in"
" %d extra segments (%d lost)
\n
"
,
...
...
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
struct
rpc_xprt
*
xprt
=
ep
->
rep_xprt
;
spin_lock_bh
(
&
xprt
->
transport_lock
);
if
(
++
xprt
->
connect_cookie
==
0
)
/* maintain a reserved value */
++
xprt
->
connect_cookie
;
if
(
ep
->
rep_connected
>
0
)
{
if
(
!
xprt_test_and_set_connected
(
xprt
))
xprt_wake_pending_tasks
(
xprt
,
0
);
}
else
{
if
(
xprt_test_and_clear_connected
(
xprt
))
xprt_wake_pending_tasks
(
xprt
,
ep
->
rep_connected
);
xprt_wake_pending_tasks
(
xprt
,
-
ENOTCONN
);
}
spin_unlock_bh
(
&
xprt
->
transport_lock
);
}
...
...
@@ -792,14 +805,20 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
((
unsigned
char
*
)
iptr
-
(
unsigned
char
*
)
headerp
);
status
=
rep
->
rr_len
+
rdmalen
;
r_xprt
->
rx_stats
.
total_rdma_reply
+=
rdmalen
;
/* special case - last chunk may omit padding */
if
(
rdmalen
&=
3
)
{
rdmalen
=
4
-
rdmalen
;
status
+=
rdmalen
;
}
}
else
{
/* else ordinary inline */
rdmalen
=
0
;
iptr
=
(
__be32
*
)((
unsigned
char
*
)
headerp
+
28
);
rep
->
rr_len
-=
28
;
/*sizeof *headerp;*/
status
=
rep
->
rr_len
;
}
/* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup
(
rqst
,
(
char
*
)
iptr
,
rep
->
rr_len
);
rpcrdma_inline_fixup
(
rqst
,
(
char
*
)
iptr
,
rep
->
rr_len
,
rdmalen
);
break
;
case
htonl
(
RDMA_NOMSG
):
...
...
net/sunrpc/xprtrdma/transport.c
浏览文件 @
6925bac1
...
...
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static
unsigned
int
xprt_rdma_max_inline_read
=
RPCRDMA_DEF_INLINE
;
static
unsigned
int
xprt_rdma_max_inline_write
=
RPCRDMA_DEF_INLINE
;
static
unsigned
int
xprt_rdma_inline_write_padding
;
#if !RPCRDMA_PERSISTENT_REGISTRATION
static
unsigned
int
xprt_rdma_memreg_strategy
=
RPCRDMA_REGISTER
;
/* FMR? */
#else
static
unsigned
int
xprt_rdma_memreg_strategy
=
RPCRDMA_ALLPHYSICAL
;
#endif
static
unsigned
int
xprt_rdma_memreg_strategy
=
RPCRDMA_FRMR
;
int
xprt_rdma_pad_optimize
=
0
;
#ifdef RPC_DEBUG
...
...
@@ -139,6 +136,14 @@ static ctl_table xr_tunables_table[] = {
.
extra1
=
&
min_memreg
,
.
extra2
=
&
max_memreg
,
},
{
.
ctl_name
=
CTL_UNNUMBERED
,
.
procname
=
"rdma_pad_optimize"
,
.
data
=
&
xprt_rdma_pad_optimize
,
.
maxlen
=
sizeof
(
unsigned
int
),
.
mode
=
0644
,
.
proc_handler
=
&
proc_dointvec
,
},
{
.
ctl_name
=
0
,
},
...
...
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
struct
rpcrdma_xprt
*
r_xprt
=
rpcx_to_rdmax
(
xprt
);
dprintk
(
"RPC: %s: closing
\n
"
,
__func__
);
if
(
r_xprt
->
rx_ep
.
rep_connected
>
0
)
xprt
->
reestablish_timeout
=
0
;
xprt_disconnect_done
(
xprt
);
(
void
)
rpcrdma_ep_disconnect
(
&
r_xprt
->
rx_ep
,
&
r_xprt
->
rx_ia
);
}
...
...
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
/* Reconnect */
schedule_delayed_work
(
&
r_xprt
->
rdma_connect
,
xprt
->
reestablish_timeout
);
xprt
->
reestablish_timeout
<<=
1
;
if
(
xprt
->
reestablish_timeout
>
(
30
*
HZ
))
xprt
->
reestablish_timeout
=
(
30
*
HZ
);
else
if
(
xprt
->
reestablish_timeout
<
(
5
*
HZ
))
xprt
->
reestablish_timeout
=
(
5
*
HZ
);
}
else
{
schedule_delayed_work
(
&
r_xprt
->
rdma_connect
,
0
);
if
(
!
RPC_IS_ASYNC
(
task
))
...
...
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
}
dprintk
(
"RPC: %s: size %zd, request 0x%p
\n
"
,
__func__
,
size
,
req
);
out:
req
->
rl_connect_cookie
=
0
;
/* our reserved value */
return
req
->
rl_xdr_buf
;
outfail:
...
...
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
req
->
rl_reply
->
rr_xprt
=
xprt
;
}
if
(
rpcrdma_ep_post
(
&
r_xprt
->
rx_ia
,
&
r_xprt
->
rx_ep
,
req
))
{
xprt_disconnect_done
(
xprt
);
return
-
ENOTCONN
;
/* implies disconnect */
}
/* Must suppress retransmit to maintain credits */
if
(
req
->
rl_connect_cookie
==
xprt
->
connect_cookie
)
goto
drop_connection
;
req
->
rl_connect_cookie
=
xprt
->
connect_cookie
;
if
(
rpcrdma_ep_post
(
&
r_xprt
->
rx_ia
,
&
r_xprt
->
rx_ep
,
req
))
goto
drop_connection
;
task
->
tk_bytes_sent
+=
rqst
->
rq_snd_buf
.
len
;
rqst
->
rq_bytes_sent
=
0
;
return
0
;
drop_connection:
xprt_disconnect_done
(
xprt
);
return
-
ENOTCONN
;
/* implies disconnect */
}
static
void
xprt_rdma_print_stats
(
struct
rpc_xprt
*
xprt
,
struct
seq_file
*
seq
)
...
...
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int
rc
;
dprintk
(
"RPCRDMA Module Removed, deregister RPC RDMA transport
\n
"
);
dprintk
(
KERN_INFO
"RPCRDMA Module Removed, deregister RPC RDMA transport
\n
"
);
#ifdef RPC_DEBUG
if
(
sunrpc_table_header
)
{
unregister_sysctl_table
(
sunrpc_table_header
);
...
...
net/sunrpc/xprtrdma/verbs.c
浏览文件 @
6925bac1
...
...
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch
(
event
->
event
)
{
case
RDMA_CM_EVENT_ADDR_RESOLVED
:
case
RDMA_CM_EVENT_ROUTE_RESOLVED
:
ia
->
ri_async_rc
=
0
;
complete
(
&
ia
->
ri_done
);
break
;
case
RDMA_CM_EVENT_ADDR_ERROR
:
...
...
@@ -338,13 +339,32 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wake_up_all
(
&
ep
->
rep_connect_wait
);
break
;
default:
ia
->
ri_async_rc
=
-
EINVAL
;
dprintk
(
"RPC: %s: unexpected CM event %X
\n
"
,
dprintk
(
"RPC: %s: unexpected CM event %d
\n
"
,
__func__
,
event
->
event
);
complete
(
&
ia
->
ri_done
);
break
;
}
#ifdef RPC_DEBUG
if
(
connstate
==
1
)
{
int
ird
=
attr
.
max_dest_rd_atomic
;
int
tird
=
ep
->
rep_remote_cma
.
responder_resources
;
printk
(
KERN_INFO
"rpcrdma: connection to %u.%u.%u.%u:%u "
"on %s, memreg %d slots %d ird %d%s
\n
"
,
NIPQUAD
(
addr
->
sin_addr
.
s_addr
),
ntohs
(
addr
->
sin_port
),
ia
->
ri_id
->
device
->
name
,
ia
->
ri_memreg_strategy
,
xprt
->
rx_buf
.
rb_max_requests
,
ird
,
ird
<
4
&&
ird
<
tird
/
2
?
" (low!)"
:
""
);
}
else
if
(
connstate
<
0
)
{
printk
(
KERN_INFO
"rpcrdma: connection to %u.%u.%u.%u:%u "
"closed (%d)
\n
"
,
NIPQUAD
(
addr
->
sin_addr
.
s_addr
),
ntohs
(
addr
->
sin_port
),
connstate
);
}
#endif
return
0
;
}
...
...
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct
rdma_cm_id
*
id
;
int
rc
;
init_completion
(
&
ia
->
ri_done
);
id
=
rdma_create_id
(
rpcrdma_conn_upcall
,
xprt
,
RDMA_PS_TCP
);
if
(
IS_ERR
(
id
))
{
rc
=
PTR_ERR
(
id
);
...
...
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return
id
;
}
ia
->
ri_async_rc
=
0
;
ia
->
ri_async_rc
=
-
ETIMEDOUT
;
rc
=
rdma_resolve_addr
(
id
,
NULL
,
addr
,
RDMA_RESOLVE_TIMEOUT
);
if
(
rc
)
{
dprintk
(
"RPC: %s: rdma_resolve_addr() failed %i
\n
"
,
__func__
,
rc
);
goto
out
;
}
wait_for_completion
(
&
ia
->
ri_done
);
wait_for_completion_interruptible_timeout
(
&
ia
->
ri_done
,
msecs_to_jiffies
(
RDMA_RESOLVE_TIMEOUT
)
+
1
);
rc
=
ia
->
ri_async_rc
;
if
(
rc
)
goto
out
;
ia
->
ri_async_rc
=
0
;
ia
->
ri_async_rc
=
-
ETIMEDOUT
;
rc
=
rdma_resolve_route
(
id
,
RDMA_RESOLVE_TIMEOUT
);
if
(
rc
)
{
dprintk
(
"RPC: %s: rdma_resolve_route() failed %i
\n
"
,
__func__
,
rc
);
goto
out
;
}
wait_for_completion
(
&
ia
->
ri_done
);
wait_for_completion_interruptible_timeout
(
&
ia
->
ri_done
,
msecs_to_jiffies
(
RDMA_RESOLVE_TIMEOUT
)
+
1
);
rc
=
ia
->
ri_async_rc
;
if
(
rc
)
goto
out
;
...
...
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
int
rpcrdma_ia_open
(
struct
rpcrdma_xprt
*
xprt
,
struct
sockaddr
*
addr
,
int
memreg
)
{
int
rc
;
int
rc
,
mem_priv
;
struct
ib_device_attr
devattr
;
struct
rpcrdma_ia
*
ia
=
&
xprt
->
rx_ia
;
init_completion
(
&
ia
->
ri_done
);
ia
->
ri_id
=
rpcrdma_create_id
(
xprt
,
ia
,
addr
);
if
(
IS_ERR
(
ia
->
ri_id
))
{
rc
=
PTR_ERR
(
ia
->
ri_id
);
...
...
@@ -442,6 +465,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto
out2
;
}
/*
* Query the device to determine if the requested memory
* registration strategy is supported. If it isn't, set the
* strategy to a globally supported model.
*/
rc
=
ib_query_device
(
ia
->
ri_id
->
device
,
&
devattr
);
if
(
rc
)
{
dprintk
(
"RPC: %s: ib_query_device failed %d
\n
"
,
__func__
,
rc
);
goto
out2
;
}
if
(
devattr
.
device_cap_flags
&
IB_DEVICE_LOCAL_DMA_LKEY
)
{
ia
->
ri_have_dma_lkey
=
1
;
ia
->
ri_dma_lkey
=
ia
->
ri_id
->
device
->
local_dma_lkey
;
}
switch
(
memreg
)
{
case
RPCRDMA_MEMWINDOWS
:
case
RPCRDMA_MEMWINDOWS_ASYNC
:
if
(
!
(
devattr
.
device_cap_flags
&
IB_DEVICE_MEM_WINDOW
))
{
dprintk
(
"RPC: %s: MEMWINDOWS registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER
\n
"
,
__func__
);
memreg
=
RPCRDMA_REGISTER
;
}
break
;
case
RPCRDMA_MTHCAFMR
:
if
(
!
ia
->
ri_id
->
device
->
alloc_fmr
)
{
#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk
(
"RPC: %s: MTHCAFMR registration "
"specified but not supported by adapter, "
"using riskier RPCRDMA_ALLPHYSICAL
\n
"
,
__func__
);
memreg
=
RPCRDMA_ALLPHYSICAL
;
#else
dprintk
(
"RPC: %s: MTHCAFMR registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER
\n
"
,
__func__
);
memreg
=
RPCRDMA_REGISTER
;
#endif
}
break
;
case
RPCRDMA_FRMR
:
/* Requires both frmr reg and local dma lkey */
if
((
devattr
.
device_cap_flags
&
(
IB_DEVICE_MEM_MGT_EXTENSIONS
|
IB_DEVICE_LOCAL_DMA_LKEY
))
!=
(
IB_DEVICE_MEM_MGT_EXTENSIONS
|
IB_DEVICE_LOCAL_DMA_LKEY
))
{
#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk
(
"RPC: %s: FRMR registration "
"specified but not supported by adapter, "
"using riskier RPCRDMA_ALLPHYSICAL
\n
"
,
__func__
);
memreg
=
RPCRDMA_ALLPHYSICAL
;
#else
dprintk
(
"RPC: %s: FRMR registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER
\n
"
,
__func__
);
memreg
=
RPCRDMA_REGISTER
;
#endif
}
break
;
}
/*
* Optionally obtain an underlying physical identity mapping in
* order to do a memory window-based bind. This base registration
...
...
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* revoked after the corresponding completion similar to a storage
* adapter.
*/
if
(
memreg
>
RPCRDMA_REGISTER
)
{
int
mem_priv
=
IB_ACCESS_LOCAL_WRITE
;
switch
(
memreg
)
{
case
RPCRDMA_BOUNCEBUFFERS
:
case
RPCRDMA_REGISTER
:
case
RPCRDMA_FRMR
:
break
;
#if RPCRDMA_PERSISTENT_REGISTRATION
case
RPCRDMA_ALLPHYSICAL
:
mem_priv
|=
IB_ACCESS_REMOTE_WRITE
;
mem_priv
|=
IB_ACCESS_REMOTE_READ
;
break
;
mem_priv
=
IB_ACCESS_LOCAL_WRITE
|
IB_ACCESS_REMOTE_WRITE
|
IB_ACCESS_REMOTE_READ
;
goto
register_setup
;
#endif
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
mem_priv
|=
IB_ACCESS_MW_BIND
;
break
;
default:
mem_priv
=
IB_ACCESS_LOCAL_WRITE
|
IB_ACCESS_MW_BIND
;
goto
register_setup
;
case
RPCRDMA_MTHCAFMR
:
if
(
ia
->
ri_have_dma_lkey
)
break
;
}
mem_priv
=
IB_ACCESS_LOCAL_WRITE
;
register_setup:
ia
->
ri_bind_mem
=
ib_get_dma_mr
(
ia
->
ri_pd
,
mem_priv
);
if
(
IS_ERR
(
ia
->
ri_bind_mem
))
{
printk
(
KERN_ALERT
"%s: ib_get_dma_mr for "
...
...
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
memreg
=
RPCRDMA_REGISTER
;
ia
->
ri_bind_mem
=
NULL
;
}
break
;
default:
printk
(
KERN_ERR
"%s: invalid memory registration mode %d
\n
"
,
__func__
,
memreg
);
rc
=
-
EINVAL
;
goto
out2
;
}
dprintk
(
"RPC: %s: memory registration strategy is %d
\n
"
,
__func__
,
memreg
);
/* Else will do memory reg/dereg for each chunk */
ia
->
ri_memreg_strategy
=
memreg
;
...
...
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
return
0
;
out2:
rdma_destroy_id
(
ia
->
ri_id
);
ia
->
ri_id
=
NULL
;
out1:
return
rc
;
}
...
...
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
dprintk
(
"RPC: %s: ib_dereg_mr returned %i
\n
"
,
__func__
,
rc
);
}
if
(
ia
->
ri_id
!=
NULL
&&
!
IS_ERR
(
ia
->
ri_id
)
&&
ia
->
ri_id
->
qp
)
if
(
ia
->
ri_id
!=
NULL
&&
!
IS_ERR
(
ia
->
ri_id
))
{
if
(
ia
->
ri_id
->
qp
)
rdma_destroy_qp
(
ia
->
ri_id
);
rdma_destroy_id
(
ia
->
ri_id
);
ia
->
ri_id
=
NULL
;
}
if
(
ia
->
ri_pd
!=
NULL
&&
!
IS_ERR
(
ia
->
ri_pd
))
{
rc
=
ib_dealloc_pd
(
ia
->
ri_pd
);
dprintk
(
"RPC: %s: ib_dealloc_pd returned %i
\n
"
,
__func__
,
rc
);
}
if
(
ia
->
ri_id
!=
NULL
&&
!
IS_ERR
(
ia
->
ri_id
))
rdma_destroy_id
(
ia
->
ri_id
);
}
/*
...
...
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep
->
rep_attr
.
srq
=
NULL
;
ep
->
rep_attr
.
cap
.
max_send_wr
=
cdata
->
max_requests
;
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_FRMR
:
/* Add room for frmr register and invalidate WRs */
ep
->
rep_attr
.
cap
.
max_send_wr
*=
3
;
if
(
ep
->
rep_attr
.
cap
.
max_send_wr
>
devattr
.
max_qp_wr
)
return
-
EINVAL
;
break
;
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
/* Add room for mw_binds+unbinds - overkill! */
...
...
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep
->
rep_remote_cma
.
private_data_len
=
0
;
/* Client offers RDMA Read but does not initiate */
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_BOUNCEBUFFERS
:
ep
->
rep_remote_cma
.
initiator_depth
=
0
;
if
(
ia
->
ri_memreg_strategy
==
RPCRDMA_BOUNCEBUFFERS
)
ep
->
rep_remote_cma
.
responder_resources
=
0
;
break
;
case
RPCRDMA_MTHCAFMR
:
case
RPCRDMA_REGISTER
:
ep
->
rep_remote_cma
.
responder_resources
=
cdata
->
max_requests
*
(
RPCRDMA_MAX_DATA_SEGS
/
8
);
break
;
case
RPCRDMA_MEMWINDOWS
:
case
RPCRDMA_MEMWINDOWS_ASYNC
:
#if RPCRDMA_PERSISTENT_REGISTRATION
case
RPCRDMA_ALLPHYSICAL
:
#endif
ep
->
rep_remote_cma
.
responder_resources
=
cdata
->
max_requests
*
(
RPCRDMA_MAX_DATA_SEGS
/
2
);
break
;
default:
break
;
}
if
(
ep
->
rep_remote_cma
.
responder_resources
>
devattr
.
max_qp_rd_atom
)
else
if
(
devattr
.
max_qp_rd_atom
>
32
)
/* arbitrary but <= 255 */
ep
->
rep_remote_cma
.
responder_resources
=
32
;
else
ep
->
rep_remote_cma
.
responder_resources
=
devattr
.
max_qp_rd_atom
;
ep
->
rep_remote_cma
.
initiator_depth
=
0
;
ep
->
rep_remote_cma
.
retry_count
=
7
;
ep
->
rep_remote_cma
.
flow_control
=
0
;
...
...
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if
(
rc
)
dprintk
(
"RPC: %s: rpcrdma_ep_disconnect"
" returned %i
\n
"
,
__func__
,
rc
);
rdma_destroy_qp
(
ia
->
ri_id
);
ia
->
ri_id
->
qp
=
NULL
;
}
ep
->
rep_func
=
NULL
;
/* padding - could be done in rpcrdma_buffer_destroy... */
if
(
ep
->
rep_pad_mr
)
{
rpcrdma_deregister_internal
(
ia
,
ep
->
rep_pad_mr
,
&
ep
->
rep_pad
);
ep
->
rep_pad_mr
=
NULL
;
}
if
(
ia
->
ri_id
->
qp
)
{
rdma_destroy_qp
(
ia
->
ri_id
);
ia
->
ri_id
->
qp
=
NULL
;
}
rpcrdma_clean_cq
(
ep
->
rep_cq
);
rc
=
ib_destroy_cq
(
ep
->
rep_cq
);
if
(
rc
)
...
...
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct
rdma_cm_id
*
id
;
int
rc
=
0
;
int
retry_count
=
0
;
int
reconnect
=
(
ep
->
rep_connected
!=
0
);
if
(
reconnect
)
{
if
(
ep
->
rep_connected
!=
0
)
{
struct
rpcrdma_xprt
*
xprt
;
retry:
rc
=
rpcrdma_ep_disconnect
(
ep
,
ia
);
...
...
@@ -745,6 +836,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
goto
out
;
}
/* END TEMP */
rdma_destroy_qp
(
ia
->
ri_id
);
rdma_destroy_id
(
ia
->
ri_id
);
ia
->
ri_id
=
id
;
}
...
...
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
}
}
/* Theoretically a client initiator_depth > 0 is not needed,
* but many peers fail to complete the connection unless they
* == responder_resources! */
if
(
ep
->
rep_remote_cma
.
initiator_depth
!=
ep
->
rep_remote_cma
.
responder_resources
)
ep
->
rep_remote_cma
.
initiator_depth
=
ep
->
rep_remote_cma
.
responder_resources
;
ep
->
rep_connected
=
0
;
rc
=
rdma_connect
(
ia
->
ri_id
,
&
ep
->
rep_remote_cma
);
...
...
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
goto
out
;
}
if
(
reconnect
)
return
0
;
wait_event_interruptible
(
ep
->
rep_connect_wait
,
ep
->
rep_connected
!=
0
);
/*
...
...
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
if
(
ep
->
rep_connected
<=
0
)
{
/* Sometimes, the only way to reliably connect to remote
* CMs is to use same nonzero values for ORD and IRD. */
if
(
retry_count
++
<=
RDMA_CONNECT_RETRY_MAX
+
1
&&
(
ep
->
rep_remote_cma
.
responder_resources
==
0
||
ep
->
rep_remote_cma
.
initiator_depth
!=
ep
->
rep_remote_cma
.
responder_resources
))
{
if
(
ep
->
rep_remote_cma
.
responder_resources
==
0
)
ep
->
rep_remote_cma
.
responder_resources
=
1
;
ep
->
rep_remote_cma
.
initiator_depth
=
ep
->
rep_remote_cma
.
responder_resources
;
if
(
ep
->
rep_remote_cma
.
initiator_depth
==
0
)
++
ep
->
rep_remote_cma
.
initiator_depth
;
if
(
ep
->
rep_remote_cma
.
responder_resources
==
0
)
++
ep
->
rep_remote_cma
.
responder_resources
;
if
(
retry_count
++
==
0
)
goto
retry
;
}
rc
=
ep
->
rep_connected
;
}
else
{
dprintk
(
"RPC: %s: connected
\n
"
,
__func__
);
...
...
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char
*
p
;
size_t
len
;
int
i
,
rc
;
struct
rpcrdma_mw
*
r
;
buf
->
rb_max_requests
=
cdata
->
max_requests
;
spin_lock_init
(
&
buf
->
rb_lock
);
...
...
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
* 5. mw's, if any
* 5. mw's,
fmr's or frmr's,
if any
* Send/recv buffers in req/rep need to be registered
*/
...
...
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
(
sizeof
(
struct
rpcrdma_req
*
)
+
sizeof
(
struct
rpcrdma_rep
*
));
len
+=
cdata
->
padding
;
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_FRMR
:
len
+=
buf
->
rb_max_requests
*
RPCRDMA_MAX_SEGS
*
sizeof
(
struct
rpcrdma_mw
);
break
;
case
RPCRDMA_MTHCAFMR
:
/* TBD we are perhaps overallocating here */
len
+=
(
buf
->
rb_max_requests
+
1
)
*
RPCRDMA_MAX_SEGS
*
...
...
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* and also reduce unbind-to-bind collision.
*/
INIT_LIST_HEAD
(
&
buf
->
rb_mws
);
r
=
(
struct
rpcrdma_mw
*
)
p
;
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_FRMR
:
for
(
i
=
buf
->
rb_max_requests
*
RPCRDMA_MAX_SEGS
;
i
;
i
--
)
{
r
->
r
.
frmr
.
fr_mr
=
ib_alloc_fast_reg_mr
(
ia
->
ri_pd
,
RPCRDMA_MAX_SEGS
);
if
(
IS_ERR
(
r
->
r
.
frmr
.
fr_mr
))
{
rc
=
PTR_ERR
(
r
->
r
.
frmr
.
fr_mr
);
dprintk
(
"RPC: %s: ib_alloc_fast_reg_mr"
" failed %i
\n
"
,
__func__
,
rc
);
goto
out
;
}
r
->
r
.
frmr
.
fr_pgl
=
ib_alloc_fast_reg_page_list
(
ia
->
ri_id
->
device
,
RPCRDMA_MAX_SEGS
);
if
(
IS_ERR
(
r
->
r
.
frmr
.
fr_pgl
))
{
rc
=
PTR_ERR
(
r
->
r
.
frmr
.
fr_pgl
);
dprintk
(
"RPC: %s: "
"ib_alloc_fast_reg_page_list "
"failed %i
\n
"
,
__func__
,
rc
);
goto
out
;
}
list_add
(
&
r
->
mw_list
,
&
buf
->
rb_mws
);
++
r
;
}
break
;
case
RPCRDMA_MTHCAFMR
:
{
struct
rpcrdma_mw
*
r
=
(
struct
rpcrdma_mw
*
)
p
;
struct
ib_fmr_attr
fa
=
{
RPCRDMA_MAX_DATA_SEGS
,
1
,
PAGE_SHIFT
};
/* TBD we are perhaps overallocating here */
for
(
i
=
(
buf
->
rb_max_requests
+
1
)
*
RPCRDMA_MAX_SEGS
;
i
;
i
--
)
{
static
struct
ib_fmr_attr
fa
=
{
RPCRDMA_MAX_DATA_SEGS
,
1
,
PAGE_SHIFT
};
r
->
r
.
fmr
=
ib_alloc_fmr
(
ia
->
ri_pd
,
IB_ACCESS_REMOTE_WRITE
|
IB_ACCESS_REMOTE_READ
,
&
fa
);
...
...
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add
(
&
r
->
mw_list
,
&
buf
->
rb_mws
);
++
r
;
}
}
break
;
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
{
struct
rpcrdma_mw
*
r
=
(
struct
rpcrdma_mw
*
)
p
;
/* Allocate one extra request's worth, for full cycling */
for
(
i
=
(
buf
->
rb_max_requests
+
1
)
*
RPCRDMA_MAX_SEGS
;
i
;
i
--
)
{
r
->
r
.
mw
=
ib_alloc_mw
(
ia
->
ri_pd
);
...
...
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add
(
&
r
->
mw_list
,
&
buf
->
rb_mws
);
++
r
;
}
}
break
;
default:
break
;
...
...
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
int
rc
,
i
;
struct
rpcrdma_ia
*
ia
=
rdmab_to_ia
(
buf
);
struct
rpcrdma_mw
*
r
;
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
...
...
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
if
(
buf
->
rb_send_bufs
&&
buf
->
rb_send_bufs
[
i
])
{
while
(
!
list_empty
(
&
buf
->
rb_mws
))
{
struct
rpcrdma_mw
*
r
;
r
=
list_entry
(
buf
->
rb_mws
.
next
,
struct
rpcrdma_mw
,
mw_list
);
list_del
(
&
r
->
mw_list
);
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_FRMR
:
rc
=
ib_dereg_mr
(
r
->
r
.
frmr
.
fr_mr
);
if
(
rc
)
dprintk
(
"RPC: %s:"
" ib_dereg_mr"
" failed %i
\n
"
,
__func__
,
rc
);
ib_free_fast_reg_page_list
(
r
->
r
.
frmr
.
fr_pgl
);
break
;
case
RPCRDMA_MTHCAFMR
:
rc
=
ib_dealloc_fmr
(
r
->
r
.
fmr
);
if
(
rc
)
...
...
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
struct
rpcrdma_req
*
req
;
unsigned
long
flags
;
int
i
;
struct
rpcrdma_mw
*
r
;
spin_lock_irqsave
(
&
buffers
->
rb_lock
,
flags
);
if
(
buffers
->
rb_send_index
==
buffers
->
rb_max_requests
)
{
...
...
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
}
buffers
->
rb_send_bufs
[
buffers
->
rb_send_index
++
]
=
NULL
;
if
(
!
list_empty
(
&
buffers
->
rb_mws
))
{
i
nt
i
=
RPCRDMA_MAX_SEGS
-
1
;
i
=
RPCRDMA_MAX_SEGS
-
1
;
do
{
struct
rpcrdma_mw
*
r
;
r
=
list_entry
(
buffers
->
rb_mws
.
next
,
struct
rpcrdma_mw
,
mw_list
);
list_del
(
&
r
->
mw_list
);
...
...
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req
->
rl_reply
=
NULL
;
}
switch
(
ia
->
ri_memreg_strategy
)
{
case
RPCRDMA_FRMR
:
case
RPCRDMA_MTHCAFMR
:
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
...
...
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
va
,
len
,
DMA_BIDIRECTIONAL
);
iov
->
length
=
len
;
if
(
ia
->
ri_bind_mem
!=
NULL
)
{
if
(
ia
->
ri_have_dma_lkey
)
{
*
mrp
=
NULL
;
iov
->
lkey
=
ia
->
ri_dma_lkey
;
return
0
;
}
else
if
(
ia
->
ri_bind_mem
!=
NULL
)
{
*
mrp
=
NULL
;
iov
->
lkey
=
ia
->
ri_bind_mem
->
lkey
;
return
0
;
...
...
@@ -1329,74 +1450,169 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
seg
->
mr_dma
,
seg
->
mr_dmalen
,
seg
->
mr_dir
);
}
int
rpcrdma_register_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
nsegs
,
int
writing
,
struct
rpcrdma_xprt
*
r_xprt
)
static
int
rpcrdma_register_frmr_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
*
nsegs
,
int
writing
,
struct
rpcrdma_ia
*
ia
,
struct
rpcrdma_xprt
*
r_xprt
)
{
struct
rpcrdma_ia
*
ia
=
&
r_xprt
->
rx_ia
;
int
mem_priv
=
(
writing
?
IB_ACCESS_REMOTE_WRITE
:
IB_ACCESS_REMOTE_READ
);
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
int
i
;
int
rc
=
0
;
switch
(
ia
->
ri_memreg_strategy
)
{
struct
ib_send_wr
frmr_wr
,
*
bad_wr
;
u8
key
;
int
len
,
pageoff
;
int
i
,
rc
;
#if RPCRDMA_PERSISTENT_REGISTRATION
case
RPCRDMA_ALLPHYSICAL
:
pageoff
=
offset_in_page
(
seg1
->
mr_offset
);
seg1
->
mr_offset
-=
pageoff
;
/* start of page */
seg1
->
mr_len
+=
pageoff
;
len
=
-
pageoff
;
if
(
*
nsegs
>
RPCRDMA_MAX_DATA_SEGS
)
*
nsegs
=
RPCRDMA_MAX_DATA_SEGS
;
for
(
i
=
0
;
i
<
*
nsegs
;)
{
rpcrdma_map_one
(
ia
,
seg
,
writing
);
seg
->
mr_rkey
=
ia
->
ri_bind_mem
->
rkey
;
seg
->
mr_base
=
seg
->
mr_dma
;
seg
->
mr_nsegs
=
1
;
nsegs
=
1
;
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_pgl
->
page_list
[
i
]
=
seg
->
mr_dma
;
len
+=
seg
->
mr_len
;
++
seg
;
++
i
;
/* Check for holes */
if
((
i
<
*
nsegs
&&
offset_in_page
(
seg
->
mr_offset
))
||
offset_in_page
((
seg
-
1
)
->
mr_offset
+
(
seg
-
1
)
->
mr_len
))
break
;
#endif
}
dprintk
(
"RPC: %s: Using frmr %p to map %d segments
\n
"
,
__func__
,
seg1
->
mr_chunk
.
rl_mw
,
i
);
/* Bump the key */
key
=
(
u8
)(
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_mr
->
rkey
&
0x000000FF
);
ib_update_fast_reg_key
(
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_mr
,
++
key
);
/* Prepare FRMR WR */
memset
(
&
frmr_wr
,
0
,
sizeof
frmr_wr
);
frmr_wr
.
opcode
=
IB_WR_FAST_REG_MR
;
frmr_wr
.
send_flags
=
0
;
/* unsignaled */
frmr_wr
.
wr
.
fast_reg
.
iova_start
=
(
unsigned
long
)
seg1
->
mr_dma
;
frmr_wr
.
wr
.
fast_reg
.
page_list
=
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_pgl
;
frmr_wr
.
wr
.
fast_reg
.
page_list_len
=
i
;
frmr_wr
.
wr
.
fast_reg
.
page_shift
=
PAGE_SHIFT
;
frmr_wr
.
wr
.
fast_reg
.
length
=
i
<<
PAGE_SHIFT
;
frmr_wr
.
wr
.
fast_reg
.
access_flags
=
(
writing
?
IB_ACCESS_REMOTE_WRITE
:
IB_ACCESS_REMOTE_READ
);
frmr_wr
.
wr
.
fast_reg
.
rkey
=
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_mr
->
rkey
;
DECR_CQCOUNT
(
&
r_xprt
->
rx_ep
);
/* Registration using fast memory registration */
case
RPCRDMA_MTHCAFMR
:
{
rc
=
ib_post_send
(
ia
->
ri_id
->
qp
,
&
frmr_wr
,
&
bad_wr
);
if
(
rc
)
{
dprintk
(
"RPC: %s: failed ib_post_send for register,"
" status %i
\n
"
,
__func__
,
rc
);
while
(
i
--
)
rpcrdma_unmap_one
(
ia
,
--
seg
);
}
else
{
seg1
->
mr_rkey
=
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_mr
->
rkey
;
seg1
->
mr_base
=
seg1
->
mr_dma
+
pageoff
;
seg1
->
mr_nsegs
=
i
;
seg1
->
mr_len
=
len
;
}
*
nsegs
=
i
;
return
rc
;
}
static
int
rpcrdma_deregister_frmr_external
(
struct
rpcrdma_mr_seg
*
seg
,
struct
rpcrdma_ia
*
ia
,
struct
rpcrdma_xprt
*
r_xprt
)
{
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
struct
ib_send_wr
invalidate_wr
,
*
bad_wr
;
int
rc
;
while
(
seg1
->
mr_nsegs
--
)
rpcrdma_unmap_one
(
ia
,
seg
++
);
memset
(
&
invalidate_wr
,
0
,
sizeof
invalidate_wr
);
invalidate_wr
.
opcode
=
IB_WR_LOCAL_INV
;
invalidate_wr
.
send_flags
=
0
;
/* unsignaled */
invalidate_wr
.
ex
.
invalidate_rkey
=
seg1
->
mr_chunk
.
rl_mw
->
r
.
frmr
.
fr_mr
->
rkey
;
DECR_CQCOUNT
(
&
r_xprt
->
rx_ep
);
rc
=
ib_post_send
(
ia
->
ri_id
->
qp
,
&
invalidate_wr
,
&
bad_wr
);
if
(
rc
)
dprintk
(
"RPC: %s: failed ib_post_send for invalidate,"
" status %i
\n
"
,
__func__
,
rc
);
return
rc
;
}
static
int
rpcrdma_register_fmr_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
*
nsegs
,
int
writing
,
struct
rpcrdma_ia
*
ia
)
{
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
u64
physaddrs
[
RPCRDMA_MAX_DATA_SEGS
];
int
len
,
pageoff
=
offset_in_page
(
seg
->
mr_offset
);
int
len
,
pageoff
,
i
,
rc
;
pageoff
=
offset_in_page
(
seg1
->
mr_offset
);
seg1
->
mr_offset
-=
pageoff
;
/* start of page */
seg1
->
mr_len
+=
pageoff
;
len
=
-
pageoff
;
if
(
nsegs
>
RPCRDMA_MAX_DATA_SEGS
)
nsegs
=
RPCRDMA_MAX_DATA_SEGS
;
for
(
i
=
0
;
i
<
nsegs
;)
{
if
(
*
nsegs
>
RPCRDMA_MAX_DATA_SEGS
)
*
nsegs
=
RPCRDMA_MAX_DATA_SEGS
;
for
(
i
=
0
;
i
<
*
nsegs
;)
{
rpcrdma_map_one
(
ia
,
seg
,
writing
);
physaddrs
[
i
]
=
seg
->
mr_dma
;
len
+=
seg
->
mr_len
;
++
seg
;
++
i
;
/* Check for holes */
if
((
i
<
nsegs
&&
offset_in_page
(
seg
->
mr_offset
))
||
offset_in_page
((
seg
-
1
)
->
mr_offset
+
(
seg
-
1
)
->
mr_len
))
if
((
i
<
*
nsegs
&&
offset_in_page
(
seg
->
mr_offset
))
||
offset_in_page
((
seg
-
1
)
->
mr_offset
+
(
seg
-
1
)
->
mr_len
))
break
;
}
nsegs
=
i
;
rc
=
ib_map_phys_fmr
(
seg1
->
mr_chunk
.
rl_mw
->
r
.
fmr
,
physaddrs
,
nsegs
,
seg1
->
mr_dma
);
physaddrs
,
i
,
seg1
->
mr_dma
);
if
(
rc
)
{
dprintk
(
"RPC: %s: failed ib_map_phys_fmr "
"%u@0x%llx+%i (%d)... status %i
\n
"
,
__func__
,
len
,
(
unsigned
long
long
)
seg1
->
mr_dma
,
pageoff
,
nsegs
,
rc
);
while
(
nsegs
--
)
pageoff
,
i
,
rc
);
while
(
i
--
)
rpcrdma_unmap_one
(
ia
,
--
seg
);
}
else
{
seg1
->
mr_rkey
=
seg1
->
mr_chunk
.
rl_mw
->
r
.
fmr
->
rkey
;
seg1
->
mr_base
=
seg1
->
mr_dma
+
pageoff
;
seg1
->
mr_nsegs
=
nsegs
;
seg1
->
mr_nsegs
=
i
;
seg1
->
mr_len
=
len
;
}
}
break
;
*
nsegs
=
i
;
return
rc
;
}
/* Registration using memory windows */
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
{
static
int
rpcrdma_deregister_fmr_external
(
struct
rpcrdma_mr_seg
*
seg
,
struct
rpcrdma_ia
*
ia
)
{
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
LIST_HEAD
(
l
);
int
rc
;
list_add
(
&
seg1
->
mr_chunk
.
rl_mw
->
r
.
fmr
->
list
,
&
l
);
rc
=
ib_unmap_fmr
(
&
l
);
while
(
seg1
->
mr_nsegs
--
)
rpcrdma_unmap_one
(
ia
,
seg
++
);
if
(
rc
)
dprintk
(
"RPC: %s: failed ib_unmap_fmr,"
" status %i
\n
"
,
__func__
,
rc
);
return
rc
;
}
static
int
rpcrdma_register_memwin_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
*
nsegs
,
int
writing
,
struct
rpcrdma_ia
*
ia
,
struct
rpcrdma_xprt
*
r_xprt
)
{
int
mem_priv
=
(
writing
?
IB_ACCESS_REMOTE_WRITE
:
IB_ACCESS_REMOTE_READ
);
struct
ib_mw_bind
param
;
int
rc
;
*
nsegs
=
1
;
rpcrdma_map_one
(
ia
,
seg
,
writing
);
param
.
mr
=
ia
->
ri_bind_mem
;
param
.
wr_id
=
0ULL
;
/* no send cookie */
...
...
@@ -1406,8 +1622,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
param
.
mw_access_flags
=
mem_priv
;
DECR_CQCOUNT
(
&
r_xprt
->
rx_ep
);
rc
=
ib_bind_mw
(
ia
->
ri_id
->
qp
,
seg
->
mr_chunk
.
rl_mw
->
r
.
mw
,
&
param
);
rc
=
ib_bind_mw
(
ia
->
ri_id
->
qp
,
seg
->
mr_chunk
.
rl_mw
->
r
.
mw
,
&
param
);
if
(
rc
)
{
dprintk
(
"RPC: %s: failed ib_bind_mw "
"%u@0x%llx status %i
\n
"
,
...
...
@@ -1418,19 +1633,56 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
seg
->
mr_rkey
=
seg
->
mr_chunk
.
rl_mw
->
r
.
mw
->
rkey
;
seg
->
mr_base
=
param
.
addr
;
seg
->
mr_nsegs
=
1
;
nsegs
=
1
;
}
return
rc
;
}
/*
 * Unbind the memory window attached to @seg and DMA-unmap the segment.
 *
 * The unbind is expressed as an ib_bind_mw() call with a zero address,
 * zero length and no access flags.  A memory-window chunk always covers
 * exactly one segment, which is enforced with BUG_ON().
 *
 * @seg:    the (single) segment bound to the memory window
 * @ia:     interface adapter supplying the QP and the bound memory region
 * @r_xprt: transport whose endpoint is passed to the CQ-count macros
 * @r:      in/out cookie.  When *r is non-NULL the unbind is posted
 *          signaled, with *r as its work-request id; otherwise it is
 *          posted unsignaled.  On a successful post *r is set to NULL.
 *
 * Returns the status of ib_bind_mw() (zero on success).  The segment is
 * DMA-unmapped regardless of that status.
 */
static int
rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt,
			void **r)
{
	struct ib_mw_bind param;
	int rc;

	BUG_ON(seg->mr_nsegs != 1);

	param.mr = ia->ri_bind_mem;
	param.addr = 0ULL;	/* unbind */
	param.length = 0;
	param.mw_access_flags = 0;
	if (*r) {
		/* Caller supplied a cookie: request a signaled completion. */
		param.wr_id = (u64) (unsigned long) *r;
		param.send_flags = IB_SEND_SIGNALED;
		INIT_CQCOUNT(&r_xprt->rx_ep);
	} else {
		param.wr_id = 0ULL;
		param.send_flags = 0;
		DECR_CQCOUNT(&r_xprt->rx_ep);
	}

	rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
	rpcrdma_unmap_one(ia, seg);
	if (rc)
		dprintk("RPC: %s: failed ib_(un)bind_mw,"
			" status %i\n", __func__, rc);
	else
		*r = NULL;	/* will upcall on completion */
	return rc;
}
/* Default registration each time */
default:
{
static
int
rpcrdma_register_default_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
*
nsegs
,
int
writing
,
struct
rpcrdma_ia
*
ia
)
{
int
mem_priv
=
(
writing
?
IB_ACCESS_REMOTE_WRITE
:
IB_ACCESS_REMOTE_READ
);
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
struct
ib_phys_buf
ipb
[
RPCRDMA_MAX_DATA_SEGS
];
int
len
=
0
;
if
(
nsegs
>
RPCRDMA_MAX_DATA_SEGS
)
nsegs
=
RPCRDMA_MAX_DATA_SEGS
;
for
(
i
=
0
;
i
<
nsegs
;)
{
int
len
,
i
,
rc
=
0
;
if
(
*
nsegs
>
RPCRDMA_MAX_DATA_SEGS
)
*
nsegs
=
RPCRDMA_MAX_DATA_SEGS
;
for
(
len
=
0
,
i
=
0
;
i
<
*
nsegs
;)
{
rpcrdma_map_one
(
ia
,
seg
,
writing
);
ipb
[
i
].
addr
=
seg
->
mr_dma
;
ipb
[
i
].
size
=
seg
->
mr_len
;
...
...
@@ -1438,28 +1690,85 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
++
seg
;
++
i
;
/* Check for holes */
if
((
i
<
nsegs
&&
offset_in_page
(
seg
->
mr_offset
))
||
if
((
i
<
*
nsegs
&&
offset_in_page
(
seg
->
mr_offset
))
||
offset_in_page
((
seg
-
1
)
->
mr_offset
+
(
seg
-
1
)
->
mr_len
))
break
;
}
nsegs
=
i
;
seg1
->
mr_base
=
seg1
->
mr_dma
;
seg1
->
mr_chunk
.
rl_mr
=
ib_reg_phys_mr
(
ia
->
ri_pd
,
ipb
,
nsegs
,
mem_priv
,
&
seg1
->
mr_base
);
ipb
,
i
,
mem_priv
,
&
seg1
->
mr_base
);
if
(
IS_ERR
(
seg1
->
mr_chunk
.
rl_mr
))
{
rc
=
PTR_ERR
(
seg1
->
mr_chunk
.
rl_mr
);
dprintk
(
"RPC: %s: failed ib_reg_phys_mr "
"%u@0x%llx (%d)... status %i
\n
"
,
__func__
,
len
,
(
unsigned
long
long
)
seg1
->
mr_dma
,
nsegs
,
rc
);
while
(
nsegs
--
)
(
unsigned
long
long
)
seg1
->
mr_dma
,
i
,
rc
);
while
(
i
--
)
rpcrdma_unmap_one
(
ia
,
--
seg
);
}
else
{
seg1
->
mr_rkey
=
seg1
->
mr_chunk
.
rl_mr
->
rkey
;
seg1
->
mr_nsegs
=
nsegs
;
seg1
->
mr_nsegs
=
i
;
seg1
->
mr_len
=
len
;
}
}
*
nsegs
=
i
;
return
rc
;
}
/*
 * Undo a default (per-request ib_reg_phys_mr) registration for the chunk
 * that starts at @seg.
 *
 * The chunk's memory region is deregistered and its pointer cleared,
 * then every segment counted by the head segment's mr_nsegs is
 * DMA-unmapped, whether or not deregistration succeeded.  The
 * post-decrement loop leaves the head segment's mr_nsegs at -1.
 *
 * @seg: first segment of the registered chunk
 * @ia:  interface adapter passed through to rpcrdma_unmap_one()
 *
 * Returns the status of ib_dereg_mr() (zero on success).
 */
static int
rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *head = seg;	/* holds mr_nsegs and the mr */
	int status;

	status = ib_dereg_mr(head->mr_chunk.rl_mr);
	head->mr_chunk.rl_mr = NULL;

	/* DMA mappings are released even when deregistration failed. */
	while (head->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);

	if (status != 0)
		dprintk("RPC: %s: failed ib_dereg_mr,"
			" status %i\n", __func__, status);
	return status;
}
int
rpcrdma_register_external
(
struct
rpcrdma_mr_seg
*
seg
,
int
nsegs
,
int
writing
,
struct
rpcrdma_xprt
*
r_xprt
)
{
struct
rpcrdma_ia
*
ia
=
&
r_xprt
->
rx_ia
;
int
rc
=
0
;
switch
(
ia
->
ri_memreg_strategy
)
{
#if RPCRDMA_PERSISTENT_REGISTRATION
case
RPCRDMA_ALLPHYSICAL
:
rpcrdma_map_one
(
ia
,
seg
,
writing
);
seg
->
mr_rkey
=
ia
->
ri_bind_mem
->
rkey
;
seg
->
mr_base
=
seg
->
mr_dma
;
seg
->
mr_nsegs
=
1
;
nsegs
=
1
;
break
;
#endif
/* Registration using frmr registration */
case
RPCRDMA_FRMR
:
rc
=
rpcrdma_register_frmr_external
(
seg
,
&
nsegs
,
writing
,
ia
,
r_xprt
);
break
;
/* Registration using fmr memory registration */
case
RPCRDMA_MTHCAFMR
:
rc
=
rpcrdma_register_fmr_external
(
seg
,
&
nsegs
,
writing
,
ia
);
break
;
/* Registration using memory windows */
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
rc
=
rpcrdma_register_memwin_external
(
seg
,
&
nsegs
,
writing
,
ia
,
r_xprt
);
break
;
/* Default registration each time */
default:
rc
=
rpcrdma_register_default_external
(
seg
,
&
nsegs
,
writing
,
ia
);
break
;
}
if
(
rc
)
...
...
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
struct
rpcrdma_xprt
*
r_xprt
,
void
*
r
)
{
struct
rpcrdma_ia
*
ia
=
&
r_xprt
->
rx_ia
;
struct
rpcrdma_mr_seg
*
seg1
=
seg
;
int
nsegs
=
seg
->
mr_nsegs
,
rc
;
switch
(
ia
->
ri_memreg_strategy
)
{
...
...
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
break
;
#endif
case
RPCRDMA_FRMR
:
rc
=
rpcrdma_deregister_frmr_external
(
seg
,
ia
,
r_xprt
);
break
;
case
RPCRDMA_MTHCAFMR
:
{
LIST_HEAD
(
l
);
list_add
(
&
seg
->
mr_chunk
.
rl_mw
->
r
.
fmr
->
list
,
&
l
);
rc
=
ib_unmap_fmr
(
&
l
);
while
(
seg1
->
mr_nsegs
--
)
rpcrdma_unmap_one
(
ia
,
seg
++
);
}
if
(
rc
)
dprintk
(
"RPC: %s: failed ib_unmap_fmr,"
" status %i
\n
"
,
__func__
,
rc
);
rc
=
rpcrdma_deregister_fmr_external
(
seg
,
ia
);
break
;
case
RPCRDMA_MEMWINDOWS_ASYNC
:
case
RPCRDMA_MEMWINDOWS
:
{
struct
ib_mw_bind
param
;
BUG_ON
(
nsegs
!=
1
);
param
.
mr
=
ia
->
ri_bind_mem
;
param
.
addr
=
0ULL
;
/* unbind */
param
.
length
=
0
;
param
.
mw_access_flags
=
0
;
if
(
r
)
{
param
.
wr_id
=
(
u64
)
(
unsigned
long
)
r
;
param
.
send_flags
=
IB_SEND_SIGNALED
;
INIT_CQCOUNT
(
&
r_xprt
->
rx_ep
);
}
else
{
param
.
wr_id
=
0ULL
;
param
.
send_flags
=
0
;
DECR_CQCOUNT
(
&
r_xprt
->
rx_ep
);
}
rc
=
ib_bind_mw
(
ia
->
ri_id
->
qp
,
seg
->
mr_chunk
.
rl_mw
->
r
.
mw
,
&
param
);
rpcrdma_unmap_one
(
ia
,
seg
);
}
if
(
rc
)
dprintk
(
"RPC: %s: failed ib_(un)bind_mw,"
" status %i
\n
"
,
__func__
,
rc
);
else
r
=
NULL
;
/* will upcall on completion */
rc
=
rpcrdma_deregister_memwin_external
(
seg
,
ia
,
r_xprt
,
&
r
);
break
;
default:
rc
=
ib_dereg_mr
(
seg1
->
mr_chunk
.
rl_mr
);
seg1
->
mr_chunk
.
rl_mr
=
NULL
;
while
(
seg1
->
mr_nsegs
--
)
rpcrdma_unmap_one
(
ia
,
seg
++
);
if
(
rc
)
dprintk
(
"RPC: %s: failed ib_dereg_mr,"
" status %i
\n
"
,
__func__
,
rc
);
rc
=
rpcrdma_deregister_default_external
(
seg
,
ia
);
break
;
}
if
(
r
)
{
...
...
net/sunrpc/xprtrdma/xprt_rdma.h
浏览文件 @
6925bac1
...
...
@@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h>
/* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h>
/* xprt parameters */
#define RDMA_RESOLVE_TIMEOUT (5000)
/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2)
/* retries if no listener backlog */
/*
* Interface Adapter -- one per transport instance
*/
...
...
@@ -58,6 +61,8 @@ struct rpcrdma_ia {
struct
rdma_cm_id
*
ri_id
;
struct
ib_pd
*
ri_pd
;
struct
ib_mr
*
ri_bind_mem
;
u32
ri_dma_lkey
;
int
ri_have_dma_lkey
;
struct
completion
ri_done
;
int
ri_async_rc
;
enum
rpcrdma_memreg
ri_memreg_strategy
;
...
...
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
union
{
struct
ib_mw
*
mw
;
struct
ib_fmr
*
fmr
;
struct
{
struct
ib_fast_reg_page_list
*
fr_pgl
;
struct
ib_mr
*
fr_mr
;
}
frmr
;
}
r
;
struct
list_head
mw_list
;
}
*
rl_mw
;
...
...
@@ -175,6 +184,7 @@ struct rpcrdma_req {
size_t
rl_size
;
/* actual length of buffer */
unsigned
int
rl_niovs
;
/* 0, 2 or 4 */
unsigned
int
rl_nchunks
;
/* non-zero if chunks */
unsigned
int
rl_connect_cookie
;
/* retry detection */
struct
rpcrdma_buffer
*
rl_buffer
;
/* home base for this structure */
struct
rpcrdma_rep
*
rl_reply
;
/* holder for reply buffer */
struct
rpcrdma_mr_seg
rl_segments
[
RPCRDMA_MAX_SEGS
];
/* chunk segments */
...
...
@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
atomic_t
rb_credits
;
/* most recent server credits */
unsigned
long
rb_cwndscale
;
/* cached framework rpc_cwndscale */
int
rb_max_requests
;
/* client max requests */
struct
list_head
rb_mws
;
/* optional memory windows/fmrs */
struct
list_head
rb_mws
;
/* optional memory windows/fmrs
/frmrs
*/
int
rb_send_index
;
struct
rpcrdma_req
**
rb_send_bufs
;
int
rb_recv_index
;
...
...
@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern
int
xprt_rdma_pad_optimize
;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录