openeuler / Kernel
Commit 107e0008
Authored on Oct 08, 2008 by J. Bruce Fields

    Merge branch 'from-tomtucker' into for-2.6.28

Parents: 29373913 67080c82
Showing 4 changed files with 710 additions and 123 deletions.
include/linux/sunrpc/svc_rdma.h             +26   -1
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c     +166  -21
net/sunrpc/xprtrdma/svc_rdma_sendto.c       +215  -40
net/sunrpc/xprtrdma/svc_rdma_transport.c    +303  -61
include/linux/sunrpc/svc_rdma.h

@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
+	struct svc_rdma_fastreg_mr *frmr;
 	int hdr_count;
 	struct xdr_buf arg;
 	struct list_head dto_q;
@@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge {
 	int start;		/* sge no for this chunk */
 	int count;		/* sge count for this chunk */
 };
+struct svc_rdma_fastreg_mr {
+	struct ib_mr *mr;
+	void *kva;
+	struct ib_fast_reg_page_list *page_list;
+	int page_list_len;
+	unsigned long access_flags;
+	unsigned long map_len;
+	enum dma_data_direction direction;
+	struct list_head frmr_list;
+};
 struct svc_rdma_req_map {
+	struct svc_rdma_fastreg_mr *frmr;
 	unsigned long count;
 	union {
 		struct kvec sge[RPCSVC_MAXPAGES];
 		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
 	};
 };
+#define RDMACTXT_F_FAST_UNREG	1
 #define RDMACTXT_F_LAST_CTXT	2

+#define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
+#define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
+
 struct svcxprt_rdma {
 	struct svc_xprt      sc_xprt;		/* SVC transport structure */
 	struct rdma_cm_id   *sc_cm_id;		/* RDMA connection id */
@@ -136,6 +151,11 @@ struct svcxprt_rdma {
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
 	struct ib_mr         *sc_phys_mr;	/* MR for server memory */
+	u32		     sc_dev_caps;	/* distilled device caps */
+	u32		     sc_dma_lkey;	/* local dma key */
+	unsigned int	     sc_frmr_pg_list_len;
+	struct list_head     sc_frmr_q;
+	spinlock_t	     sc_frmr_q_lock;

 	spinlock_t	     sc_lock;		/* transport lock */
@@ -192,8 +212,13 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
+extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
+extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
+			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
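Taken together, the new declarations define a small FRMR life cycle: borrow a registration context with svc_rdma_get_frmr(), describe and post the registration with svc_rdma_fastreg(), and return the context with svc_rdma_put_frmr() once the last WR using it has completed. The sketch below models just the borrow/return free-list behavior in plain, single-threaded userspace C; the names mirror the patch, but the bodies are simplified stand-ins (the kernel version is per-transport and spinlock-protected).

/* Minimal userspace model of the frmr free list added by this patch.
 * The names mirror the kernel API; the bodies are simplified stand-ins. */
#include <stdio.h>
#include <stdlib.h>

struct fastreg_mr {
	unsigned long map_len;          /* bytes covered by the mapping */
	int page_list_len;              /* pages backing the mapping */
	struct fastreg_mr *next;        /* free-list linkage */
};

static struct fastreg_mr *frmr_q;       /* stands in for xprt->sc_frmr_q */

static struct fastreg_mr *get_frmr(void)
{
	struct fastreg_mr *frmr = frmr_q;

	if (frmr) {                     /* reuse a cached registration */
		frmr_q = frmr->next;
		frmr->map_len = 0;      /* reset, exactly as the patch does */
		frmr->page_list_len = 0;
		return frmr;
	}
	return calloc(1, sizeof(*frmr)); /* else allocate a fresh one */
}

static void put_frmr(struct fastreg_mr *frmr)
{
	if (frmr) {                     /* return to the queue for reuse */
		frmr->next = frmr_q;
		frmr_q = frmr;
	}
}

int main(void)
{
	struct fastreg_mr *a = get_frmr();      /* allocates */
	put_frmr(a);
	struct fastreg_mr *b = get_frmr();      /* reuses 'a' */
	printf("reused: %s\n", a == b ? "yes" : "no");
	put_frmr(b);
	return 0;
}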
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *
  * Assumptions:
  * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *   chunk in the read list
  *
  */
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
 			   struct svc_rqst *rqstp,
 			   struct svc_rdma_op_ctxt *head,
 			   struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	return sge_no;
 }

-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			      struct svc_rdma_op_ctxt *ctxt,
-			      struct kvec *vec,
-			      u64 *sgl_offset,
-			      int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0] position points to pages[0] at an offset of 0
+ * - pages[] will be made physically contiguous by creating a one-off memory
+ *   region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ *   chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+				struct svc_rqst *rqstp,
+				struct svc_rdma_op_ctxt *head,
+				struct rpcrdma_msg *rmsgp,
+				struct svc_rdma_req_map *rpl_map,
+				struct svc_rdma_req_map *chl_map,
+				int ch_count,
+				int byte_count)
+{
+	int page_no;
+	int ch_no;
+	u32 offset;
+	struct rpcrdma_read_chunk *ch;
+	struct svc_rdma_fastreg_mr *frmr;
+	int ret = 0;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+	head->frmr = frmr;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count; /* save count of hdr pages */
+	head->arg.page_base = 0;
+	head->arg.page_len = byte_count;
+	head->arg.len = rqstp->rq_arg.len + byte_count;
+	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+	/* Fast register the page list */
+	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+	frmr->direction = DMA_FROM_DEVICE;
+	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+	frmr->map_len = byte_count;
+	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  page_address(rqstp->rq_arg.pages[page_no]),
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+	}
+	head->count += page_no;
+
+	/* rq_respages points one past arg pages */
+	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+	/* Create the reply and chunk maps */
+	offset = 0;
+	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+	for (ch_no = 0; ch_no < ch_count; ch_no++) {
+		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+		rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+		chl_map->ch[ch_no].count = 1;
+		chl_map->ch[ch_no].start = ch_no;
+		offset += ch->rc_target.rs_length;
+		ch++;
+	}
+
+	ret = svc_rdma_fastreg(xprt, frmr);
+	if (ret)
+		goto fatal_err;
+
+	return ch_no;
+
+ fatal_err:
+	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+			     struct svc_rdma_op_ctxt *ctxt,
+			     struct svc_rdma_fastreg_mr *frmr,
+			     struct kvec *vec,
+			     u64 *sgl_offset,
+			     int count)
 {
 	int i;

 	ctxt->count = count;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
-		atomic_inc(&xprt->sc_dma_used);
-		ctxt->sge[i].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  vec[i].iov_base, vec[i].iov_len,
-					  DMA_FROM_DEVICE);
+		ctxt->sge[i].length = 0; /* in case map fails */
+		if (!frmr) {
+			ctxt->sge[i].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  vec[i].iov_base,
+						  vec[i].iov_len,
+						  DMA_FROM_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 ctxt->sge[i].addr))
+				return -EINVAL;
+			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+			atomic_inc(&xprt->sc_dma_used);
+		} else {
+			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+			ctxt->sge[i].lkey = frmr->mr->lkey;
+		}
 		ctxt->sge[i].length = vec[i].iov_len;
-		ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
 		*sgl_offset = *sgl_offset + vec[i].iov_len;
 	}
+	return 0;
 }

 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
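fast_reg_read_chunks() above sizes the page list as PAGE_ALIGN(byte_count) >> PAGE_SHIFT, that is, round the byte count up to a whole number of pages and divide by the page size. A standalone check of the arithmetic, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12                   /* assume 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long byte_counts[] = { 1, 4096, 4097, 10000, 131072 };

	for (int i = 0; i < 5; i++) {
		unsigned long bc = byte_counts[i];
		/* same expression fast_reg_read_chunks uses */
		unsigned long pages = PAGE_ALIGN(bc) >> PAGE_SHIFT;
		printf("%6lu bytes -> %lu page(s)\n", bc, pages);
	}
	return 0;       /* e.g. 10000 bytes -> 3 pages */
}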
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 			 struct svc_rdma_op_ctxt *hdr_ctxt)
 {
 	struct ib_send_wr read_wr;
+	struct ib_send_wr inv_wr;
 	int err = 0;
 	int ch_no;
 	int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
-	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-				    rpl_map, chl_map,
-				    ch_count, byte_count);
+
+	if (!xprt->sc_frmr_pg_list_len)
+		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+					    rpl_map, chl_map, ch_count,
+					    byte_count);
+	else
+		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+						 rpl_map, chl_map, ch_count,
+						 byte_count);
+	if (sge_count < 0) {
+		err = -EIO;
+		goto out;
+	}
+
 	sgl_offset = 0;
 	ch_no = 0;
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 next_sge:
 	ctxt = svc_rdma_get_context(xprt);
 	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->frmr = hdr_ctxt->frmr;
 	ctxt->read_hdr = NULL;
 	clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

 	/* Prepare READ WR */
 	memset(&read_wr, 0, sizeof read_wr);
-	ctxt->wr_op = IB_WR_RDMA_READ;
 	read_wr.wr_id = (unsigned long)ctxt;
 	read_wr.opcode = IB_WR_RDMA_READ;
+	ctxt->wr_op = read_wr.opcode;
 	read_wr.send_flags = IB_SEND_SIGNALED;
 	read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
 	read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	read_wr.sg_list = ctxt->sge;
 	read_wr.num_sge =
 		rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-	rdma_set_ctxt_sge(xprt, ctxt,
-			  &rpl_map->sge[chl_map->ch[ch_no].start],
-			  &sgl_offset,
-			  read_wr.num_sge);
+	err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+				&rpl_map->sge[chl_map->ch[ch_no].start],
+				&sgl_offset,
+				read_wr.num_sge);
+	if (err) {
+		svc_rdma_unmap_dma(ctxt);
+		svc_rdma_put_context(ctxt, 0);
+		goto out;
+	}
 	if (((ch+1)->rc_discrim == 0) &&
 	    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
 		/*
@@ -339,6 +461,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 		 * the client and the RPC needs to be enqueued.
 		 */
 		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+		if (hdr_ctxt->frmr) {
+			set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+			/*
+			 * Invalidate the local MR used to map the data
+			 * sink.
+			 */
+			if (xprt->sc_dev_caps &
+			    SVCRDMA_DEVCAP_READ_W_INV) {
+				read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+				ctxt->wr_op = read_wr.opcode;
+				read_wr.ex.invalidate_rkey =
+					ctxt->frmr->mr->lkey;
+			} else {
+				/* Prepare INVALIDATE WR */
+				memset(&inv_wr, 0, sizeof inv_wr);
+				inv_wr.opcode = IB_WR_LOCAL_INV;
+				inv_wr.send_flags = IB_SEND_SIGNALED;
+				inv_wr.ex.invalidate_rkey =
+					hdr_ctxt->frmr->mr->lkey;
+				read_wr.next = &inv_wr;
+			}
+		}
 		ctxt->read_hdr = hdr_ctxt;
 	}
 	/* Post the read */
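The tail of rdma_read_xdr() above chooses between two invalidation strategies for the data-sink FRMR: devices advertising SVCRDMA_DEVCAP_READ_W_INV fold the invalidate into the final read (IB_WR_RDMA_READ_WITH_INV), while all others get a separate IB_WR_LOCAL_INV work request chained behind it. Below is a distilled restatement of that branch with stub types standing in for the verbs structures; it compiles and runs anywhere but is only an illustration, not the kernel code:

#include <stdio.h>

#define SVCRDMA_DEVCAP_READ_W_INV 2     /* value from the new header */

enum wr_opcode { WR_RDMA_READ, WR_RDMA_READ_WITH_INV, WR_LOCAL_INV };

struct send_wr {                        /* stub for struct ib_send_wr */
	enum wr_opcode opcode;
	unsigned int invalidate_rkey;
	struct send_wr *next;
};

/* Mirror of the branch at the end of rdma_read_xdr: fold the invalidate
 * into the READ when the device supports it, otherwise chain a separate
 * LOCAL_INV work request behind the read. */
static void setup_invalidate(unsigned int dev_caps, unsigned int rkey,
			     struct send_wr *read_wr, struct send_wr *inv_wr)
{
	if (dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		read_wr->opcode = WR_RDMA_READ_WITH_INV;
		read_wr->invalidate_rkey = rkey;
	} else {
		inv_wr->opcode = WR_LOCAL_INV;
		inv_wr->invalidate_rkey = rkey;
		read_wr->next = inv_wr; /* two WRs posted as a chain */
	}
}

int main(void)
{
	struct send_wr read_wr = { WR_RDMA_READ, 0, NULL }, inv_wr = { 0 };

	setup_invalidate(SVCRDMA_DEVCAP_READ_W_INV, 0x42, &read_wr, &inv_wr);
	printf("read-with-inv: chained=%d\n", read_wr.next != NULL);

	read_wr = (struct send_wr){ WR_RDMA_READ, 0, NULL };
	setup_invalidate(0, 0x42, &read_wr, &inv_wr);
	printf("local-inv:     chained=%d\n", read_wr.next != NULL);
	return 0;
}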
net/sunrpc/xprtrdma/svc_rdma_sendto.c

@@ -69,9 +69,127 @@
  * array is only concerned with the reply we are assured that we have
  * on extra page for the RPCRMDA header.
  */
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
-		       struct xdr_buf *xdr,
-		       struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+		 struct xdr_buf *xdr,
+		 struct svc_rdma_req_map *vec)
+{
+	int sge_no;
+	u32 sge_bytes;
+	u32 page_bytes;
+	u32 page_off;
+	int page_no = 0;
+	u8 *frva;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+	vec->frmr = frmr;
+
+	/* Skip the RPCRDMA header */
+	sge_no = 1;
+
+	/* Map the head. */
+	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+	vec->count = 2;
+	sge_no++;
+
+	/* Build the FRMR */
+	frmr->kva = frva;
+	frmr->direction = DMA_TO_DEVICE;
+	frmr->access_flags = 0;
+	frmr->map_len = PAGE_SIZE;
+	frmr->page_list_len = 1;
+	frmr->page_list->page_list[page_no] =
+		ib_dma_map_single(xprt->sc_cm_id->device,
+				  (void *)xdr->head[0].iov_base,
+				  PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+				 frmr->page_list->page_list[page_no]))
+		goto fatal_err;
+	atomic_inc(&xprt->sc_dma_used);
+
+	page_off = xdr->page_base;
+	page_bytes = xdr->page_len + page_off;
+	if (!page_bytes)
+		goto encode_tail;
+
+	/* Map the pages */
+	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+	vec->sge[sge_no].iov_len = page_bytes;
+	sge_no++;
+	while (page_bytes) {
+		struct page *page;
+
+		page = xdr->pages[page_no++];
+		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+		page_bytes -= sge_bytes;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+					PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+
+		atomic_inc(&xprt->sc_dma_used);
+		page_off = 0; /* reset for next time through loop */
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+	vec->count++;
+
+ encode_tail:
+	/* Map tail */
+	if (0 == xdr->tail[0].iov_len)
+		goto done;
+
+	vec->count++;
+	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+		/*
+		 * If head and tail use the same page, we don't need
+		 * to map it again.
+		 */
+		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+	} else {
+		void *va;
+
+		/* Map another page for the tail */
+		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device, va,
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+
+ done:
+	if (svc_rdma_fastreg(xprt, frmr))
+		goto fatal_err;
+
+	return 0;
+
+ fatal_err:
+	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+		   struct xdr_buf *xdr,
+		   struct svc_rdma_req_map *vec)
 {
 	int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
 	int sge_no;
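fast_reg_xdr() above masks the head pointer down to a page boundary to get the region base (frva), then addresses the pagelist and tail as offsets into that one virtually contiguous fast-registered region (frva + map_len + page_off). A worked example of the address arithmetic, assuming 4 KiB pages and an invented pointer value:

#include <stdio.h>

#define PAGE_SHIFT 12                   /* assume 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical head iovec starting 0x1a0 bytes into a page */
	unsigned long iov_base = 0x100021a0UL;

	unsigned long frva = iov_base & PAGE_MASK;      /* 0x10002000 */
	unsigned long map_len = PAGE_SIZE;              /* head page mapped */
	unsigned long page_off = 0x80;                  /* xdr->page_base */

	printf("frva      = %#lx\n", frva);
	/* Pagelist data is addressed relative to the fast-registered
	 * region, not by its original kernel virtual address: */
	printf("pages sge = frva + map_len + page_off = %#lx\n",
	       frva + map_len + page_off);
	return 0;
}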
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));

+	if (xprt->sc_frmr_pg_list_len)
+		return fast_reg_xdr(xprt, xdr, vec);
+
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 	BUG_ON(sge_no > sge_max);
 	vec->count = sge_no;
+	return 0;
 }

 /* Assumptions:
+ * - We are using FRMR
+ *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	sge_no = 0;

 	/* Copy the remaining SGE */
-	while (bc != 0 && xdr_sge_no < vec->count) {
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
-		sge_bytes = min((size_t)bc,
-				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
+	while (bc != 0) {
+		sge_bytes = min_t(size_t,
+			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		atomic_inc(&xprt->sc_dma_used);
-		sge[sge_no].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  (void *)
-					  vec->sge[xdr_sge_no].iov_base + sge_off,
-					  sge_bytes, DMA_TO_DEVICE);
-		if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
-					sge[sge_no].addr))
-			goto err;
+		if (!vec->frmr) {
+			sge[sge_no].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  (void *)
+						  vec->sge[xdr_sge_no].iov_base + sge_off,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 sge[sge_no].addr))
+				goto err;
+			atomic_inc(&xprt->sc_dma_used);
+			sge[sge_no].lkey = xprt->sc_dma_lkey;
+		} else {
+			sge[sge_no].addr = (unsigned long)
+				vec->sge[xdr_sge_no].iov_base + sge_off;
+			sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
+		ctxt->count++;
+		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
-		ctxt->count++;
 		xdr_sge_no++;
+		BUG_ON(xdr_sge_no > vec->count);
 		bc -= sge_bytes;
 	}

-	BUG_ON(bc != 0);
-	BUG_ON(xdr_sge_no > vec->count);
-
 	/* Prepare WRITE WR */
 	memset(&write_wr, 0, sizeof write_wr);
 	ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];

-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;

 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];

-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;

 	/* xdr offset starts at RPC message */
 	for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 		ch = &arg_ary->wc_array[chunk_no].wc_target;
 		write_len = min(xfer_len, ch->rs_length);
-
 		/* Prepare the reply chunk given the length actually
 		 * written */
 		rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
+	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
+	ctxt->frmr = vec->frmr;
+	if (vec->frmr)
+		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

 	/* Prepare the SGE for the RPCRDMA Header */
-	atomic_inc(&rdma->sc_dma_used);
 	ctxt->sge[0].addr =
 		ib_dma_map_page(rdma->sc_cm_id->device,
 				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+		goto err;
+	atomic_inc(&rdma->sc_dma_used);
+
 	ctxt->direction = DMA_TO_DEVICE;
+
 	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+	ctxt->sge[0].lkey = rdma->sc_dma_lkey;

 	/* Determine how many of our SGE are to be transmitted */
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		atomic_inc(&rdma->sc_dma_used);
-		ctxt->sge[sge_no].addr =
-			ib_dma_map_single(rdma->sc_cm_id->device,
-					  vec->sge[sge_no].iov_base,
-					  sge_bytes, DMA_TO_DEVICE);
+		if (!vec->frmr) {
+			ctxt->sge[sge_no].addr =
+				ib_dma_map_single(rdma->sc_cm_id->device,
+						  vec->sge[sge_no].iov_base,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+						 ctxt->sge[sge_no].addr))
+				goto err;
+			atomic_inc(&rdma->sc_dma_used);
+			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+		} else {
+			ctxt->sge[sge_no].addr = (unsigned long)
+				vec->sge[sge_no].iov_base;
+			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
 		ctxt->sge[sge_no].length = sge_bytes;
-		ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
 	}
 	BUG_ON(byte_count != 0);
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
-		/* If there are more pages than SGE, terminate SGE list */
+		/*
+		 * If there are more pages than SGE, terminate SGE
+		 * list so that svc_rdma_unmap_dma doesn't attempt to
+		 * unmap garbage.
+		 */
 		if (page_no+1 >= sge_no)
 			ctxt->sge[page_no+1].length = 0;
 	}
 	BUG_ON(sge_no > rdma->sc_max_sge);
+	BUG_ON(sge_no > ctxt->count);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
 	send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
+	if (vec->frmr) {
+		/* Prepare INVALIDATE WR */
+		memset(&inv_wr, 0, sizeof inv_wr);
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED;
+		inv_wr.ex.invalidate_rkey =
+			vec->frmr->mr->lkey;
+		send_wr.next = &inv_wr;
+	}

 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
-		svc_rdma_put_context(ctxt, 1);
+		goto err;

-	return ret;
+	return 0;
+
+ err:
+	svc_rdma_put_frmr(rdma, vec->frmr);
+	svc_rdma_put_context(ctxt, 1);
+	return -EIO;
 }

 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->direction = DMA_TO_DEVICE;
 	vec = svc_rdma_get_req_map();
-	xdr_to_sge(rdma, &rqstp->rq_res, vec);
+	ret = map_xdr(rdma, &rqstp->rq_res, vec);
+	if (ret)
+		goto err0;

 	inline_bytes = rqstp->rq_res.len;

 	/* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	svc_rdma_put_req_map(vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
- error:
+
+ err1:
 	put_page(res_page);
+ err0:
 	svc_rdma_put_req_map(vec);
 	svc_rdma_put_context(ctxt, 0);
-	put_page(res_page);
 	return ret;
 }
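The relabeled error exits follow the common kernel goto-ladder idiom: each label undoes only what was acquired before the failing step and falls through to the cleanups below it, so err1 releases the response page before err0 releases the request map. A generic, self-contained illustration of the shape (the resources here are invented for the example):

#include <stdio.h>
#include <stdlib.h>

/* Generic shape of the err0/err1 ladder svc_rdma_sendto now uses:
 * later failures jump to higher rungs, and each rung falls through
 * to the cleanups for everything acquired earlier. */
static int do_work(int fail_late)
{
	char *map = malloc(64);         /* step 0: e.g. the req_map */
	if (!map)
		return -1;

	char *page = malloc(4096);      /* step 1: e.g. the response page */
	if (!page)
		goto err0;              /* only step 0 to undo */

	if (fail_late)                  /* e.g. a chunk send failed */
		goto err1;              /* steps 1 and 0 to undo, in order */

	free(page);
	free(map);
	return 0;

err1:
	free(page);
err0:
	free(map);
	return -1;
}

int main(void)
{
	printf("ok path:   %d\n", do_work(0));
	printf("fail path: %d\n", do_work(1));
	return 0;
}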
net/sunrpc/xprtrdma/svc_rdma_transport.c

@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 	ctxt->xprt = xprt;
 	INIT_LIST_HEAD(&ctxt->dto_q);
 	ctxt->count = 0;
+	ctxt->frmr = NULL;
 	atomic_inc(&xprt->sc_ctxt_used);
 	return ctxt;
 }

-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
 	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
-		atomic_dec(&xprt->sc_dma_used);
-		ib_dma_unmap_single(xprt->sc_cm_id->device,
-				    ctxt->sge[i].addr,
-				    ctxt->sge[i].length,
-				    ctxt->direction);
+		/*
+		 * Unmap the DMA addr in the SGE if the lkey matches
+		 * the sc_dma_lkey, otherwise, ignore it since it is
+		 * an FRMR lkey and will be unmapped later when the
+		 * last WR that uses it completes.
+		 */
+		if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+			atomic_dec(&xprt->sc_dma_used);
+			ib_dma_unmap_single(xprt->sc_cm_id->device,
+					    ctxt->sge[i].addr,
+					    ctxt->sge[i].length,
+					    ctxt->direction);
+		}
 	}
 }
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
+	map->frmr = NULL;
 	return map;
 }
@@ -315,6 +325,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 	svc_xprt_enqueue(&xprt->sc_xprt);
 }

+/*
+ * Processs a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+			    struct svc_rdma_op_ctxt *ctxt)
+{
+	svc_rdma_unmap_dma(ctxt);
+
+	switch (ctxt->wr_op) {
+	case IB_WR_SEND:
+		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+			svc_rdma_put_frmr(xprt, ctxt->frmr);
+		svc_rdma_put_context(ctxt, 1);
+		break;
+
+	case IB_WR_RDMA_WRITE:
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_READ_WITH_INV:
+		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+			BUG_ON(!read_hdr);
+			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+				svc_rdma_put_frmr(xprt, ctxt->frmr);
+			spin_lock_bh(&xprt->sc_rq_dto_lock);
+			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+			list_add_tail(&read_hdr->dto_q,
+				      &xprt->sc_read_complete_q);
+			spin_unlock_bh(&xprt->sc_rq_dto_lock);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	default:
+		printk(KERN_ERR "svcrdma: unexpected completion type, "
+		       "opcode=%d\n",
+		       ctxt->wr_op);
+		break;
+	}
+}
+
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
  *
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;

-
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;

 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
 	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		xprt = ctxt->xprt;
-
-		svc_rdma_unmap_dma(ctxt);
 		if (wc.status != IB_WC_SUCCESS)
 			/* Close the transport */
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 		atomic_dec(&xprt->sc_sq_count);
 		wake_up(&xprt->sc_send_wait);

-		switch (ctxt->wr_op) {
-		case IB_WR_SEND:
-			svc_rdma_put_context(ctxt, 1);
-			break;
-
-		case IB_WR_RDMA_WRITE:
-			svc_rdma_put_context(ctxt, 0);
-			break;
-
-		case IB_WR_RDMA_READ:
-			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-				BUG_ON(!read_hdr);
-				spin_lock_bh(&xprt->sc_rq_dto_lock);
-				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				list_add_tail(&read_hdr->dto_q,
-					      &xprt->sc_read_complete_q);
-				spin_unlock_bh(&xprt->sc_rq_dto_lock);
-				svc_xprt_enqueue(&xprt->sc_xprt);
-			}
-			svc_rdma_put_context(ctxt, 0);
-			break;
+		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+		if (ctxt)
+			process_context(xprt, ctxt);

-		default:
-			printk(KERN_ERR "svcrdma: unexpected completion type, "
-			       "opcode=%d, status=%d\n",
-			       wc.opcode, wc.status);
-			break;
-		}
 		svc_xprt_put(&xprt->sc_xprt);
 	}
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);

 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+	spin_lock_init(&cma_xprt->sc_frmr_q_lock);

 	cma_xprt->sc_ord = svcrdma_ord;
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	struct ib_recv_wr recv_wr, *bad_recv_wr;
 	struct svc_rdma_op_ctxt *ctxt;
 	struct page *page;
-	unsigned long pa;
+	dma_addr_t pa;
 	int sge_no;
 	int buflen;
 	int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
-		atomic_inc(&xprt->sc_dma_used);
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
 				     page, 0, PAGE_SIZE,
 				     DMA_FROM_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+			goto err_put_ctxt;
+		atomic_inc(&xprt->sc_dma_used);
 		ctxt->sge[sge_no].addr = pa;
 		ctxt->sge[sge_no].length = PAGE_SIZE;
-		ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
 		buflen += PAGE_SIZE;
 	}
 	ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		svc_rdma_put_context(ctxt, 1);
 	}
 	return ret;
+
+ err_put_ctxt:
+	svc_rdma_put_context(ctxt, 1);
+	return -ENOMEM;
 }

 /*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 		dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
 			"event=%d\n", cma_id, cma_id->context, event->event);
 		handle_connect_req(cma_id,
-				   event->param.conn.responder_resources);
+				   event->param.conn.initiator_depth);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }

+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *pl;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+	if (!frmr)
+		goto err;
+
+	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+	if (!mr)
+		goto err_free_frmr;
+
+	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+					 RPCSVC_MAXPAGES);
+	if (!pl)
+		goto err_free_mr;
+
+	frmr->mr = mr;
+	frmr->page_list = pl;
+	INIT_LIST_HEAD(&frmr->frmr_list);
+	return frmr;
+
+ err_free_mr:
+	ib_dereg_mr(mr);
+ err_free_frmr:
+	kfree(frmr);
+ err:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+	struct svc_rdma_fastreg_mr *frmr;
+
+	while (!list_empty(&xprt->sc_frmr_q)) {
+		frmr = list_entry(xprt->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		ib_dereg_mr(frmr->mr);
+		ib_free_fast_reg_page_list(frmr->page_list);
+		kfree(frmr);
+	}
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_fastreg_mr *frmr = NULL;
+
+	spin_lock_bh(&rdma->sc_frmr_q_lock);
+	if (!list_empty(&rdma->sc_frmr_q)) {
+		frmr = list_entry(rdma->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		frmr->map_len = 0;
+		frmr->page_list_len = 0;
+	}
+	spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	if (frmr)
+		return frmr;
+
+	return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+			   struct svc_rdma_fastreg_mr *frmr)
+{
+	int page_no;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		dma_addr_t addr = frmr->page_list->page_list[page_no];
+		if (ib_dma_mapping_error(frmr->mr->device, addr))
+			continue;
+		atomic_dec(&xprt->sc_dma_used);
+		ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+				    frmr->direction);
+	}
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+		       struct svc_rdma_fastreg_mr *frmr)
+{
+	if (frmr) {
+		frmr_unmap_dma(rdma, frmr);
+		spin_lock_bh(&rdma->sc_frmr_q_lock);
+		BUG_ON(!list_empty(&frmr->frmr_list));
+		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+		spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	}
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
+	int dma_mr_acc;
+	int need_dma_mr;
 	int ret;
 	int i;
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;

-	/* Register all of physical memory */
-	newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
-					    IB_ACCESS_LOCAL_WRITE |
-					    IB_ACCESS_REMOTE_WRITE);
-	if (IS_ERR(newxprt->sc_phys_mr)) {
-		dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
+	/*
+	 * Use the most secure set of MR resources based on the
+	 * transport type and available memory management features in
+	 * the device. Here's the table implemented below:
+	 *
+	 *		Fast	Global	DMA	Remote WR
+	 *		Reg	LKEY	MR	Access
+	 *		Sup'd	Sup'd	Needed	Needed
+	 *
+	 * IWARP	N	N	Y	Y
+	 *		N	Y	Y	Y
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * IB		N	N	Y	N
+	 *		N	Y	N	-
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * NB:	iWARP requires remote write access for the data sink
+	 *	of an RDMA_READ. IB does not.
+	 */
+	if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+		newxprt->sc_frmr_pg_list_len =
+			devattr.max_fast_reg_page_list_len;
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+	}
+
+	/*
+	 * Determine if a DMA MR is required and if so, what privs are required
+	 */
+	switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+	case RDMA_TRANSPORT_IWARP:
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc =
+				(IB_ACCESS_LOCAL_WRITE |
+				 IB_ACCESS_REMOTE_WRITE);
+		} else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	case RDMA_TRANSPORT_IB:
+		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	default:
 		goto errout;
 	}

+	/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+	if (need_dma_mr) {
+		/* Register all of physical memory */
+		newxprt->sc_phys_mr =
+			ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+		if (IS_ERR(newxprt->sc_phys_mr)) {
+			dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+				ret);
+			goto errout;
+		}
+		newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+	} else
+		newxprt->sc_dma_lkey =
+			newxprt->sc_cm_id->device->local_dma_lkey;
+
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt);
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
 	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);

+	/* De-allocate fastreg mr */
+	rdma_dealloc_frmr_q(rdma);
+
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }

+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ *  NULL : The device does not support fastreg or there were no more
+ *         fastreg mr.
+ *  frmr : The kvec register request was successfully posted.
+ *    <0 : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+		     struct svc_rdma_fastreg_mr *frmr)
+{
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof fastreg_wr);
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	return svc_rdma_send(xprt, &fastreg_wr);
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
-	struct ib_send_wr *bad_wr;
+	struct ib_send_wr *bad_wr, *n_wr;
+	int wr_count;
+	int i;
 	int ret;

 	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
 		return -ENOTCONN;

 	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
-	BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
-		wr->opcode);
+	wr_count = 1;
+	for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+		wr_count++;
+
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
 		spin_lock_bh(&xprt->sc_lock);
-		if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+		if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
 			spin_unlock_bh(&xprt->sc_lock);
 			atomic_inc(&rdma_stat_sq_starve);
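svc_rdma_fastreg() above bumps the low byte of the MR key before every re-registration: the bottom 8 bits of an lkey/rkey act as a consumer-owned generation counter, so incrementing them invalidates any stale key a peer might still hold from the previous mapping. A worked example of the bit manipulation, with a hypothetical key value and a stand-in for ib_update_fast_reg_key():

#include <stdio.h>
#include <stdint.h>

/* Model of ib_update_fast_reg_key(): the low byte of the key is a
 * generation counter owned by the consumer; the upper 24 bits are
 * fixed by the device at MR allocation. */
static uint32_t update_key(uint32_t key_base, uint8_t newkey)
{
	return (key_base & 0xFFFFFF00u) | newkey;
}

int main(void)
{
	uint32_t lkey = 0x12345601u;    /* hypothetical current key */

	uint8_t key = (uint8_t)(lkey & 0x000000FF);     /* 0x01 */
	lkey = update_key(lkey, ++key);                 /* now 0x12345602 */

	printf("bumped key: %#x\n", lkey);
	return 0;
}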
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 				return 0;
 			continue;
 		}
-		/* Bumped used SQ WR count and post */
-		svc_xprt_get(&xprt->sc_xprt);
+		/* Take a transport ref for each WR posted */
+		for (i = 0; i < wr_count; i++)
+			svc_xprt_get(&xprt->sc_xprt);
+
+		/* Bump used SQ WR count and post */
+		atomic_add(wr_count, &xprt->sc_sq_count);
 		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
-		if (!ret)
-			atomic_inc(&xprt->sc_sq_count);
-		else {
-			svc_xprt_put(&xprt->sc_xprt);
+		if (ret) {
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			atomic_sub(wr_count, &xprt->sc_sq_count);
+			for (i = 0; i < wr_count; i++)
+				svc_xprt_put(&xprt->sc_xprt);
 			dprintk("svcrdma: failed to post SQ WR rc=%d, "
 			       "sc_sq_count=%d, sc_sq_depth=%d\n",
 			       ret, atomic_read(&xprt->sc_sq_count),
 			       xprt->sc_sq_depth);
 		}
 		spin_unlock_bh(&xprt->sc_lock);
+		if (ret)
+			wake_up(&xprt->sc_send_wait);
 		break;
 	}
 	return ret;
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

 	/* Prepare SGE for local address */
-	atomic_inc(&xprt->sc_dma_used);
 	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
 				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	sge.lkey = xprt->sc_phys_mr->lkey;
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+		put_page(p);
+		return;
+	}
+	atomic_inc(&xprt->sc_dma_used);
+	sge.lkey = xprt->sc_dma_lkey;
 	sge.length = length;

 	ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	if (ret) {
 		dprintk("svcrdma: Error %d posting send for protocol error\n",
 			ret);
+		ib_dma_unmap_page(xprt->sc_cm_id->device,
+				  sge.addr, PAGE_SIZE,
+				  DMA_FROM_DEVICE);
 		svc_rdma_put_context(ctxt, 1);
 	}
 }
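Because a post may now be a chain (for example a SEND followed by a LOCAL_INV), svc_rdma_send() counts the WRs by walking wr->next, reserves that many SQ slots with atomic_add(), and takes one transport reference per WR, since each WR produces its own completion. The counting itself is plain list traversal, shown standalone below with a stub WR type:

#include <stdio.h>
#include <stddef.h>

struct send_wr {                        /* stub for struct ib_send_wr */
	struct send_wr *next;
};

/* Same loop svc_rdma_send uses to size the post: every WR in the
 * chain occupies one send-queue slot and produces one completion. */
static int count_wrs(struct send_wr *wr)
{
	int wr_count = 1;

	for (struct send_wr *n_wr = wr->next; n_wr; n_wr = n_wr->next)
		wr_count++;
	return wr_count;
}

int main(void)
{
	struct send_wr inv_wr = { NULL };
	struct send_wr send_wr = { &inv_wr };   /* SEND chained to LOCAL_INV */

	/* A reply that also invalidates its FRMR needs two SQ slots: */
	printf("slots needed: %d\n", count_wrs(&send_wr));
	return 0;
}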