提交 122e5b9f 编写于 作者: S Sagi Grimberg 提交者: Christoph Hellwig

nvme-tcp: optimize network stack with setting msg flags according to batch size

If we have a long list of request to send, signal the network stack
that more is coming (MSG_MORE). If we have nothing else, signal MSG_EOR.
Signed-off-by: NSagi Grimberg <sagi@grimberg.me>
Tested-by: NMark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: NChristoph Hellwig <hch@lst.de>
上级 86f0348a
...@@ -79,6 +79,7 @@ struct nvme_tcp_queue { ...@@ -79,6 +79,7 @@ struct nvme_tcp_queue {
struct mutex send_mutex; struct mutex send_mutex;
struct llist_head req_list; struct llist_head req_list;
struct list_head send_list; struct list_head send_list;
bool more_requests;
/* recv state */ /* recv state */
void *pdu; void *pdu;
...@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, ...@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
*/ */
if (queue->io_cpu == smp_processor_id() && if (queue->io_cpu == smp_processor_id() &&
sync && empty && mutex_trylock(&queue->send_mutex)) { sync && empty && mutex_trylock(&queue->send_mutex)) {
queue->more_requests = !last;
nvme_tcp_try_send(queue); nvme_tcp_try_send(queue);
queue->more_requests = false;
mutex_unlock(&queue->send_mutex); mutex_unlock(&queue->send_mutex);
} else if (last) { } else if (last) {
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
...@@ -877,6 +880,12 @@ static void nvme_tcp_state_change(struct sock *sk) ...@@ -877,6 +880,12 @@ static void nvme_tcp_state_change(struct sock *sk)
read_unlock(&sk->sk_callback_lock); read_unlock(&sk->sk_callback_lock);
} }
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list) || queue->more_requests;
}
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{ {
queue->request = NULL; queue->request = NULL;
...@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
bool last = nvme_tcp_pdu_last_send(req, len); bool last = nvme_tcp_pdu_last_send(req, len);
int ret, flags = MSG_DONTWAIT; int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest) if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
flags |= MSG_EOR; flags |= MSG_EOR;
else else
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
...@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) ...@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
int flags = MSG_DONTWAIT; int flags = MSG_DONTWAIT;
int ret; int ret;
if (inline_data) if (inline_data || nvme_tcp_queue_more(queue))
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else else
flags |= MSG_EOR; flags |= MSG_EOR;
...@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) ...@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
int ret; int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = &req->ddgst + req->offset, .iov_base = &req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
}; };
if (nvme_tcp_queue_more(queue))
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册