Commit 05bd375b, authored by Linus Torvalds

Merge tag 'for-5.5/io_uring-post-20191128' of git://git.kernel.dk/linux-block

Pull more io_uring updates from Jens Axboe:
 "As mentioned in the first pull request, there was a later batch as
  well. This contains fixes to the stuff that already went in, cleanups,
  and a few later additions. In particular, this contains:

   - Cleanups/fixes/unification of the submission and completion path
     (Pavel,me)

   - Linked timeouts improvements (Pavel,me)

   - Error path fixes (me)

   - Fix lookup window where cancellations wouldn't work (me)

   - Improve DRAIN support (Pavel)

   - Fix backlog flushing -EBUSY on submit (me)

   - Add support for connect(2) (me)

   - Fix for non-iter based fixed IO (Pavel)

   - creds inheritance for async workers (me)

   - Disable cmsg/ancillary data for sendmsg/recvmsg (me)

   - Shrink io_kiocb to 3 cachelines (me)

   - NUMA fix for io-wq (Jann)"

* tag 'for-5.5/io_uring-post-20191128' of git://git.kernel.dk/linux-block: (42 commits)
  io_uring: make poll->wait dynamically allocated
  io-wq: shrink io_wq_work a bit
  io-wq: fix handling of NUMA node IDs
  io_uring: use kzalloc instead of kcalloc for single-element allocations
  io_uring: cleanup io_import_fixed()
  io_uring: inline struct sqe_submit
  io_uring: store timeout's sqe->off in proper place
  net: disallow ancillary data for __sys_{send,recv}msg_file()
  net: separate out the msghdr copy from ___sys_{send,recv}msg()
  io_uring: remove superfluous check for sqe->off in io_accept()
  io_uring: async workers should inherit the user creds
  io-wq: have io_wq_create() take a 'data' argument
  io_uring: fix dead-hung for non-iter fixed rw
  io_uring: add support for IORING_OP_CONNECT
  net: add __sys_connect_file() helper
  io_uring: only return -EBUSY for submit on non-flushed backlog
  io_uring: only !null ptr to io_issue_sqe()
  io_uring: simplify io_req_link_next()
  io_uring: pass only !null to io_req_find_next()
  io_uring: remove io_free_req_find_next()
  ...
...@@ -33,6 +33,7 @@ enum { ...@@ -33,6 +33,7 @@ enum {
enum { enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */ IO_WQ_BIT_EXIT = 0, /* wq exiting */
IO_WQ_BIT_CANCEL = 1, /* cancel work on list */ IO_WQ_BIT_CANCEL = 1, /* cancel work on list */
IO_WQ_BIT_ERROR = 2, /* error on setup */
}; };
enum { enum {
...@@ -56,6 +57,7 @@ struct io_worker { ...@@ -56,6 +57,7 @@ struct io_worker {
struct rcu_head rcu; struct rcu_head rcu;
struct mm_struct *mm; struct mm_struct *mm;
const struct cred *creds;
struct files_struct *restore_files; struct files_struct *restore_files;
}; };
...@@ -82,7 +84,7 @@ enum { ...@@ -82,7 +84,7 @@ enum {
struct io_wqe { struct io_wqe {
struct { struct {
spinlock_t lock; spinlock_t lock;
struct list_head work_list; struct io_wq_work_list work_list;
unsigned long hash_map; unsigned long hash_map;
unsigned flags; unsigned flags;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
...@@ -103,13 +105,13 @@ struct io_wqe { ...@@ -103,13 +105,13 @@ struct io_wqe {
struct io_wq { struct io_wq {
struct io_wqe **wqes; struct io_wqe **wqes;
unsigned long state; unsigned long state;
unsigned nr_wqes;
get_work_fn *get_work; get_work_fn *get_work;
put_work_fn *put_work; put_work_fn *put_work;
struct task_struct *manager; struct task_struct *manager;
struct user_struct *user; struct user_struct *user;
struct cred *creds;
struct mm_struct *mm; struct mm_struct *mm;
refcount_t refs; refcount_t refs;
struct completion done; struct completion done;
...@@ -135,6 +137,11 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) ...@@ -135,6 +137,11 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
{ {
bool dropped_lock = false; bool dropped_lock = false;
if (worker->creds) {
revert_creds(worker->creds);
worker->creds = NULL;
}
if (current->files != worker->restore_files) { if (current->files != worker->restore_files) {
__acquire(&wqe->lock); __acquire(&wqe->lock);
spin_unlock_irq(&wqe->lock); spin_unlock_irq(&wqe->lock);
...@@ -229,7 +236,8 @@ static void io_worker_exit(struct io_worker *worker) ...@@ -229,7 +236,8 @@ static void io_worker_exit(struct io_worker *worker)
static inline bool io_wqe_run_queue(struct io_wqe *wqe) static inline bool io_wqe_run_queue(struct io_wqe *wqe)
__must_hold(wqe->lock) __must_hold(wqe->lock)
{ {
if (!list_empty(&wqe->work_list) && !(wqe->flags & IO_WQE_FLAG_STALLED)) if (!wq_list_empty(&wqe->work_list) &&
!(wqe->flags & IO_WQE_FLAG_STALLED))
return true; return true;
return false; return false;
} }
...@@ -327,9 +335,9 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, ...@@ -327,9 +335,9 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
* If worker is moving from bound to unbound (or vice versa), then * If worker is moving from bound to unbound (or vice versa), then
* ensure we update the running accounting. * ensure we update the running accounting.
*/ */
worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0; worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0; work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
if (worker_bound != work_bound) { if (worker_bound != work_bound) {
io_wqe_dec_running(wqe, worker); io_wqe_dec_running(wqe, worker);
if (work_bound) { if (work_bound) {
worker->flags |= IO_WORKER_F_BOUND; worker->flags |= IO_WORKER_F_BOUND;
...@@ -368,12 +376,15 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) ...@@ -368,12 +376,15 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash) static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
__must_hold(wqe->lock) __must_hold(wqe->lock)
{ {
struct io_wq_work_node *node, *prev;
struct io_wq_work *work; struct io_wq_work *work;
list_for_each_entry(work, &wqe->work_list, list) { wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
/* not hashed, can run anytime */ /* not hashed, can run anytime */
if (!(work->flags & IO_WQ_WORK_HASHED)) { if (!(work->flags & IO_WQ_WORK_HASHED)) {
list_del(&work->list); wq_node_del(&wqe->work_list, node, prev);
return work; return work;
} }
...@@ -381,7 +392,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash) ...@@ -381,7 +392,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
*hash = work->flags >> IO_WQ_HASH_SHIFT; *hash = work->flags >> IO_WQ_HASH_SHIFT;
if (!(wqe->hash_map & BIT_ULL(*hash))) { if (!(wqe->hash_map & BIT_ULL(*hash))) {
wqe->hash_map |= BIT_ULL(*hash); wqe->hash_map |= BIT_ULL(*hash);
list_del(&work->list); wq_node_del(&wqe->work_list, node, prev);
return work; return work;
} }
} }
...@@ -409,7 +420,7 @@ static void io_worker_handle_work(struct io_worker *worker) ...@@ -409,7 +420,7 @@ static void io_worker_handle_work(struct io_worker *worker)
work = io_get_next_work(wqe, &hash); work = io_get_next_work(wqe, &hash);
if (work) if (work)
__io_worker_busy(wqe, worker, work); __io_worker_busy(wqe, worker, work);
else if (!list_empty(&wqe->work_list)) else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED; wqe->flags |= IO_WQE_FLAG_STALLED;
spin_unlock_irq(&wqe->lock); spin_unlock_irq(&wqe->lock);
...@@ -426,6 +437,9 @@ static void io_worker_handle_work(struct io_worker *worker) ...@@ -426,6 +437,9 @@ static void io_worker_handle_work(struct io_worker *worker)
worker->cur_work = work; worker->cur_work = work;
spin_unlock_irq(&worker->lock); spin_unlock_irq(&worker->lock);
if (work->flags & IO_WQ_WORK_CB)
work->func(&work);
if ((work->flags & IO_WQ_WORK_NEEDS_FILES) && if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
current->files != work->files) { current->files != work->files) {
task_lock(current); task_lock(current);
...@@ -438,6 +452,8 @@ static void io_worker_handle_work(struct io_worker *worker) ...@@ -438,6 +452,8 @@ static void io_worker_handle_work(struct io_worker *worker)
set_fs(USER_DS); set_fs(USER_DS);
worker->mm = wq->mm; worker->mm = wq->mm;
} }
if (!worker->creds)
worker->creds = override_creds(wq->creds);
if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
work->flags |= IO_WQ_WORK_CANCEL; work->flags |= IO_WQ_WORK_CANCEL;
if (worker->mm) if (worker->mm)
...@@ -514,7 +530,7 @@ static int io_wqe_worker(void *data) ...@@ -514,7 +530,7 @@ static int io_wqe_worker(void *data)
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
spin_lock_irq(&wqe->lock); spin_lock_irq(&wqe->lock);
if (!list_empty(&wqe->work_list)) if (!wq_list_empty(&wqe->work_list))
io_worker_handle_work(worker); io_worker_handle_work(worker);
else else
spin_unlock_irq(&wqe->lock); spin_unlock_irq(&wqe->lock);
...@@ -562,14 +578,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk) ...@@ -562,14 +578,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
spin_unlock_irq(&wqe->lock); spin_unlock_irq(&wqe->lock);
} }
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{ {
struct io_wqe_acct *acct =&wqe->acct[index]; struct io_wqe_acct *acct =&wqe->acct[index];
struct io_worker *worker; struct io_worker *worker;
worker = kcalloc_node(1, sizeof(*worker), GFP_KERNEL, wqe->node); worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
if (!worker) if (!worker)
return; return false;
refcount_set(&worker->ref, 1); refcount_set(&worker->ref, 1);
worker->nulls_node.pprev = NULL; worker->nulls_node.pprev = NULL;
...@@ -581,7 +597,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) ...@@ -581,7 +597,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
"io_wqe_worker-%d/%d", index, wqe->node); "io_wqe_worker-%d/%d", index, wqe->node);
if (IS_ERR(worker->task)) { if (IS_ERR(worker->task)) {
kfree(worker); kfree(worker);
return; return false;
} }
spin_lock_irq(&wqe->lock); spin_lock_irq(&wqe->lock);
...@@ -599,6 +615,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) ...@@ -599,6 +615,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
atomic_inc(&wq->user->processes); atomic_inc(&wq->user->processes);
wake_up_process(worker->task); wake_up_process(worker->task);
return true;
} }
static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
...@@ -606,9 +623,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) ...@@ -606,9 +623,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
{ {
struct io_wqe_acct *acct = &wqe->acct[index]; struct io_wqe_acct *acct = &wqe->acct[index];
/* always ensure we have one bounded worker */
if (index == IO_WQ_ACCT_BOUND && !acct->nr_workers)
return true;
/* if we have available workers or no work, no need */ /* if we have available workers or no work, no need */
if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe)) if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
return false; return false;
...@@ -621,12 +635,22 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) ...@@ -621,12 +635,22 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
static int io_wq_manager(void *data) static int io_wq_manager(void *data)
{ {
struct io_wq *wq = data; struct io_wq *wq = data;
int workers_to_create = num_possible_nodes();
int node;
while (!kthread_should_stop()) { /* create fixed workers */
int i; refcount_set(&wq->refs, workers_to_create);
for_each_node(node) {
if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
goto err;
workers_to_create--;
}
complete(&wq->done);
for (i = 0; i < wq->nr_wqes; i++) { while (!kthread_should_stop()) {
struct io_wqe *wqe = wq->wqes[i]; for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
bool fork_worker[2] = { false, false }; bool fork_worker[2] = { false, false };
spin_lock_irq(&wqe->lock); spin_lock_irq(&wqe->lock);
...@@ -645,6 +669,12 @@ static int io_wq_manager(void *data) ...@@ -645,6 +669,12 @@ static int io_wq_manager(void *data)
} }
return 0; return 0;
err:
set_bit(IO_WQ_BIT_ERROR, &wq->state);
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (refcount_sub_and_test(workers_to_create, &wq->refs))
complete(&wq->done);
return 0;
} }
static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
...@@ -688,7 +718,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) ...@@ -688,7 +718,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
} }
spin_lock_irqsave(&wqe->lock, flags); spin_lock_irqsave(&wqe->lock, flags);
list_add_tail(&work->list, &wqe->work_list); wq_list_add_tail(&work->list, &wqe->work_list);
wqe->flags &= ~IO_WQE_FLAG_STALLED; wqe->flags &= ~IO_WQE_FLAG_STALLED;
spin_unlock_irqrestore(&wqe->lock, flags); spin_unlock_irqrestore(&wqe->lock, flags);
...@@ -750,7 +780,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe, ...@@ -750,7 +780,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
void io_wq_cancel_all(struct io_wq *wq) void io_wq_cancel_all(struct io_wq *wq)
{ {
int i; int node;
set_bit(IO_WQ_BIT_CANCEL, &wq->state); set_bit(IO_WQ_BIT_CANCEL, &wq->state);
...@@ -759,8 +789,8 @@ void io_wq_cancel_all(struct io_wq *wq) ...@@ -759,8 +789,8 @@ void io_wq_cancel_all(struct io_wq *wq)
* to a worker and the worker putting itself on the busy_list * to a worker and the worker putting itself on the busy_list
*/ */
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < wq->nr_wqes; i++) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[i]; struct io_wqe *wqe = wq->wqes[node];
io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL); io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL);
} }
...@@ -803,14 +833,17 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe, ...@@ -803,14 +833,17 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
.cancel = cancel, .cancel = cancel,
.caller_data = cancel_data, .caller_data = cancel_data,
}; };
struct io_wq_work_node *node, *prev;
struct io_wq_work *work; struct io_wq_work *work;
unsigned long flags; unsigned long flags;
bool found = false; bool found = false;
spin_lock_irqsave(&wqe->lock, flags); spin_lock_irqsave(&wqe->lock, flags);
list_for_each_entry(work, &wqe->work_list, list) { wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (cancel(work, cancel_data)) { if (cancel(work, cancel_data)) {
list_del(&work->list); wq_node_del(&wqe->work_list, node, prev);
found = true; found = true;
break; break;
} }
...@@ -833,10 +866,10 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, ...@@ -833,10 +866,10 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
void *data) void *data)
{ {
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND; enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int i; int node;
for (i = 0; i < wq->nr_wqes; i++) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[i]; struct io_wqe *wqe = wq->wqes[node];
ret = io_wqe_cancel_cb_work(wqe, cancel, data); ret = io_wqe_cancel_cb_work(wqe, cancel, data);
if (ret != IO_WQ_CANCEL_NOTFOUND) if (ret != IO_WQ_CANCEL_NOTFOUND)
...@@ -868,6 +901,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) ...@@ -868,6 +901,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
struct io_wq_work *cwork) struct io_wq_work *cwork)
{ {
struct io_wq_work_node *node, *prev;
struct io_wq_work *work; struct io_wq_work *work;
unsigned long flags; unsigned long flags;
bool found = false; bool found = false;
...@@ -880,9 +914,11 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, ...@@ -880,9 +914,11 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
* no completion will be posted for it. * no completion will be posted for it.
*/ */
spin_lock_irqsave(&wqe->lock, flags); spin_lock_irqsave(&wqe->lock, flags);
list_for_each_entry(work, &wqe->work_list, list) { wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (work == cwork) { if (work == cwork) {
list_del(&work->list); wq_node_del(&wqe->work_list, node, prev);
found = true; found = true;
break; break;
} }
...@@ -910,10 +946,10 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, ...@@ -910,10 +946,10 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork) enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{ {
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND; enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int i; int node;
for (i = 0; i < wq->nr_wqes; i++) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[i]; struct io_wqe *wqe = wq->wqes[node];
ret = io_wqe_cancel_work(wqe, cwork); ret = io_wqe_cancel_work(wqe, cwork);
if (ret != IO_WQ_CANCEL_NOTFOUND) if (ret != IO_WQ_CANCEL_NOTFOUND)
...@@ -944,10 +980,10 @@ static void io_wq_flush_func(struct io_wq_work **workptr) ...@@ -944,10 +980,10 @@ static void io_wq_flush_func(struct io_wq_work **workptr)
void io_wq_flush(struct io_wq *wq) void io_wq_flush(struct io_wq *wq)
{ {
struct io_wq_flush_data data; struct io_wq_flush_data data;
int i; int node;
for (i = 0; i < wq->nr_wqes; i++) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[i]; struct io_wqe *wqe = wq->wqes[node];
init_completion(&data.done); init_completion(&data.done);
INIT_IO_WORK(&data.work, io_wq_flush_func); INIT_IO_WORK(&data.work, io_wq_flush_func);
...@@ -957,43 +993,39 @@ void io_wq_flush(struct io_wq *wq) ...@@ -957,43 +993,39 @@ void io_wq_flush(struct io_wq *wq)
} }
} }
struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm, struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
struct user_struct *user, get_work_fn *get_work,
put_work_fn *put_work)
{ {
int ret = -ENOMEM, i, node; int ret = -ENOMEM, node;
struct io_wq *wq; struct io_wq *wq;
wq = kcalloc(1, sizeof(*wq), GFP_KERNEL); wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq) if (!wq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
wq->nr_wqes = num_online_nodes(); wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
wq->wqes = kcalloc(wq->nr_wqes, sizeof(struct io_wqe *), GFP_KERNEL);
if (!wq->wqes) { if (!wq->wqes) {
kfree(wq); kfree(wq);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
wq->get_work = get_work; wq->get_work = data->get_work;
wq->put_work = put_work; wq->put_work = data->put_work;
/* caller must already hold a reference to this */ /* caller must already hold a reference to this */
wq->user = user; wq->user = data->user;
wq->creds = data->creds;
i = 0; for_each_node(node) {
refcount_set(&wq->refs, wq->nr_wqes);
for_each_online_node(node) {
struct io_wqe *wqe; struct io_wqe *wqe;
wqe = kcalloc_node(1, sizeof(struct io_wqe), GFP_KERNEL, node); wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node);
if (!wqe) if (!wqe)
break; goto err;
wq->wqes[i] = wqe; wq->wqes[node] = wqe;
wqe->node = node; wqe->node = node;
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0); atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
if (user) { if (wq->user) {
wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
task_rlimit(current, RLIMIT_NPROC); task_rlimit(current, RLIMIT_NPROC);
} }
...@@ -1001,33 +1033,36 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm, ...@@ -1001,33 +1033,36 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
wqe->node = node; wqe->node = node;
wqe->wq = wq; wqe->wq = wq;
spin_lock_init(&wqe->lock); spin_lock_init(&wqe->lock);
INIT_LIST_HEAD(&wqe->work_list); INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1); INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
INIT_LIST_HEAD(&wqe->all_list); INIT_LIST_HEAD(&wqe->all_list);
i++;
} }
init_completion(&wq->done); init_completion(&wq->done);
if (i != wq->nr_wqes)
goto err;
/* caller must have already done mmgrab() on this mm */ /* caller must have already done mmgrab() on this mm */
wq->mm = mm; wq->mm = data->mm;
wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager"); wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
if (!IS_ERR(wq->manager)) { if (!IS_ERR(wq->manager)) {
wake_up_process(wq->manager); wake_up_process(wq->manager);
wait_for_completion(&wq->done);
if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
ret = -ENOMEM;
goto err;
}
reinit_completion(&wq->done);
return wq; return wq;
} }
ret = PTR_ERR(wq->manager); ret = PTR_ERR(wq->manager);
wq->manager = NULL;
err:
complete(&wq->done); complete(&wq->done);
io_wq_destroy(wq); err:
for_each_node(node)
kfree(wq->wqes[node]);
kfree(wq->wqes);
kfree(wq);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
...@@ -1039,27 +1074,21 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data) ...@@ -1039,27 +1074,21 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
void io_wq_destroy(struct io_wq *wq) void io_wq_destroy(struct io_wq *wq)
{ {
int i; int node;
if (wq->manager) { set_bit(IO_WQ_BIT_EXIT, &wq->state);
set_bit(IO_WQ_BIT_EXIT, &wq->state); if (wq->manager)
kthread_stop(wq->manager); kthread_stop(wq->manager);
}
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < wq->nr_wqes; i++) { for_each_node(node)
struct io_wqe *wqe = wq->wqes[i]; io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
if (!wqe)
continue;
io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
}
rcu_read_unlock(); rcu_read_unlock();
wait_for_completion(&wq->done); wait_for_completion(&wq->done);
for (i = 0; i < wq->nr_wqes; i++) for_each_node(node)
kfree(wq->wqes[i]); kfree(wq->wqes[node]);
kfree(wq->wqes); kfree(wq->wqes);
kfree(wq); kfree(wq);
} }
...@@ -11,6 +11,7 @@ enum { ...@@ -11,6 +11,7 @@ enum {
IO_WQ_WORK_NEEDS_FILES = 16, IO_WQ_WORK_NEEDS_FILES = 16,
IO_WQ_WORK_UNBOUND = 32, IO_WQ_WORK_UNBOUND = 32,
IO_WQ_WORK_INTERNAL = 64, IO_WQ_WORK_INTERNAL = 64,
IO_WQ_WORK_CB = 128,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
}; };
...@@ -21,15 +22,60 @@ enum io_wq_cancel { ...@@ -21,15 +22,60 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */ IO_WQ_CANCEL_NOTFOUND, /* work not found */
}; };
/*
 * Singly linked work list with a tail pointer.
 *
 * A node only stores its successor, so removal needs the caller to supply
 * the predecessor (which wq_list_for_each() tracks while iterating).
 */
struct io_wq_work_node {
	struct io_wq_work_node *next;
};

/* List head: 'first' is NULL when the list is empty, 'last' is the tail. */
struct io_wq_work_list {
	struct io_wq_work_node *first;
	struct io_wq_work_node *last;
};

/*
 * Append @node to the tail of @list.
 *
 * The node's ->next is cleared here rather than trusting the caller to have
 * done so: the tail must terminate the chain, and a node that is re-queued
 * (or whose storage was reused) could otherwise carry a stale pointer that
 * wq_list_for_each() would follow.
 */
static inline void wq_list_add_tail(struct io_wq_work_node *node,
				    struct io_wq_work_list *list)
{
	node->next = NULL;
	if (!list->first) {
		list->first = list->last = node;
	} else {
		list->last->next = node;
		list->last = node;
	}
}

/*
 * Unlink @node from @list.
 *
 * @prev must be the node immediately before @node, or NULL when @node is
 * the first entry. After unlinking, @node->next is cleared so the removed
 * node no longer points into the list and can safely be added again.
 *
 * Because ->next is cleared, a caller iterating with wq_list_for_each()
 * must stop iterating (break/return) once it has deleted the current node.
 */
static inline void wq_node_del(struct io_wq_work_list *list,
			       struct io_wq_work_node *node,
			       struct io_wq_work_node *prev)
{
	if (node == list->first)
		list->first = node->next;
	if (node == list->last)
		list->last = prev;
	if (prev)
		prev->next = node->next;
	node->next = NULL;
}

/*
 * Walk @head from first to last. @pos is the current node and @prv trails
 * one node behind it (NULL on the first iteration), which is exactly the
 * 'prev' argument wq_node_del() needs.
 */
#define wq_list_for_each(pos, prv, head)			\
	for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)

/* True when @list holds no nodes. */
#define wq_list_empty(list)	((list)->first == NULL)

/* Reset @list to the empty state. */
#define INIT_WQ_LIST(list)	do {				\
	(list)->first = NULL;					\
	(list)->last = NULL;					\
} while (0)
struct io_wq_work { struct io_wq_work {
struct list_head list; union {
struct io_wq_work_node list;
void *data;
};
void (*func)(struct io_wq_work **); void (*func)(struct io_wq_work **);
unsigned flags;
struct files_struct *files; struct files_struct *files;
unsigned flags;
}; };
#define INIT_IO_WORK(work, _func) \ #define INIT_IO_WORK(work, _func) \
do { \ do { \
(work)->list.next = NULL; \
(work)->func = _func; \ (work)->func = _func; \
(work)->flags = 0; \ (work)->flags = 0; \
(work)->files = NULL; \ (work)->files = NULL; \
...@@ -38,9 +84,16 @@ struct io_wq_work { ...@@ -38,9 +84,16 @@ struct io_wq_work {
typedef void (get_work_fn)(struct io_wq_work *); typedef void (get_work_fn)(struct io_wq_work *);
typedef void (put_work_fn)(struct io_wq_work *); typedef void (put_work_fn)(struct io_wq_work *);
struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm, struct io_wq_data {
struct user_struct *user, struct mm_struct *mm;
get_work_fn *get_work, put_work_fn *put_work); struct user_struct *user;
struct cred *creds;
get_work_fn *get_work;
put_work_fn *put_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_destroy(struct io_wq *wq); void io_wq_destroy(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
......
This diff is collapsed.
...@@ -399,6 +399,9 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ...@@ -399,6 +399,9 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags); int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol); extern int __sys_socket(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file,
struct sockaddr __user *uservaddr, int addrlen,
int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen); int addrlen);
extern int __sys_listen(int fd, int backlog); extern int __sys_listen(int fd, int backlog);
......
...@@ -163,35 +163,35 @@ TRACE_EVENT(io_uring_queue_async_work, ...@@ -163,35 +163,35 @@ TRACE_EVENT(io_uring_queue_async_work,
); );
/** /**
* io_uring_defer_list - called before the io_uring work added into defer_list * io_uring_defer - called when an io_uring request is deferred
* *
* @ctx: pointer to a ring context structure * @ctx: pointer to a ring context structure
* @req: pointer to a deferred request * @req: pointer to a deferred request
* @shadow: whether request is shadow or not * @user_data: user data associated with the request
* *
* Allows to track deferred requests, to get an insight about what requests are * Allows to track deferred requests, to get an insight about what requests are
* not started immediately. * not started immediately.
*/ */
TRACE_EVENT(io_uring_defer, TRACE_EVENT(io_uring_defer,
TP_PROTO(void *ctx, void *req, bool shadow), TP_PROTO(void *ctx, void *req, unsigned long long user_data),
TP_ARGS(ctx, req, shadow), TP_ARGS(ctx, req, user_data),
TP_STRUCT__entry ( TP_STRUCT__entry (
__field( void *, ctx ) __field( void *, ctx )
__field( void *, req ) __field( void *, req )
__field( bool, shadow ) __field( unsigned long long, data )
), ),
TP_fast_assign( TP_fast_assign(
__entry->ctx = ctx; __entry->ctx = ctx;
__entry->req = req; __entry->req = req;
__entry->shadow = shadow; __entry->data = user_data;
), ),
TP_printk("ring %p, request %p%s", __entry->ctx, __entry->req, TP_printk("ring %p, request %p user_data %llu", __entry->ctx,
__entry->shadow ? ", shadow": "") __entry->req, __entry->data)
); );
/** /**
......
...@@ -73,6 +73,7 @@ struct io_uring_sqe { ...@@ -73,6 +73,7 @@ struct io_uring_sqe {
#define IORING_OP_ACCEPT 13 #define IORING_OP_ACCEPT 13
#define IORING_OP_ASYNC_CANCEL 14 #define IORING_OP_ASYNC_CANCEL 14
#define IORING_OP_LINK_TIMEOUT 15 #define IORING_OP_LINK_TIMEOUT 15
#define IORING_OP_CONNECT 16
/* /*
* sqe->fsync_flags * sqe->fsync_flags
......
...@@ -1825,32 +1825,46 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, ...@@ -1825,32 +1825,46 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
* include the -EINPROGRESS status for such sockets. * include the -EINPROGRESS status for such sockets.
*/ */
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) int __sys_connect_file(struct file *file, struct sockaddr __user *uservaddr,
int addrlen, int file_flags)
{ {
struct socket *sock; struct socket *sock;
struct sockaddr_storage address; struct sockaddr_storage address;
int err, fput_needed; int err;
sock = sockfd_lookup_light(fd, &err, &fput_needed); sock = sock_from_file(file, &err);
if (!sock) if (!sock)
goto out; goto out;
err = move_addr_to_kernel(uservaddr, addrlen, &address); err = move_addr_to_kernel(uservaddr, addrlen, &address);
if (err < 0) if (err < 0)
goto out_put; goto out;
err = err =
security_socket_connect(sock, (struct sockaddr *)&address, addrlen); security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
if (err) if (err)
goto out_put; goto out;
err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
sock->file->f_flags); sock->file->f_flags | file_flags);
out_put:
fput_light(sock->file, fput_needed);
out: out:
return err; return err;
} }
/*
 * Connect the socket referred to by descriptor @fd to the user-supplied
 * address @uservaddr of length @addrlen.
 *
 * Resolves @fd to a file and hands it to __sys_connect_file() with no extra
 * file flags. Returns -EBADF when the descriptor does not resolve to a file,
 * otherwise whatever __sys_connect_file() returns.
 */
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
	struct fd f = fdget(fd);
	int ret;

	if (!f.file)
		return -EBADF;

	ret = __sys_connect_file(f.file, uservaddr, addrlen, 0);

	/*
	 * Only put the file when fdget() flagged it; presumably a non-zero
	 * f.flags means a reference was taken - confirm against fdget().
	 */
	if (f.flags)
		fput(f.file);

	return ret;
}
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
int, addrlen) int, addrlen)
{ {
...@@ -2250,15 +2264,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, ...@@ -2250,15 +2264,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
return err < 0 ? err : 0; return err < 0 ? err : 0;
} }
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
struct msghdr *msg_sys, unsigned int flags, unsigned int flags, struct used_address *used_address,
struct used_address *used_address, unsigned int allowed_msghdr_flags)
unsigned int allowed_msghdr_flags)
{ {
struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg;
struct sockaddr_storage address;
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
unsigned char ctl[sizeof(struct cmsghdr) + 20] unsigned char ctl[sizeof(struct cmsghdr) + 20]
__aligned(sizeof(__kernel_size_t)); __aligned(sizeof(__kernel_size_t));
/* 20 is size of ipv6_pktinfo */ /* 20 is size of ipv6_pktinfo */
...@@ -2266,19 +2275,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2266,19 +2275,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
int ctl_len; int ctl_len;
ssize_t err; ssize_t err;
msg_sys->msg_name = &address;
if (MSG_CMSG_COMPAT & flags)
err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
else
err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
if (err < 0)
return err;
err = -ENOBUFS; err = -ENOBUFS;
if (msg_sys->msg_controllen > INT_MAX) if (msg_sys->msg_controllen > INT_MAX)
goto out_freeiov; goto out;
flags |= (msg_sys->msg_flags & allowed_msghdr_flags); flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
ctl_len = msg_sys->msg_controllen; ctl_len = msg_sys->msg_controllen;
if ((MSG_CMSG_COMPAT & flags) && ctl_len) { if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
...@@ -2286,7 +2286,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2286,7 +2286,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
sizeof(ctl)); sizeof(ctl));
if (err) if (err)
goto out_freeiov; goto out;
ctl_buf = msg_sys->msg_control; ctl_buf = msg_sys->msg_control;
ctl_len = msg_sys->msg_controllen; ctl_len = msg_sys->msg_controllen;
} else if (ctl_len) { } else if (ctl_len) {
...@@ -2295,7 +2295,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2295,7 +2295,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
if (ctl_len > sizeof(ctl)) { if (ctl_len > sizeof(ctl)) {
ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
if (ctl_buf == NULL) if (ctl_buf == NULL)
goto out_freeiov; goto out;
} }
err = -EFAULT; err = -EFAULT;
/* /*
...@@ -2341,7 +2341,47 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2341,7 +2341,47 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
out_freectl: out_freectl:
if (ctl_buf != ctl) if (ctl_buf != ctl)
sock_kfree_s(sock->sk, ctl_buf, ctl_len); sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out_freeiov: out:
return err;
}
/*
 * Fill the kernel msghdr @msg from the user-space header @umsg for a send.
 *
 * When MSG_CMSG_COMPAT is set in @flags, @umsg is reinterpreted as a
 * struct compat_msghdr and read through get_compat_msghdr(); otherwise it
 * is read through copy_msghdr_from_user(). Both helpers are given NULL as
 * their third argument and @iov as their fourth.
 *
 * Returns 0 on success (any non-negative helper result is collapsed to 0),
 * or the helper's negative error code. Callers in this file kfree() *@iov
 * after a successful call.
 */
static int sendmsg_copy_msghdr(struct msghdr *msg,
			       struct user_msghdr __user *umsg, unsigned flags,
			       struct iovec **iov)
{
	int ret;

	if (!(flags & MSG_CMSG_COMPAT))
		ret = copy_msghdr_from_user(msg, umsg, NULL, iov);
	else
		ret = get_compat_msghdr(msg,
					(struct compat_msghdr __user *) umsg,
					NULL, iov);

	/* Only failures are propagated; success is always reported as 0. */
	return ret < 0 ? ret : 0;
}
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address,
unsigned int allowed_msghdr_flags)
{
struct sockaddr_storage address;
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
ssize_t err;
msg_sys->msg_name = &address;
err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
if (err < 0)
return err;
err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
allowed_msghdr_flags);
kfree(iov); kfree(iov);
return err; return err;
} }
...@@ -2349,12 +2389,27 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2349,12 +2389,27 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
/* /*
* BSD sendmsg interface * BSD sendmsg interface
*/ */
long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg, long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
unsigned int flags) unsigned int flags)
{ {
struct msghdr msg_sys; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
struct sockaddr_storage address;
struct msghdr msg = { .msg_name = &address };
ssize_t err;
err = sendmsg_copy_msghdr(&msg, umsg, flags, &iov);
if (err)
return err;
/* disallow ancillary data requests from this path */
if (msg.msg_control || msg.msg_controllen) {
err = -EINVAL;
goto out;
}
return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); err = ____sys_sendmsg(sock, &msg, flags, NULL, 0);
out:
kfree(iov);
return err;
} }
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
...@@ -2460,33 +2515,41 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, ...@@ -2460,33 +2515,41 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
return __sys_sendmmsg(fd, mmsg, vlen, flags, true); return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
} }
static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, static int recvmsg_copy_msghdr(struct msghdr *msg,
struct msghdr *msg_sys, unsigned int flags, int nosec) struct user_msghdr __user *umsg, unsigned flags,
struct sockaddr __user **uaddr,
struct iovec **iov)
{ {
struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
unsigned long cmsg_ptr;
int len;
ssize_t err; ssize_t err;
/* kernel mode address */ if (MSG_CMSG_COMPAT & flags) {
struct sockaddr_storage addr; struct compat_msghdr __user *msg_compat;
/* user mode address pointers */
struct sockaddr __user *uaddr;
int __user *uaddr_len = COMPAT_NAMELEN(msg);
msg_sys->msg_name = &addr; msg_compat = (struct compat_msghdr __user *) umsg;
err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
if (MSG_CMSG_COMPAT & flags) } else {
err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov); err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
else }
err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
if (err < 0) if (err < 0)
return err; return err;
return 0;
}
static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
struct user_msghdr __user *msg,
struct sockaddr __user *uaddr,
unsigned int flags, int nosec)
{
struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *) msg;
int __user *uaddr_len = COMPAT_NAMELEN(msg);
struct sockaddr_storage addr;
unsigned long cmsg_ptr;
int len;
ssize_t err;
msg_sys->msg_name = &addr;
cmsg_ptr = (unsigned long)msg_sys->msg_control; cmsg_ptr = (unsigned long)msg_sys->msg_control;
msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
...@@ -2497,7 +2560,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2497,7 +2560,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
flags |= MSG_DONTWAIT; flags |= MSG_DONTWAIT;
err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
if (err < 0) if (err < 0)
goto out_freeiov; goto out;
len = err; len = err;
if (uaddr != NULL) { if (uaddr != NULL) {
...@@ -2505,12 +2568,12 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2505,12 +2568,12 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
msg_sys->msg_namelen, uaddr, msg_sys->msg_namelen, uaddr,
uaddr_len); uaddr_len);
if (err < 0) if (err < 0)
goto out_freeiov; goto out;
} }
err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
COMPAT_FLAGS(msg)); COMPAT_FLAGS(msg));
if (err) if (err)
goto out_freeiov; goto out;
if (MSG_CMSG_COMPAT & flags) if (MSG_CMSG_COMPAT & flags)
err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
&msg_compat->msg_controllen); &msg_compat->msg_controllen);
...@@ -2518,10 +2581,25 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2518,10 +2581,25 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
&msg->msg_controllen); &msg->msg_controllen);
if (err) if (err)
goto out_freeiov; goto out;
err = len; err = len;
out:
return err;
}
static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags, int nosec)
{
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
/* user mode address pointers */
struct sockaddr __user *uaddr;
ssize_t err;
out_freeiov: err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
if (err < 0)
return err;
err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
kfree(iov); kfree(iov);
return err; return err;
} }
...@@ -2530,12 +2608,28 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2530,12 +2608,28 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
* BSD recvmsg interface * BSD recvmsg interface
*/ */
long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg, long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *umsg,
unsigned int flags) unsigned int flags)
{ {
struct msghdr msg_sys; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
struct sockaddr_storage address;
struct msghdr msg = { .msg_name = &address };
struct sockaddr __user *uaddr;
ssize_t err;
return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); err = recvmsg_copy_msghdr(&msg, umsg, flags, &uaddr, &iov);
if (err)
return err;
/* disallow ancillary data requests from this path */
if (msg.msg_control || msg.msg_controllen) {
err = -EINVAL;
goto out;
}
err = ____sys_recvmsg(sock, &msg, umsg, uaddr, flags, 0);
out:
kfree(iov);
return err;
} }
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册