Commit af472a9e authored by Linus Torvalds

Merge tag 'for-5.18/io_uring-2022-03-18' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:

 - Fixes for current file position. Still doesn't have the f_pos_lock
   sorted, but it's a step in the right direction (Dylan)

 - Tracing updates (Dylan, Stefan)

 - Improvements to io-wq locking (Hao)

 - Improvements for provided buffers (me, Pavel)

 - Support for registered file descriptors (me, Xiaoguang)

 - Support for ring messages (me)

 - Poll improvements (me)

 - Fix for fixed buffers and non-iterator reads/writes (me)

 - Support for NAPI on sockets (Olivier)

 - Ring quiesce improvements (Usama)

 - Misc fixes (Olivier, Pavel)

* tag 'for-5.18/io_uring-2022-03-18' of git://git.kernel.dk/linux-block: (42 commits)
  io_uring: terminate manual loop iterator loop correctly for non-vecs
  io_uring: don't check unrelated req->open.how in accept request
  io_uring: manage provided buffers strictly ordered
  io_uring: fold evfd signalling under a slower path
  io_uring: thin down io_commit_cqring()
  io_uring: shuffle io_eventfd_signal() bits around
  io_uring: remove extra barrier for non-sqpoll iopoll
  io_uring: fix provided buffer return on failure for kiocb_done()
  io_uring: extend provided buf return to fails
  io_uring: refactor timeout cancellation cqe posting
  io_uring: normilise naming for fill_cqe*
  io_uring: cache poll/double-poll state with a request flag
  io_uring: cache req->apoll->events in req->cflags
  io_uring: move req->poll_refs into previous struct hole
  io_uring: make tracing format consistent
  io_uring: recycle apoll_poll entries
  io_uring: remove duplicated member check for io_msg_ring_prep()
  io_uring: allow submissions to continue on error
  io_uring: recycle provided buffers if request goes async
  io_uring: ensure reads re-import for selected buffers
  ...
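As a companion to the "registered file descriptors" bullet above: userspace can register the ring's own file descriptor so io_uring_enter(2) no longer has to look it up on every call. Below is a minimal sketch, not taken from the merge itself, assuming liburing 2.2+ (which wraps the new IORING_REGISTER_RING_FDS opcode and IORING_ENTER_REGISTERED_RING flag shown in the uapi hunks further down).

    /* Build with: cc ringfd_demo.c -luring  (assumes liburing >= 2.2). */
    #include <liburing.h>
    #include <stdio.h>

    int main(void)
    {
        struct io_uring ring;

        if (io_uring_queue_init(8, &ring, 0) < 0)
            return 1;

        /* Registers ring.ring_fd via IORING_REGISTER_RING_FDS; on success
         * liburing remembers the registered index and passes
         * IORING_ENTER_REGISTERED_RING on later io_uring_enter() calls. */
        int ret = io_uring_register_ring_fd(&ring);
        printf("io_uring_register_ring_fd: %d\n", ret); /* 1 on success */

        /* Submissions from here on use the registered descriptor. */
        io_uring_queue_exit(&ring);
        return 0;
    }

The point of the feature is to skip the fdget/fdput pair on each enter call, which matters most for threads issuing many small submissions.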
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -76,6 +76,7 @@ struct io_wqe_acct {
 	unsigned max_workers;
 	int index;
 	atomic_t nr_running;
+	raw_spinlock_t lock;
 	struct io_wq_work_list work_list;
 	unsigned long flags;
 };
@@ -91,7 +92,7 @@ enum {
  */
 struct io_wqe {
 	raw_spinlock_t lock;
-	struct io_wqe_acct acct[2];
+	struct io_wqe_acct acct[IO_WQ_ACCT_NR];
 
 	int node;
@@ -224,12 +225,12 @@ static void io_worker_exit(struct io_worker *worker)
 	if (worker->flags & IO_WORKER_F_FREE)
 		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
-	preempt_disable();
+	raw_spin_unlock(&wqe->lock);
 	io_wqe_dec_running(worker);
 	worker->flags = 0;
+	preempt_disable();
 	current->flags &= ~PF_IO_WORKER;
 	preempt_enable();
-	raw_spin_unlock(&wqe->lock);
 
 	kfree_rcu(worker, rcu);
 	io_worker_ref_put(wqe->wq);
@@ -238,10 +239,15 @@ static void io_worker_exit(struct io_worker *worker)
 
 static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
 {
+	bool ret = false;
+
+	raw_spin_lock(&acct->lock);
 	if (!wq_list_empty(&acct->work_list) &&
 	    !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-		return true;
+		ret = true;
+	raw_spin_unlock(&acct->lock);
 
-	return false;
+	return ret;
 }
 
 /*
@@ -385,7 +391,6 @@ static bool io_queue_worker_create(struct io_worker *worker,
 }
 
 static void io_wqe_dec_running(struct io_worker *worker)
-	__must_hold(wqe->lock)
 {
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
@@ -393,13 +398,14 @@ static void io_wqe_dec_running(struct io_worker *worker)
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;
 
-	if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
-		atomic_inc(&acct->nr_running);
-		atomic_inc(&wqe->wq->worker_refs);
-		raw_spin_unlock(&wqe->lock);
-		io_queue_worker_create(worker, acct, create_worker_cb);
-		raw_spin_lock(&wqe->lock);
-	}
+	if (!atomic_dec_and_test(&acct->nr_running))
+		return;
+	if (!io_acct_run_queue(acct))
+		return;
+
+	atomic_inc(&acct->nr_running);
+	atomic_inc(&wqe->wq->worker_refs);
+	io_queue_worker_create(worker, acct, create_worker_cb);
 }
 
 /*
@@ -407,11 +413,12 @@ static void io_wqe_dec_running(struct io_worker *worker)
  * it's currently on the freelist
  */
 static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
-	__must_hold(wqe->lock)
 {
 	if (worker->flags & IO_WORKER_F_FREE) {
 		worker->flags &= ~IO_WORKER_F_FREE;
+		raw_spin_lock(&wqe->lock);
 		hlist_nulls_del_init_rcu(&worker->nulls_node);
+		raw_spin_unlock(&wqe->lock);
 	}
 }
@@ -456,7 +463,7 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 
 static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
 					   struct io_worker *worker)
-	__must_hold(wqe->lock)
+	__must_hold(acct->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work, *tail;
@@ -498,9 +505,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
 		 * work being added and clearing the stalled bit.
 		 */
 		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-		raw_spin_unlock(&wqe->lock);
+		raw_spin_unlock(&acct->lock);
 		unstalled = io_wait_on_hash(wqe, stall_hash);
-		raw_spin_lock(&wqe->lock);
+		raw_spin_lock(&acct->lock);
 		if (unstalled) {
 			clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 			if (wq_has_sleeper(&wqe->wq->hash->wait))
@@ -538,7 +545,6 @@ static void io_assign_current_work(struct io_worker *worker,
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
 
 static void io_worker_handle_work(struct io_worker *worker)
-	__releases(wqe->lock)
 {
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
@@ -555,7 +561,9 @@ static void io_worker_handle_work(struct io_worker *worker)
 		 * can't make progress, any work completion or insertion will
 		 * clear the stalled flag.
 		 */
+		raw_spin_lock(&acct->lock);
 		work = io_get_next_work(acct, worker);
+		raw_spin_unlock(&acct->lock);
 		if (work) {
 			__io_worker_busy(wqe, worker);
@@ -569,10 +577,9 @@ static void io_worker_handle_work(struct io_worker *worker)
 			raw_spin_lock(&worker->lock);
 			worker->next_work = work;
 			raw_spin_unlock(&worker->lock);
-		}
-		raw_spin_unlock(&wqe->lock);
-		if (!work)
+		} else {
 			break;
+		}
 
 		io_assign_current_work(worker, work);
 		__set_current_state(TASK_RUNNING);
@@ -608,8 +615,6 @@ static void io_worker_handle_work(struct io_worker *worker)
 					wake_up(&wq->hash->wait);
 			}
 		} while (work);
-
-		raw_spin_lock(&wqe->lock);
 	} while (1);
 }
@@ -633,12 +638,10 @@ static int io_wqe_worker(void *data)
 		long ret;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-loop:
-		raw_spin_lock(&wqe->lock);
-		if (io_acct_run_queue(acct)) {
+		while (io_acct_run_queue(acct))
 			io_worker_handle_work(worker);
-			goto loop;
-		}
+
+		raw_spin_lock(&wqe->lock);
 		/* timed out, exit unless we're the last worker */
 		if (last_timeout && acct->nr_workers > 1) {
 			acct->nr_workers--;
@@ -662,10 +665,8 @@ static int io_wqe_worker(void *data)
 		last_timeout = !ret;
 	}
 
-	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-		raw_spin_lock(&wqe->lock);
+	if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
 		io_worker_handle_work(worker);
-	}
 
 	audit_free(current);
 	io_worker_exit(worker);
@@ -705,10 +706,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 		return;
 
 	worker->flags &= ~IO_WORKER_F_RUNNING;
-
-	raw_spin_lock(&worker->wqe->lock);
 	io_wqe_dec_running(worker);
-	raw_spin_unlock(&worker->wqe->lock);
 }
 
 static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
@@ -778,10 +776,12 @@ static void create_worker_cont(struct callback_head *cb)
 			.cancel_all = true,
 		};
 
+		raw_spin_unlock(&wqe->lock);
 		while (io_acct_cancel_pending_work(wqe, acct, &match))
-			raw_spin_lock(&wqe->lock);
+			;
+	} else {
+		raw_spin_unlock(&wqe->lock);
 	}
-	raw_spin_unlock(&wqe->lock);
 	io_worker_ref_put(wqe->wq);
 	kfree(worker);
 	return;
@@ -914,6 +914,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+	struct io_cb_cancel_data match;
 	unsigned work_flags = work->flags;
 	bool do_create;
@@ -927,10 +928,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 		return;
 	}
 
-	raw_spin_lock(&wqe->lock);
+	raw_spin_lock(&acct->lock);
 	io_wqe_insert_work(wqe, work);
 	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+	raw_spin_unlock(&acct->lock);
 
+	raw_spin_lock(&wqe->lock);
 	rcu_read_lock();
 	do_create = !io_wqe_activate_free_worker(wqe, acct);
 	rcu_read_unlock();
@@ -946,18 +949,18 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 			return;
 
 		raw_spin_lock(&wqe->lock);
-		/* fatal condition, failed to create the first worker */
-		if (!acct->nr_workers) {
-			struct io_cb_cancel_data match = {
-				.fn = io_wq_work_match_item,
-				.data = work,
-				.cancel_all = false,
-			};
-
-			if (io_acct_cancel_pending_work(wqe, acct, &match))
-				raw_spin_lock(&wqe->lock);
-		}
-		raw_spin_unlock(&wqe->lock);
+		if (acct->nr_workers) {
+			raw_spin_unlock(&wqe->lock);
+			return;
+		}
+		raw_spin_unlock(&wqe->lock);
+
+		/* fatal condition, failed to create the first worker */
+		match.fn = io_wq_work_match_item,
+		match.data = work,
+		match.cancel_all = false,
+
+		io_acct_cancel_pending_work(wqe, acct, &match);
 	}
 }
@@ -1032,22 +1035,23 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
 					struct io_wqe_acct *acct,
 					struct io_cb_cancel_data *match)
-	__releases(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
 
+	raw_spin_lock(&acct->lock);
 	wq_list_for_each(node, prev, &acct->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 		if (!match->fn(work, match->data))
 			continue;
 		io_wqe_remove_pending(wqe, work, prev);
-		raw_spin_unlock(&wqe->lock);
+		raw_spin_unlock(&acct->lock);
 		io_run_cancel(work, wqe);
 		match->nr_pending++;
 		/* not safe to continue after unlock */
 		return true;
 	}
+	raw_spin_unlock(&acct->lock);
 
 	return false;
 }
@@ -1061,7 +1065,6 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
 		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
 
 		if (io_acct_cancel_pending_work(wqe, acct, match)) {
-			raw_spin_lock(&wqe->lock);
 			if (match->cancel_all)
 				goto retry;
 			break;
@@ -1103,13 +1106,11 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
-		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
-		if (match.nr_pending && !match.cancel_all) {
-			raw_spin_unlock(&wqe->lock);
+		if (match.nr_pending && !match.cancel_all)
 			return IO_WQ_CANCEL_OK;
-		}
 
+		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_running_work(wqe, &match);
 		raw_spin_unlock(&wqe->lock);
 		if (match.nr_running && !match.cancel_all)
@@ -1190,6 +1191,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 			acct->index = i;
 			atomic_set(&acct->nr_running, 0);
 			INIT_WQ_LIST(&acct->work_list);
+			raw_spin_lock_init(&acct->lock);
 		}
 		wqe->wq = wq;
 		raw_spin_lock_init(&wqe->lock);
@@ -1282,9 +1284,7 @@ static void io_wq_destroy(struct io_wq *wq)
 			.fn = io_wq_work_match_all,
 			.cancel_all = true,
 		};
-		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
-		raw_spin_unlock(&wqe->lock);
 		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}
@@ -1376,7 +1376,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
 	BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
 		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
 			new_count[i] = task_rlimit(current, RLIMIT_NPROC);
 	}
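The io-wq hunks above stop funneling every queue operation through the per-node wqe->lock: each io_wqe_acct now carries its own raw spinlock guarding just its work_list, so bound and unbound work no longer contend on a single lock. A toy userspace illustration of that pattern follows; it is a sketch only, all names are invented, and it is not the kernel code.

    #include <pthread.h>
    #include <stddef.h>

    struct work { struct work *next; };

    /* One queue per account, each with its own lock (mirrors acct->lock). */
    struct acct {
        pthread_spinlock_t lock;
        struct work *head, *tail;
    };

    static void acct_init(struct acct *a)
    {
        pthread_spin_init(&a->lock, PTHREAD_PROCESS_PRIVATE);
        a->head = a->tail = NULL;
    }

    static void acct_push(struct acct *a, struct work *w)
    {
        w->next = NULL;
        pthread_spin_lock(&a->lock);   /* only this account's list is held */
        if (a->tail)
            a->tail->next = w;
        else
            a->head = w;
        a->tail = w;
        pthread_spin_unlock(&a->lock);
    }

    static struct work *acct_pop(struct acct *a)
    {
        pthread_spin_lock(&a->lock);
        struct work *w = a->head;
        if (w && !(a->head = w->next))
            a->tail = NULL;
        pthread_spin_unlock(&a->lock);
        return w;
    }

With this shape, two accounts (bound and unbound) can be pushed to and popped from concurrently, and a shared structural lock is only needed for the rarer operations that genuinely span accounts.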
This diff has been collapsed.
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -9,11 +9,14 @@
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
+void io_uring_unreg_ringfd(void);
 
 static inline void io_uring_files_cancel(void)
 {
-	if (current->io_uring)
+	if (current->io_uring) {
+		io_uring_unreg_ringfd();
 		__io_uring_cancel(false);
+	}
 }
 static inline void io_uring_task_cancel(void)
 {
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -29,22 +29,22 @@ TRACE_EVENT(io_uring_create,
 	TP_ARGS(fd, ctx, sq_entries, cq_entries, flags),
 
 	TP_STRUCT__entry (
 		__field( int, fd )
 		__field( void *, ctx )
 		__field( u32, sq_entries )
 		__field( u32, cq_entries )
 		__field( u32, flags )
 	),
 
 	TP_fast_assign(
 		__entry->fd = fd;
 		__entry->ctx = ctx;
 		__entry->sq_entries = sq_entries;
 		__entry->cq_entries = cq_entries;
 		__entry->flags = flags;
 	),
 
-	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
+	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags 0x%x",
 			  __entry->ctx, __entry->fd, __entry->sq_entries,
 			  __entry->cq_entries, __entry->flags)
 );
@@ -57,10 +57,9 @@ TRACE_EVENT(io_uring_create,
  * @opcode: describes which operation to perform
  * @nr_user_files: number of registered files
  * @nr_user_bufs: number of registered buffers
- * @cq_ev_fd: whether eventfs registered or not
  * @ret: return code
  *
- * Allows to trace fixed files/buffers/eventfds, that could be registered to
+ * Allows to trace fixed files/buffers, that could be registered to
  * avoid an overhead of getting references to them for every operation. This
  * event, together with io_uring_file_get, can provide a full picture of how
  * much overhead one can reduce via fixing.
@@ -68,17 +67,16 @@ TRACE_EVENT(io_uring_create,
 TRACE_EVENT(io_uring_register,
 
 	TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files,
-			 unsigned nr_bufs, bool eventfd, long ret),
+			 unsigned nr_bufs, long ret),
 
-	TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret),
+	TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
 		__field( unsigned, opcode )
 		__field( unsigned, nr_files )
 		__field( unsigned, nr_bufs )
-		__field( bool, eventfd )
 		__field( long, ret )
 	),
 
 	TP_fast_assign(
@@ -86,20 +84,21 @@ TRACE_EVENT(io_uring_register,
 		__entry->opcode = opcode;
 		__entry->nr_files = nr_files;
 		__entry->nr_bufs = nr_bufs;
-		__entry->eventfd = eventfd;
 		__entry->ret = ret;
 	),
 
 	TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, "
-			  "eventfd %d, ret %ld",
+			  "ret %ld",
 			  __entry->ctx, __entry->opcode, __entry->nr_files,
-			  __entry->nr_bufs, __entry->eventfd, __entry->ret)
+			  __entry->nr_bufs, __entry->ret)
 );
 
 /**
  * io_uring_file_get - called before getting references to an SQE file
  *
  * @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
+ * @user_data: user data associated with the request
  * @fd: SQE file descriptor
  *
  * Allows to trace out how often an SQE file reference is obtained, which can
@@ -108,59 +107,71 @@ TRACE_EVENT(io_uring_register,
  */
 TRACE_EVENT(io_uring_file_get,
 
-	TP_PROTO(void *ctx, int fd),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, int fd),
 
-	TP_ARGS(ctx, fd),
+	TP_ARGS(ctx, req, user_data, fd),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
-		__field( int, fd )
+		__field( void *, req )
+		__field( u64, user_data )
+		__field( int, fd )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
+		__entry->req = req;
+		__entry->user_data = user_data;
 		__entry->fd = fd;
 	),
 
-	TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd)
+	TP_printk("ring %p, req %p, user_data 0x%llx, fd %d",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->fd)
 );
 
 /**
  * io_uring_queue_async_work - called before submitting a new async work
  *
  * @ctx: pointer to a ring context structure
- * @hashed: type of workqueue, hashed or normal
  * @req: pointer to a submitted request
+ * @user_data: user data associated with the request
+ * @opcode: opcode of request
+ * @flags request flags
  * @work: pointer to a submitted io_wq_work
+ * @rw: type of workqueue, hashed or normal
  *
  * Allows to trace asynchronous work submission.
  */
 TRACE_EVENT(io_uring_queue_async_work,
 
-	TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work,
-		 unsigned int flags),
+	TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
+		unsigned int flags, struct io_wq_work *work, int rw),
 
-	TP_ARGS(ctx, rw, req, work, flags),
+	TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
-		__field( int, rw )
-		__field( void *, req )
-		__field( struct io_wq_work *, work )
+		__field( void *, req )
+		__field( u64, user_data )
+		__field( u8, opcode )
 		__field( unsigned int, flags )
+		__field( struct io_wq_work *, work )
+		__field( int, rw )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
-		__entry->rw = rw;
-		__entry->req = req;
-		__entry->work = work;
-		__entry->flags = flags;
+		__entry->req = req;
+		__entry->user_data = user_data;
+		__entry->flags = flags;
+		__entry->opcode = opcode;
+		__entry->work = work;
+		__entry->rw = rw;
 	),
 
-	TP_printk("ring %p, request %p, flags %d, %s queue, work %p",
-		__entry->ctx, __entry->req, __entry->flags,
-		__entry->rw ? "hashed" : "normal", __entry->work)
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+		__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
 
 /**
@@ -169,30 +180,33 @@ TRACE_EVENT(io_uring_queue_async_work,
  * @ctx: pointer to a ring context structure
  * @req: pointer to a deferred request
  * @user_data: user data associated with the request
+ * @opcode: opcode of request
  *
  * Allows to track deferred requests, to get an insight about what requests are
  * not started immediately.
  */
 TRACE_EVENT(io_uring_defer,
 
-	TP_PROTO(void *ctx, void *req, unsigned long long user_data),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode),
 
-	TP_ARGS(ctx, req, user_data),
+	TP_ARGS(ctx, req, user_data, opcode),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
 		__field( void *, req )
 		__field( unsigned long long, data )
+		__field( u8, opcode )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
 		__entry->req = req;
 		__entry->data = user_data;
+		__entry->opcode = opcode;
 	),
 
-	TP_printk("ring %p, request %p user_data %llu", __entry->ctx,
-			  __entry->req, __entry->data)
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
+		__entry->ctx, __entry->req, __entry->data, __entry->opcode)
 );
 
 /**
@@ -250,7 +264,7 @@ TRACE_EVENT(io_uring_cqring_wait,
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
 		__entry->min_events = min_events;
 	),
 
@@ -260,7 +274,10 @@ TRACE_EVENT(io_uring_cqring_wait,
 /**
  * io_uring_fail_link - called before failing a linked request
  *
+ * @ctx: pointer to a ring context structure
  * @req: request, which links were cancelled
+ * @user_data: user data associated with the request
+ * @opcode: opcode of request
  * @link: cancelled link
  *
  * Allows to track linked requests cancellation, to see not only that some work
@@ -268,27 +285,36 @@ TRACE_EVENT(io_uring_cqring_wait,
  */
 TRACE_EVENT(io_uring_fail_link,
 
-	TP_PROTO(void *req, void *link),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, void *link),
 
-	TP_ARGS(req, link),
+	TP_ARGS(ctx, req, user_data, opcode, link),
 
 	TP_STRUCT__entry (
-		__field( void *, req )
-		__field( void *, link )
+		__field( void *, ctx )
+		__field( void *, req )
+		__field( unsigned long long, user_data )
+		__field( u8, opcode )
+		__field( void *, link )
 	),
 
 	TP_fast_assign(
-		__entry->req = req;
-		__entry->link = link;
+		__entry->ctx = ctx;
+		__entry->req = req;
+		__entry->user_data = user_data;
+		__entry->opcode = opcode;
+		__entry->link = link;
 	),
 
-	TP_printk("request %p, link %p", __entry->req, __entry->link)
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+		__entry->link)
 );
 
 /**
  * io_uring_complete - called when completing an SQE
  *
  * @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
  * @user_data: user data associated with the request
  * @res: result of the request
  * @cflags: completion flags
@@ -296,12 +322,13 @@ TRACE_EVENT(io_uring_fail_link,
  */
 TRACE_EVENT(io_uring_complete,
 
-	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
+	TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
 
-	TP_ARGS(ctx, user_data, res, cflags),
+	TP_ARGS(ctx, req, user_data, res, cflags),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
+		__field( void *, req )
 		__field( u64, user_data )
 		__field( int, res )
 		__field( unsigned, cflags )
@@ -309,14 +336,16 @@ TRACE_EVENT(io_uring_complete,
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
+		__entry->req = req;
 		__entry->user_data = user_data;
 		__entry->res = res;
 		__entry->cflags = cflags;
 	),
 
-	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
-		__entry->ctx, (unsigned long long)__entry->user_data,
-		__entry->res, __entry->cflags)
+	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
+		__entry->ctx, __entry->req,
+		__entry->user_data,
+		__entry->res, __entry->cflags)
 );
 
 /**
@@ -324,8 +353,8 @@ TRACE_EVENT(io_uring_complete,
  *
  * @ctx: pointer to a ring context structure
  * @req: pointer to a submitted request
- * @opcode: opcode of request
  * @user_data: user data associated with the request
+ * @opcode: opcode of request
  * @flags request flags
  * @force_nonblock: whether a context blocking or not
 * @sq_thread: true if sq_thread has submitted this SQE
@@ -335,34 +364,34 @@ TRACE_EVENT(io_uring_complete,
  */
 TRACE_EVENT(io_uring_submit_sqe,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, u32 flags,
 		 bool force_nonblock, bool sq_thread),
 
-	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
+	TP_ARGS(ctx, req, user_data, opcode, flags, force_nonblock, sq_thread),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
 		__field( void *, req )
-		__field( u8, opcode )
-		__field( u64, user_data )
+		__field( unsigned long long, user_data )
+		__field( u8, opcode )
 		__field( u32, flags )
 		__field( bool, force_nonblock )
 		__field( bool, sq_thread )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
 		__entry->req = req;
-		__entry->opcode = opcode;
 		__entry->user_data = user_data;
+		__entry->opcode = opcode;
 		__entry->flags = flags;
 		__entry->force_nonblock = force_nonblock;
 		__entry->sq_thread = sq_thread;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
-		  __entry->opcode, (unsigned long long)__entry->user_data,
+		  __entry->user_data, __entry->opcode,
 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
@@ -371,8 +400,8 @@ TRACE_EVENT(io_uring_submit_sqe,
  *
  * @ctx: pointer to a ring context structure
  * @req: pointer to the armed request
- * @opcode: opcode of request
  * @user_data: user data associated with the request
+ * @opcode: opcode of request
 * @mask: request poll events mask
 * @events: registered events of interest
 *
@@ -381,155 +410,110 @@ TRACE_EVENT(io_uring_submit_sqe,
 */
 TRACE_EVENT(io_uring_poll_arm,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+	TP_PROTO(void *ctx, void *req, u64 user_data, u8 opcode,
		 int mask, int events),
 
-	TP_ARGS(ctx, req, opcode, user_data, mask, events),
+	TP_ARGS(ctx, req, user_data, opcode, mask, events),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
 		__field( void *, req )
-		__field( u8, opcode )
-		__field( u64, user_data )
+		__field( unsigned long long, user_data )
+		__field( u8, opcode )
 		__field( int, mask )
 		__field( int, events )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
 		__entry->req = req;
-		__entry->opcode = opcode;
 		__entry->user_data = user_data;
+		__entry->opcode = opcode;
 		__entry->mask = mask;
 		__entry->events = events;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
-		  __entry->ctx, __entry->req, __entry->opcode,
-		  (unsigned long long) __entry->user_data,
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		  __entry->mask, __entry->events)
 );
 
-TRACE_EVENT(io_uring_poll_wake,
-
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
-	TP_ARGS(ctx, opcode, user_data, mask),
-
-	TP_STRUCT__entry (
-		__field( void *, ctx )
-		__field( u8, opcode )
-		__field( u64, user_data )
-		__field( int, mask )
-	),
-
-	TP_fast_assign(
-		__entry->ctx = ctx;
-		__entry->opcode = opcode;
-		__entry->user_data = user_data;
-		__entry->mask = mask;
-	),
-
-	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->mask)
-);
-
-TRACE_EVENT(io_uring_task_add,
-
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
-	TP_ARGS(ctx, opcode, user_data, mask),
-
-	TP_STRUCT__entry (
-		__field( void *, ctx )
-		__field( u8, opcode )
-		__field( u64, user_data )
-		__field( int, mask )
-	),
-
-	TP_fast_assign(
-		__entry->ctx = ctx;
-		__entry->opcode = opcode;
-		__entry->user_data = user_data;
-		__entry->mask = mask;
-	),
-
-	TP_printk("ring %p, op %d, data 0x%llx, mask %x",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->mask)
-);
-
 /*
- * io_uring_task_run - called when task_work_run() executes the poll events
- *			notification callbacks
+ * io_uring_task_add - called after adding a task
  *
  * @ctx: pointer to a ring context structure
- * @req: pointer to the armed request
- * @opcode: opcode of request
+ * @req: pointer to request
  * @user_data: user data associated with the request
+ * @opcode: opcode of request
+ * @mask: request poll events mask
  *
- * Allows to track when notified poll events are processed
 */
-TRACE_EVENT(io_uring_task_run,
+TRACE_EVENT(io_uring_task_add,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, int mask),
 
-	TP_ARGS(ctx, req, opcode, user_data),
+	TP_ARGS(ctx, req, user_data, opcode, mask),
 
 	TP_STRUCT__entry (
 		__field( void *, ctx )
 		__field( void *, req )
-		__field( u8, opcode )
-		__field( u64, user_data )
+		__field( unsigned long long, user_data )
+		__field( u8, opcode )
+		__field( int, mask )
 	),
 
 	TP_fast_assign(
 		__entry->ctx = ctx;
 		__entry->req = req;
-		__entry->opcode = opcode;
 		__entry->user_data = user_data;
+		__entry->opcode = opcode;
+		__entry->mask = mask;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx",
-		__entry->ctx, __entry->req, __entry->opcode,
-		(unsigned long long) __entry->user_data)
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
		__entry->mask)
 );
 
 /*
 * io_uring_req_failed - called when an sqe is errored dring submission
 *
 * @sqe: pointer to the io_uring_sqe that failed
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to request
 * @error: error it failed with
 *
 * Allows easier diagnosing of malformed requests in production systems.
 */
 TRACE_EVENT(io_uring_req_failed,
 
-	TP_PROTO(const struct io_uring_sqe *sqe, int error),
+	TP_PROTO(const struct io_uring_sqe *sqe, void *ctx, void *req, int error),
 
-	TP_ARGS(sqe, error),
+	TP_ARGS(sqe, ctx, req, error),
 
 	TP_STRUCT__entry (
+		__field( void *, ctx )
+		__field( void *, req )
+		__field( unsigned long long, user_data )
 		__field( u8, opcode )
 		__field( u8, flags )
 		__field( u8, ioprio )
 		__field( u64, off )
 		__field( u64, addr )
 		__field( u32, len )
 		__field( u32, op_flags )
-		__field( u64, user_data )
 		__field( u16, buf_index )
 		__field( u16, personality )
 		__field( u32, file_index )
 		__field( u64, pad1 )
 		__field( u64, pad2 )
 		__field( int, error )
 	),
 
 	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->req = req;
+		__entry->user_data = sqe->user_data;
 		__entry->opcode = sqe->opcode;
 		__entry->flags = sqe->flags;
 		__entry->ioprio = sqe->ioprio;
@@ -537,7 +521,6 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->addr = sqe->addr;
 		__entry->len = sqe->len;
 		__entry->op_flags = sqe->rw_flags;
-		__entry->user_data = sqe->user_data;
 		__entry->buf_index = sqe->buf_index;
 		__entry->personality = sqe->personality;
 		__entry->file_index = sqe->file_index;
@@ -546,13 +529,15 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->error = error;
 	),
 
-	TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
-		  "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, "
+		  "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+		  "len=%u, rw_flags=0x%x, buf_index=%d, "
 		  "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+		  __entry->ctx, __entry->req, __entry->user_data,
 		  __entry->opcode, __entry->flags, __entry->ioprio,
 		  (unsigned long long)__entry->off,
 		  (unsigned long long) __entry->addr, __entry->len,
-		  __entry->op_flags, (unsigned long long) __entry->user_data,
+		  __entry->op_flags,
 		  __entry->buf_index, __entry->personality, __entry->file_index,
 		  (unsigned long long) __entry->pad1,
 		  (unsigned long long) __entry->pad2, __entry->error)
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,6 +101,7 @@ enum {
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
 
 enum {
 	IORING_OP_NOP,
@@ -143,6 +144,7 @@ enum {
 	IORING_OP_MKDIRAT,
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
+	IORING_OP_MSG_RING,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -199,9 +201,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_MSG	If set, CQE was generated with IORING_OP_MSG_RING
  */
 #define IORING_CQE_F_BUFFER	(1U << 0)
 #define IORING_CQE_F_MORE	(1U << 1)
+#define IORING_CQE_F_MSG	(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT = 16,
@@ -257,10 +261,11 @@ struct io_cqring_offsets {
 /*
  * io_uring_enter(2) flags
  */
 #define IORING_ENTER_GETEVENTS		(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP		(1U << 1)
 #define IORING_ENTER_SQ_WAIT		(1U << 2)
 #define IORING_ENTER_EXT_ARG		(1U << 3)
+#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 
 /*
 * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -325,6 +330,10 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
 
+	/* register/unregister io_uring fd with the ring */
+	IORING_REGISTER_RING_FDS = 20,
+	IORING_UNREGISTER_RING_FDS = 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
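The IORING_OP_MSG_RING opcode and IORING_CQE_F_MSG flag added above let one ring post a completion into another ring, e.g. to wake a thread that is waiting on that other ring. A hedged sketch follows, assuming liburing 2.2+ which provides the io_uring_prep_msg_ring() helper; it is illustrative and not taken from the merge itself.

    #include <liburing.h>
    #include <stdio.h>

    int main(void)
    {
        struct io_uring src, dst;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;

        if (io_uring_queue_init(8, &src, 0) || io_uring_queue_init(8, &dst, 0))
            return 1;

        /* Ask src to post a CQE into dst: cqe->res = 16, cqe->user_data = 0xcafe. */
        sqe = io_uring_get_sqe(&src);
        io_uring_prep_msg_ring(sqe, dst.ring_fd, 16, 0xcafe, 0);
        io_uring_submit(&src);

        /* Per the header comment above, the CQE that lands in dst is tagged
         * with IORING_CQE_F_MSG in cqe->flags. */
        if (io_uring_wait_cqe(&dst, &cqe) == 0) {
            printf("msg: res=%d user_data=0x%llx flags=0x%x\n",
                   cqe->res, (unsigned long long) cqe->user_data, cqe->flags);
            io_uring_cqe_seen(&dst, cqe);
        }

        io_uring_queue_exit(&src);
        io_uring_queue_exit(&dst);
        return 0;
    }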