提交 58c85dc2 编写于 作者: K Kent Overstreet 提交者: Linus Torvalds

aio: kill struct aio_ring_info

struct aio_ring_info was kind of odd, the only place it's used is where
it's embedded in struct kioctx - there's no real need for it.

The next patch rearranges struct kioctx and puts various things on their
own cachelines - getting rid of struct aio_ring_info now makes that
reordering a bit clearer.
Signed-off-by: NKent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: N"Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 a1c8eae7
...@@ -58,18 +58,6 @@ struct aio_ring { ...@@ -58,18 +58,6 @@ struct aio_ring {
}; /* 128 bytes + ring size */ }; /* 128 bytes + ring size */
#define AIO_RING_PAGES 8 #define AIO_RING_PAGES 8
struct aio_ring_info {
unsigned long mmap_base;
unsigned long mmap_size;
struct page **ring_pages;
struct mutex ring_lock;
long nr_pages;
unsigned nr, tail;
struct page *internal_pages[AIO_RING_PAGES];
};
struct kioctx { struct kioctx {
atomic_t users; atomic_t users;
...@@ -90,14 +78,30 @@ struct kioctx { ...@@ -90,14 +78,30 @@ struct kioctx {
* This is what userspace passed to io_setup(), it's not used for * This is what userspace passed to io_setup(), it's not used for
* anything but counting against the global max_reqs quota. * anything but counting against the global max_reqs quota.
* *
* The real limit is ring->nr - 1, which will be larger (see * The real limit is nr_events - 1, which will be larger (see
* aio_setup_ring()) * aio_setup_ring())
*/ */
unsigned max_reqs; unsigned max_reqs;
struct aio_ring_info ring_info; /* Size of ringbuffer, in units of struct io_event */
unsigned nr_events;
spinlock_t completion_lock; unsigned long mmap_base;
unsigned long mmap_size;
struct page **ring_pages;
long nr_pages;
struct {
struct mutex ring_lock;
} ____cacheline_aligned;
struct {
unsigned tail;
spinlock_t completion_lock;
} ____cacheline_aligned;
struct page *internal_pages[AIO_RING_PAGES];
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct work_struct rcu_work; struct work_struct rcu_work;
...@@ -129,26 +133,21 @@ __initcall(aio_setup); ...@@ -129,26 +133,21 @@ __initcall(aio_setup);
static void aio_free_ring(struct kioctx *ctx) static void aio_free_ring(struct kioctx *ctx)
{ {
struct aio_ring_info *info = &ctx->ring_info;
long i; long i;
for (i=0; i<info->nr_pages; i++) for (i = 0; i < ctx->nr_pages; i++)
put_page(info->ring_pages[i]); put_page(ctx->ring_pages[i]);
if (info->mmap_size) { if (ctx->mmap_size)
vm_munmap(info->mmap_base, info->mmap_size); vm_munmap(ctx->mmap_base, ctx->mmap_size);
}
if (info->ring_pages && info->ring_pages != info->internal_pages) if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
kfree(info->ring_pages); kfree(ctx->ring_pages);
info->ring_pages = NULL;
info->nr = 0;
} }
static int aio_setup_ring(struct kioctx *ctx) static int aio_setup_ring(struct kioctx *ctx)
{ {
struct aio_ring *ring; struct aio_ring *ring;
struct aio_ring_info *info = &ctx->ring_info;
unsigned nr_events = ctx->max_reqs; unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
unsigned long size, populate; unsigned long size, populate;
...@@ -166,45 +165,44 @@ static int aio_setup_ring(struct kioctx *ctx) ...@@ -166,45 +165,44 @@ static int aio_setup_ring(struct kioctx *ctx)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
info->nr = 0; ctx->nr_events = 0;
info->ring_pages = info->internal_pages; ctx->ring_pages = ctx->internal_pages;
if (nr_pages > AIO_RING_PAGES) { if (nr_pages > AIO_RING_PAGES) {
info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
if (!info->ring_pages) GFP_KERNEL);
if (!ctx->ring_pages)
return -ENOMEM; return -ENOMEM;
} }
info->mmap_size = nr_pages * PAGE_SIZE; ctx->mmap_size = nr_pages * PAGE_SIZE;
pr_debug("attempting mmap of %lu bytes\n", info->mmap_size); pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
down_write(&mm->mmap_sem); down_write(&mm->mmap_sem);
info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
PROT_READ|PROT_WRITE, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, 0, MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
&populate); if (IS_ERR((void *)ctx->mmap_base)) {
if (IS_ERR((void *)info->mmap_base)) {
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
info->mmap_size = 0; ctx->mmap_size = 0;
aio_free_ring(ctx); aio_free_ring(ctx);
return -EAGAIN; return -EAGAIN;
} }
pr_debug("mmap address: 0x%08lx\n", info->mmap_base); pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
info->nr_pages = get_user_pages(current, mm, info->mmap_base, nr_pages, ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
1, 0, info->ring_pages, NULL); 1, 0, ctx->ring_pages, NULL);
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
if (unlikely(info->nr_pages != nr_pages)) { if (unlikely(ctx->nr_pages != nr_pages)) {
aio_free_ring(ctx); aio_free_ring(ctx);
return -EAGAIN; return -EAGAIN;
} }
if (populate) if (populate)
mm_populate(info->mmap_base, populate); mm_populate(ctx->mmap_base, populate);
ctx->user_id = info->mmap_base; ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
info->nr = nr_events; /* trusted copy */ ring = kmap_atomic(ctx->ring_pages[0]);
ring = kmap_atomic(info->ring_pages[0]);
ring->nr = nr_events; /* user copy */ ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id; ring->id = ctx->user_id;
ring->head = ring->tail = 0; ring->head = ring->tail = 0;
...@@ -213,7 +211,7 @@ static int aio_setup_ring(struct kioctx *ctx) ...@@ -213,7 +211,7 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring); ring->header_length = sizeof(struct aio_ring);
kunmap_atomic(ring); kunmap_atomic(ring);
flush_dcache_page(info->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
return 0; return 0;
} }
...@@ -284,7 +282,6 @@ static void free_ioctx_rcu(struct rcu_head *head) ...@@ -284,7 +282,6 @@ static void free_ioctx_rcu(struct rcu_head *head)
*/ */
static void free_ioctx(struct kioctx *ctx) static void free_ioctx(struct kioctx *ctx)
{ {
struct aio_ring_info *info = &ctx->ring_info;
struct aio_ring *ring; struct aio_ring *ring;
struct io_event res; struct io_event res;
struct kiocb *req; struct kiocb *req;
...@@ -302,18 +299,18 @@ static void free_ioctx(struct kioctx *ctx) ...@@ -302,18 +299,18 @@ static void free_ioctx(struct kioctx *ctx)
spin_unlock_irq(&ctx->ctx_lock); spin_unlock_irq(&ctx->ctx_lock);
ring = kmap_atomic(info->ring_pages[0]); ring = kmap_atomic(ctx->ring_pages[0]);
head = ring->head; head = ring->head;
kunmap_atomic(ring); kunmap_atomic(ring);
while (atomic_read(&ctx->reqs_active) > 0) { while (atomic_read(&ctx->reqs_active) > 0) {
wait_event(ctx->wait, head != info->tail); wait_event(ctx->wait, head != ctx->tail);
avail = (head <= info->tail ? info->tail : info->nr) - head; avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
atomic_sub(avail, &ctx->reqs_active); atomic_sub(avail, &ctx->reqs_active);
head += avail; head += avail;
head %= info->nr; head %= ctx->nr_events;
} }
WARN_ON(atomic_read(&ctx->reqs_active) < 0); WARN_ON(atomic_read(&ctx->reqs_active) < 0);
...@@ -372,7 +369,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -372,7 +369,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
atomic_set(&ctx->dead, 0); atomic_set(&ctx->dead, 0);
spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock); spin_lock_init(&ctx->completion_lock);
mutex_init(&ctx->ring_info.ring_lock); mutex_init(&ctx->ring_lock);
init_waitqueue_head(&ctx->wait); init_waitqueue_head(&ctx->wait);
INIT_LIST_HEAD(&ctx->active_reqs); INIT_LIST_HEAD(&ctx->active_reqs);
...@@ -396,7 +393,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -396,7 +393,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
spin_unlock(&mm->ioctx_lock); spin_unlock(&mm->ioctx_lock);
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, mm, ctx->ring_info.nr); ctx, ctx->user_id, mm, ctx->nr_events);
return ctx; return ctx;
out_cleanup: out_cleanup:
...@@ -491,7 +488,7 @@ void exit_aio(struct mm_struct *mm) ...@@ -491,7 +488,7 @@ void exit_aio(struct mm_struct *mm)
* just set it to 0; aio_free_ring() is the only * just set it to 0; aio_free_ring() is the only
* place that uses ->mmap_size, so it's safe. * place that uses ->mmap_size, so it's safe.
*/ */
ctx->ring_info.mmap_size = 0; ctx->mmap_size = 0;
if (!atomic_xchg(&ctx->dead, 1)) { if (!atomic_xchg(&ctx->dead, 1)) {
hlist_del_rcu(&ctx->list); hlist_del_rcu(&ctx->list);
...@@ -514,10 +511,10 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) ...@@ -514,10 +511,10 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
{ {
struct kiocb *req; struct kiocb *req;
if (atomic_read(&ctx->reqs_active) >= ctx->ring_info.nr) if (atomic_read(&ctx->reqs_active) >= ctx->nr_events)
return NULL; return NULL;
if (atomic_inc_return(&ctx->reqs_active) > ctx->ring_info.nr - 1) if (atomic_inc_return(&ctx->reqs_active) > ctx->nr_events - 1)
goto out_put; goto out_put;
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
...@@ -578,7 +575,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) ...@@ -578,7 +575,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
void aio_complete(struct kiocb *iocb, long res, long res2) void aio_complete(struct kiocb *iocb, long res, long res2)
{ {
struct kioctx *ctx = iocb->ki_ctx; struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring_info *info;
struct aio_ring *ring; struct aio_ring *ring;
struct io_event *ev_page, *event; struct io_event *ev_page, *event;
unsigned long flags; unsigned long flags;
...@@ -599,8 +595,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ...@@ -599,8 +595,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
return; return;
} }
info = &ctx->ring_info;
/* /*
* Take rcu_read_lock() in case the kioctx is being destroyed, as we * Take rcu_read_lock() in case the kioctx is being destroyed, as we
* need to issue a wakeup after decrementing reqs_active. * need to issue a wakeup after decrementing reqs_active.
...@@ -633,13 +627,13 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ...@@ -633,13 +627,13 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
*/ */
spin_lock_irqsave(&ctx->completion_lock, flags); spin_lock_irqsave(&ctx->completion_lock, flags);
tail = info->tail; tail = ctx->tail;
pos = tail + AIO_EVENTS_OFFSET; pos = tail + AIO_EVENTS_OFFSET;
if (++tail >= info->nr) if (++tail >= ctx->nr_events)
tail = 0; tail = 0;
ev_page = kmap_atomic(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]); ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE; event = ev_page + pos % AIO_EVENTS_PER_PAGE;
event->obj = (u64)(unsigned long)iocb->ki_obj.user; event->obj = (u64)(unsigned long)iocb->ki_obj.user;
...@@ -648,7 +642,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ...@@ -648,7 +642,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
event->res2 = res2; event->res2 = res2;
kunmap_atomic(ev_page); kunmap_atomic(ev_page);
flush_dcache_page(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
...@@ -659,12 +653,12 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ...@@ -659,12 +653,12 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
*/ */
smp_wmb(); /* make event visible before updating tail */ smp_wmb(); /* make event visible before updating tail */
info->tail = tail; ctx->tail = tail;
ring = kmap_atomic(info->ring_pages[0]); ring = kmap_atomic(ctx->ring_pages[0]);
ring->tail = tail; ring->tail = tail;
kunmap_atomic(ring); kunmap_atomic(ring);
flush_dcache_page(info->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
spin_unlock_irqrestore(&ctx->completion_lock, flags); spin_unlock_irqrestore(&ctx->completion_lock, flags);
...@@ -704,21 +698,20 @@ EXPORT_SYMBOL(aio_complete); ...@@ -704,21 +698,20 @@ EXPORT_SYMBOL(aio_complete);
static long aio_read_events_ring(struct kioctx *ctx, static long aio_read_events_ring(struct kioctx *ctx,
struct io_event __user *event, long nr) struct io_event __user *event, long nr)
{ {
struct aio_ring_info *info = &ctx->ring_info;
struct aio_ring *ring; struct aio_ring *ring;
unsigned head, pos; unsigned head, pos;
long ret = 0; long ret = 0;
int copy_ret; int copy_ret;
mutex_lock(&info->ring_lock); mutex_lock(&ctx->ring_lock);
ring = kmap_atomic(info->ring_pages[0]); ring = kmap_atomic(ctx->ring_pages[0]);
head = ring->head; head = ring->head;
kunmap_atomic(ring); kunmap_atomic(ring);
pr_debug("h%u t%u m%u\n", head, info->tail, info->nr); pr_debug("h%u t%u m%u\n", head, ctx->tail, ctx->nr_events);
if (head == info->tail) if (head == ctx->tail)
goto out; goto out;
while (ret < nr) { while (ret < nr) {
...@@ -726,8 +719,8 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -726,8 +719,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
struct io_event *ev; struct io_event *ev;
struct page *page; struct page *page;
avail = (head <= info->tail ? info->tail : info->nr) - head; avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
if (head == info->tail) if (head == ctx->tail)
break; break;
avail = min(avail, nr - ret); avail = min(avail, nr - ret);
...@@ -735,7 +728,7 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -735,7 +728,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE)); ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
pos = head + AIO_EVENTS_OFFSET; pos = head + AIO_EVENTS_OFFSET;
page = info->ring_pages[pos / AIO_EVENTS_PER_PAGE]; page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE; pos %= AIO_EVENTS_PER_PAGE;
ev = kmap(page); ev = kmap(page);
...@@ -750,19 +743,19 @@ static long aio_read_events_ring(struct kioctx *ctx, ...@@ -750,19 +743,19 @@ static long aio_read_events_ring(struct kioctx *ctx,
ret += avail; ret += avail;
head += avail; head += avail;
head %= info->nr; head %= ctx->nr_events;
} }
ring = kmap_atomic(info->ring_pages[0]); ring = kmap_atomic(ctx->ring_pages[0]);
ring->head = head; ring->head = head;
kunmap_atomic(ring); kunmap_atomic(ring);
flush_dcache_page(info->ring_pages[0]); flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, info->tail); pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
atomic_sub(ret, &ctx->reqs_active); atomic_sub(ret, &ctx->reqs_active);
out: out:
mutex_unlock(&info->ring_lock); mutex_unlock(&ctx->ring_lock);
return ret; return ret;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册