提交 2a8a9867 编写于 作者: M Mauricio Faria de Oliveira 提交者: Benjamin LaHaise

fs: aio: fix the increment of aio-nr and counting against aio-max-nr

Currently, aio-nr is incremented in steps of 'num_possible_cpus() * 8'
for io_setup(nr_events, ..) with 'nr_events < num_possible_cpus() * 4':

    ioctx_alloc()
    ...
        nr_events = max(nr_events, num_possible_cpus() * 4);
        nr_events *= 2;
    ...
        ctx->max_reqs = nr_events;
    ...
        aio_nr += ctx->max_reqs;
    ....

This limits the number of aio contexts actually available to much less
than aio-max-nr, and the problem gets worse as the number of CPUs grows.

For example, with 64 CPUs, only 256 aio contexts are actually available
(with aio-max-nr = 65536) because the increment is 512 in that scenario.

Note: 65536 [max aio contexts] / (64*4*2) [increment per aio context]
is 128, but the effective limit is 256 (double) because aio-nr is
counted against 'aio-max-nr * 2':

    ioctx_alloc()
    ...
        if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
        ...
            goto err_ctx;
    ...

This patch uses the original value of nr_events (from userspace) to
increment aio-nr and count against aio-max-nr, which resolves both
problems described above.
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Reported-by: Lekshmi C. Pillai <lekshmi.cpillai@in.ibm.com>
Tested-by: Lekshmi C. Pillai <lekshmi.cpillai@in.ibm.com>
Tested-by: Paul Nguyen <nguyenp@us.ibm.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
上级 569dbb88
...@@ -441,10 +441,9 @@ static const struct address_space_operations aio_ctx_aops = { ...@@ -441,10 +441,9 @@ static const struct address_space_operations aio_ctx_aops = {
#endif #endif
}; };
static int aio_setup_ring(struct kioctx *ctx) static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
{ {
struct aio_ring *ring; struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
unsigned long size, unused; unsigned long size, unused;
int nr_pages; int nr_pages;
...@@ -706,6 +705,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -706,6 +705,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
struct kioctx *ctx; struct kioctx *ctx;
int err = -ENOMEM; int err = -ENOMEM;
/*
* Store the original nr_events -- what userspace passed to io_setup(),
* for counting against the global limit -- before it changes.
*/
unsigned int max_reqs = nr_events;
/* /*
* We keep track of the number of available ringbuffer slots, to prevent * We keep track of the number of available ringbuffer slots, to prevent
* overflow (reqs_available), and we also use percpu counters for this. * overflow (reqs_available), and we also use percpu counters for this.
...@@ -724,14 +729,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -724,14 +729,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL)) if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
if (!ctx) if (!ctx)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ctx->max_reqs = nr_events; ctx->max_reqs = max_reqs;
spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock); spin_lock_init(&ctx->completion_lock);
...@@ -753,7 +758,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -753,7 +758,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (!ctx->cpu) if (!ctx->cpu)
goto err; goto err;
err = aio_setup_ring(ctx); err = aio_setup_ring(ctx, nr_events);
if (err < 0) if (err < 0)
goto err; goto err;
...@@ -764,8 +769,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -764,8 +769,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
/* limit the number of system wide aios */ /* limit the number of system wide aios */
spin_lock(&aio_nr_lock); spin_lock(&aio_nr_lock);
if (aio_nr + nr_events > (aio_max_nr * 2UL) || if (aio_nr + ctx->max_reqs > aio_max_nr ||
aio_nr + nr_events < aio_nr) { aio_nr + ctx->max_reqs < aio_nr) {
spin_unlock(&aio_nr_lock); spin_unlock(&aio_nr_lock);
err = -EAGAIN; err = -EAGAIN;
goto err_ctx; goto err_ctx;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册