提交 79eb597c 编写于 作者: D Daniel Jordan 提交者: Linus Torvalds

mm: add account_locked_vm utility function

locked_vm accounting is done roughly the same way in five places, so
unify them in a helper.

Include the helper's caller in the debug print to distinguish between
callsites.

Error codes stay the same, so user-visible behavior does too.  The one
exception is that the -EPERM case in tce_account_locked_vm is removed
because Alexey has never seen it triggered.

[daniel.m.jordan@oracle.com: v3]
  Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com
[sfr@canb.auug.org.au: fix mm/util.c]
Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.comSigned-off-by: NDaniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: NStephen Rothwell <sfr@canb.auug.org.au>
Tested-by: NAlexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: NAlex Williamson <alex.williamson@redhat.com>
Cc: Alan Tull <atull@kernel.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Moritz Fischer <mdf@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Steve Sistare <steven.sistare@oracle.com>
Cc: Wu Hao <hao.wu@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 73b20c84
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/mm.h>
#include <asm/kvm_ppc.h> #include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h> #include <asm/kvm_book3s.h>
...@@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages) ...@@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
} }
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
long ret = 0;
if (!current || !current->mm)
return ret; /* process exited */
down_write(&current->mm->mmap_sem);
if (inc) {
unsigned long locked, lock_limit;
locked = current->mm->locked_vm + stt_pages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
current->mm->locked_vm += stt_pages;
} else {
if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
stt_pages = current->mm->locked_vm;
current->mm->locked_vm -= stt_pages;
}
pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
inc ? '+' : '-',
stt_pages << PAGE_SHIFT,
current->mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
up_write(&current->mm->mmap_sem);
return ret;
}
static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{ {
struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
...@@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) ...@@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
kvm_put_kvm(stt->kvm); kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit( account_locked_vm(current->mm,
kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table); call_rcu(&stt->rcu, release_spapr_tce_table);
...@@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EINVAL; return -EINVAL;
npages = kvmppc_tce_pages(size); npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
if (ret) if (ret)
return ret; return ret;
...@@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kfree(stt); kfree(stt);
fail_acct: fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
return ret; return ret;
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/mm.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pte-walk.h> #include <asm/pte-walk.h>
#include <linux/mm_inline.h> #include <linux/mm_inline.h>
...@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t { ...@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t {
u64 dev_hpa; /* Device memory base address */ u64 dev_hpa; /* Device memory base address */
}; };
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
unsigned long npages, bool incr)
{
long ret = 0, locked, lock_limit;
if (!npages)
return 0;
down_write(&mm->mmap_sem);
if (incr) {
locked = mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
mm->locked_vm += npages;
} else {
if (WARN_ON_ONCE(npages > mm->locked_vm))
npages = mm->locked_vm;
mm->locked_vm -= npages;
}
pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
current ? current->pid : 0,
incr ? '+' : '-',
npages << PAGE_SHIFT,
mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK));
up_write(&mm->mmap_sem);
return ret;
}
bool mm_iommu_preregistered(struct mm_struct *mm) bool mm_iommu_preregistered(struct mm_struct *mm)
{ {
return !list_empty(&mm->context.iommu_group_mem_list); return !list_empty(&mm->context.iommu_group_mem_list);
...@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, ...@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entry, chunk; unsigned long entry, chunk;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
ret = mm_iommu_adjust_locked_vm(mm, entries, true); ret = account_locked_vm(mm, entries, true);
if (ret) if (ret)
return ret; return ret;
...@@ -211,7 +178,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, ...@@ -211,7 +178,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
kfree(mem); kfree(mem);
unlock_exit: unlock_exit:
mm_iommu_adjust_locked_vm(mm, locked_entries, false); account_locked_vm(mm, locked_entries, false);
return ret; return ret;
} }
...@@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) ...@@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
unlock_exit: unlock_exit:
mutex_unlock(&mem_list_mutex); mutex_unlock(&mem_list_mutex);
mm_iommu_adjust_locked_vm(mm, unlock_entries, false); account_locked_vm(mm, unlock_entries, false);
return ret; return ret;
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/mm.h>
#include "dfl-afu.h" #include "dfl-afu.h"
...@@ -31,52 +32,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata) ...@@ -31,52 +32,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata)
afu->dma_regions = RB_ROOT; afu->dma_regions = RB_ROOT;
} }
/**
* afu_dma_adjust_locked_vm - adjust locked memory
* @dev: port device
* @npages: number of pages
* @incr: increase or decrease locked memory
*
* Increase or decrease the locked memory size with npages input.
*
* Return 0 on success.
* Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK.
*/
static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
{
unsigned long locked, lock_limit;
int ret = 0;
/* the task is exiting. */
if (!current->mm)
return 0;
down_write(&current->mm->mmap_sem);
if (incr) {
locked = current->mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
current->mm->locked_vm += npages;
} else {
if (WARN_ON_ONCE(npages > current->mm->locked_vm))
npages = current->mm->locked_vm;
current->mm->locked_vm -= npages;
}
dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
incr ? '+' : '-', npages << PAGE_SHIFT,
current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK),
ret ? "- exceeded" : "");
up_write(&current->mm->mmap_sem);
return ret;
}
/** /**
* afu_dma_pin_pages - pin pages of given dma memory region * afu_dma_pin_pages - pin pages of given dma memory region
* @pdata: feature device platform data * @pdata: feature device platform data
...@@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata, ...@@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
struct device *dev = &pdata->dev->dev; struct device *dev = &pdata->dev->dev;
int ret, pinned; int ret, pinned;
ret = afu_dma_adjust_locked_vm(dev, npages, true); ret = account_locked_vm(current->mm, npages, true);
if (ret) if (ret)
return ret; return ret;
...@@ -121,7 +76,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata, ...@@ -121,7 +76,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
free_pages: free_pages:
kfree(region->pages); kfree(region->pages);
unlock_vm: unlock_vm:
afu_dma_adjust_locked_vm(dev, npages, false); account_locked_vm(current->mm, npages, false);
return ret; return ret;
} }
...@@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata, ...@@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata,
put_all_pages(region->pages, npages); put_all_pages(region->pages, npages);
kfree(region->pages); kfree(region->pages);
afu_dma_adjust_locked_vm(dev, npages, false); account_locked_vm(current->mm, npages, false);
dev_dbg(dev, "%ld pages unpinned\n", npages); dev_dbg(dev, "%ld pages unpinned\n", npages);
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/mm.h>
#include <asm/iommu.h> #include <asm/iommu.h>
#include <asm/tce.h> #include <asm/tce.h>
...@@ -31,51 +32,6 @@ ...@@ -31,51 +32,6 @@
static void tce_iommu_detach_group(void *iommu_data, static void tce_iommu_detach_group(void *iommu_data,
struct iommu_group *iommu_group); struct iommu_group *iommu_group);
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
long ret = 0, locked, lock_limit;
if (WARN_ON_ONCE(!mm))
return -EPERM;
if (!npages)
return 0;
down_write(&mm->mmap_sem);
locked = mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
mm->locked_vm += npages;
pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
npages << PAGE_SHIFT,
mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
up_write(&mm->mmap_sem);
return ret;
}
static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
if (!mm || !npages)
return;
down_write(&mm->mmap_sem);
if (WARN_ON_ONCE(npages > mm->locked_vm))
npages = mm->locked_vm;
mm->locked_vm -= npages;
pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
npages << PAGE_SHIFT,
mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK));
up_write(&mm->mmap_sem);
}
/* /*
* VFIO IOMMU fd for SPAPR_TCE IOMMU implementation * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
* *
...@@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container) ...@@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container)
return ret; return ret;
locked = table_group->tce32_size >> PAGE_SHIFT; locked = table_group->tce32_size >> PAGE_SHIFT;
ret = try_increment_locked_vm(container->mm, locked); ret = account_locked_vm(container->mm, locked, true);
if (ret) if (ret)
return ret; return ret;
...@@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container) ...@@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container)
container->enabled = false; container->enabled = false;
BUG_ON(!container->mm); BUG_ON(!container->mm);
decrement_locked_vm(container->mm, container->locked_pages); account_locked_vm(container->mm, container->locked_pages, false);
} }
static void *tce_iommu_open(unsigned long arg) static void *tce_iommu_open(unsigned long arg)
...@@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container, ...@@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container,
if (!table_size) if (!table_size)
return -EINVAL; return -EINVAL;
ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT); ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
if (ret) if (ret)
return ret; return ret;
...@@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container, ...@@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container,
unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
iommu_tce_table_put(tbl); iommu_tce_table_put(tbl);
decrement_locked_vm(container->mm, pages); account_locked_vm(container->mm, pages, false);
} }
static long tce_iommu_create_window(struct tce_container *container, static long tce_iommu_create_window(struct tce_container *container,
......
...@@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) ...@@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
ret = down_write_killable(&mm->mmap_sem); ret = down_write_killable(&mm->mmap_sem);
if (!ret) { if (!ret) {
if (npage > 0) { ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
if (!dma->lock_cap) { dma->lock_cap);
unsigned long limit;
limit = task_rlimit(dma->task,
RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (mm->locked_vm + npage > limit)
ret = -ENOMEM;
}
}
if (!ret)
mm->locked_vm += npage;
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
} }
......
...@@ -1543,6 +1543,10 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, ...@@ -1543,6 +1543,10 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
int get_user_pages_fast(unsigned long start, int nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages); unsigned int gup_flags, struct page **pages);
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim);
/* Container for pinned pfns / pages */ /* Container for pinned pfns / pages */
struct frame_vector { struct frame_vector {
unsigned int nr_allocated; /* Number of frames we have space for */ unsigned int nr_allocated; /* Number of frames we have space for */
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/err.h> #include <linux/err.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/swap.h> #include <linux/swap.h>
...@@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) ...@@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
} }
#endif #endif
/**
* __account_locked_vm - account locked pages to an mm's locked_vm
* @mm: mm to account against
* @pages: number of pages to account
* @inc: %true if @pages should be considered positive, %false if not
* @task: task used to check RLIMIT_MEMLOCK
* @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
*
* Assumes @task and @mm are valid (i.e. at least one reference on each), and
* that mmap_sem is held as writer.
*
* Return:
* * 0 on success
* * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
*/
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim)
{
unsigned long locked_vm, limit;
int ret = 0;
lockdep_assert_held_write(&mm->mmap_sem);
locked_vm = mm->locked_vm;
if (inc) {
if (!bypass_rlim) {
limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked_vm + pages > limit)
ret = -ENOMEM;
}
if (!ret)
mm->locked_vm = locked_vm + pages;
} else {
WARN_ON_ONCE(pages > locked_vm);
mm->locked_vm = locked_vm - pages;
}
pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
(void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
return ret;
}
EXPORT_SYMBOL_GPL(__account_locked_vm);
/**
* account_locked_vm - account locked pages to an mm's locked_vm
* @mm: mm to account against, may be NULL
* @pages: number of pages to account
* @inc: %true if @pages should be considered positive, %false if not
*
* Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
*
* Return:
* * 0 on success, or if mm is NULL
* * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
*/
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
{
int ret;
if (pages == 0 || !mm)
return 0;
down_write(&mm->mmap_sem);
ret = __account_locked_vm(mm, pages, inc, current,
capable(CAP_IPC_LOCK));
up_write(&mm->mmap_sem);
return ret;
}
EXPORT_SYMBOL_GPL(account_locked_vm);
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long len, unsigned long prot,
unsigned long flag, unsigned long pgoff) unsigned long flag, unsigned long pgoff)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册