/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License, version 2, as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2010 Paul Mackerras, IBM Corp. * Copyright 2011 David Gibson, IBM Corporation */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) static unsigned long kvmppc_tce_pages(unsigned long window_size) { return ALIGN((window_size >> SPAPR_TCE_SHIFT) * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; } static unsigned long kvmppc_stt_pages(unsigned long tce_pages) { unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + (tce_pages * sizeof(struct page *)); return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; } static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) { long ret = 0; if (!current || !current->mm) return ret; /* process exited */ down_write(¤t->mm->mmap_sem); if (inc) { unsigned long locked, lock_limit; locked = current->mm->locked_vm + stt_pages; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) ret = -ENOMEM; else current->mm->locked_vm += stt_pages; } else { if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) stt_pages = current->mm->locked_vm; current->mm->locked_vm -= stt_pages; } pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, inc ? '+' : '-', stt_pages << PAGE_SHIFT, current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK), ret ? " - exceeded" : ""); up_write(¤t->mm->mmap_sem); return ret; } static void release_spapr_tce_table(struct rcu_head *head) { struct kvmppc_spapr_tce_table *stt = container_of(head, struct kvmppc_spapr_tce_table, rcu); int i; unsigned long npages = kvmppc_tce_pages(stt->window_size); for (i = 0; i < npages; i++) __free_page(stt->pages[i]); kfree(stt); } static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; struct page *page; if (vmf->pgoff >= kvmppc_tce_pages(stt->window_size)) return VM_FAULT_SIGBUS; page = stt->pages[vmf->pgoff]; get_page(page); vmf->page = page; return 0; } static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { .fault = kvm_spapr_tce_fault, }; static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) { vma->vm_ops = &kvm_spapr_tce_vm_ops; return 0; } static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; list_del_rcu(&stt->list); kvm_put_kvm(stt->kvm); kvmppc_account_memlimit( kvmppc_stt_pages(kvmppc_tce_pages(stt->window_size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); return 0; } static const struct file_operations kvm_spapr_tce_fops = { .mmap = kvm_spapr_tce_mmap, .release = kvm_spapr_tce_release, }; long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args) { struct kvmppc_spapr_tce_table *stt = NULL; unsigned long npages; int ret = -ENOMEM; int i; /* Check this LIOBN hasn't been previously allocated */ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { if (stt->liobn == args->liobn) return -EBUSY; } npages = kvmppc_tce_pages(args->window_size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); if (ret) { stt = NULL; goto fail; } stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) goto fail; stt->liobn = args->liobn; stt->window_size = args->window_size; stt->kvm = kvm; for (i = 0; i < npages; i++) { stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!stt->pages[i]) goto fail; } kvm_get_kvm(kvm); mutex_lock(&kvm->lock); list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); mutex_unlock(&kvm->lock); return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, stt, O_RDWR | O_CLOEXEC); fail: if (stt) { for (i = 0; i < npages; i++) if (stt->pages[i]) __free_page(stt->pages[i]); kfree(stt); } return ret; }