// SPDX-License-Identifier: GPL-2.0-only
/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

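/*
 * Find the destination vma for a userfaultfd operation: the range
 * [dst_start, dst_start + len) must lie entirely within one existing
 * vma that is registered with userfaultfd, otherwise return NULL.
 */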
static __always_inline
struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
				    unsigned long dst_start,
				    unsigned long len)
{
	/*
	 * Make sure that the dst range is both valid and fully within a
	 * single existing vma.
	 */
	struct vm_area_struct *dst_vma;

	dst_vma = find_vma(dst_mm, dst_start);
	if (!dst_vma)
		return NULL;

	if (dst_start < dst_vma->vm_start ||
	    dst_start + len > dst_vma->vm_end)
		return NULL;

	/*
	 * Check the vma is registered in uffd, this is required to
	 * enforce the VM_MAYWRITE check done at uffd registration
	 * time.
	 */
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		return NULL;

	return dst_vma;
}

/*
 * Install PTEs, to map dst_addr (within dst_vma) to page.
 *
 * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
 * and anon, and for both shared and private VMAs.
 */
int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr, struct page *page,
			     bool newly_allocated, bool wp_copy)
{
	int ret;
	pte_t _dst_pte, *dst_pte;
	bool writable = dst_vma->vm_flags & VM_WRITE;
	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
	bool page_in_cache = page->mapping;
	spinlock_t *ptl;
	struct inode *inode;
	pgoff_t offset, max_off;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (page_in_cache && !vm_shared)
		writable = false;
	if (writable || !page_in_cache)
		_dst_pte = pte_mkdirty(_dst_pte);
	if (writable) {
		if (wp_copy)
			_dst_pte = pte_mkuffd_wp(_dst_pte);
		else
			_dst_pte = pte_mkwrite(_dst_pte);
	}

	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

	if (vma_is_shmem(dst_vma)) {
		/* serialize against truncate with the page table lock */
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}

	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_unlock;

	if (page_in_cache)
		page_add_file_rmap(page, false);
	else
		page_add_new_anon_rmap(page, dst_vma, dst_addr, false);

	/*
	 * Must happen after rmap, as mm_counter() checks mapping (via
	 * PageAnon()), which is set by __page_set_anon_rmap().
	 */
	inc_mm_counter(dst_mm, mm_counter(page));

	if (newly_allocated)
		lru_cache_add_inactive_or_unevictable(page, dst_vma);

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}

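/*
 * UFFDIO_COPY for anonymous (or MAP_PRIVATE shmem) memory: allocate a
 * page, copy PAGE_SIZE bytes from src_addr in userspace into it and
 * install it at dst_addr.  Returns -ENOENT with the allocated page
 * stashed in *pagep when the copy must be retried without mmap_lock.
 */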
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep,
			    bool wp_copy)
{
	void *page_kaddr;
	int ret;
	struct page *page;

	if (!*pagep) {
		ret = -ENOMEM;
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
		if (!page)
			goto out;

		page_kaddr = kmap_atomic(page);
		ret = copy_from_user(page_kaddr,
				     (const void __user *) src_addr,
				     PAGE_SIZE);
		kunmap_atomic(page_kaddr);

		/* fallback to copy_from_user outside mmap_lock */
		if (unlikely(ret)) {
			ret = -ENOENT;
			*pagep = page;
			/* don't free the page */
			goto out;
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	ret = -ENOMEM;
	if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
		goto out_release;

	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       page, true, wp_copy);
	if (ret)
		goto out_release;
out:
	return ret;
out_release:
	put_page(page);
	goto out;
}

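/*
 * UFFDIO_ZEROPAGE for private memory: map the zero page read-only at
 * dst_addr, after revalidating i_size for the shmem MAP_PRIVATE case.
 */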
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
			      pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;
	pgoff_t offset, max_off;
	struct inode *inode;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (dst_vma->vm_file) {
		/* the shmem MAP_PRIVATE case requires checking the i_size */
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}
	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}

/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
				pmd_t *dst_pmd,
				struct vm_area_struct *dst_vma,
				unsigned long dst_addr,
				bool wp_copy)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	struct page *page;
	int ret;

	ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
	if (ret)
		goto out;
	if (!page) {
		ret = -EFAULT;
		goto out;
	}

	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       page, false, wp_copy);
	if (ret)
		goto out_release;

	unlock_page(page);
	ret = 0;
out:
	return ret;
out_release:
	unlock_page(page);
	put_page(page);
	goto out;
}

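/*
 * Walk (and allocate if missing) the p4d/pud levels and return the pmd
 * for @address, or NULL if an allocation fails.
 */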
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that this is not necessarily run because the pmd was
	 * missing: *pmd may already be established, and it may even
	 * be a trans_huge_pmd.
	 */
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_lock held; it will release mmap_lock before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
					      struct vm_area_struct *dst_vma,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      enum mcopy_atomic_mode mode)
{
	int vm_shared = dst_vma->vm_flags & VM_SHARED;
	ssize_t err;
	pte_t *dst_pte;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	unsigned long vma_hpagesize;
	pgoff_t idx;
	u32 hash;
	struct address_space *mapping;

	/*
	 * There is no default zero huge page for all huge page sizes as
	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
	 * by THP.  Since we cannot reliably insert a zero page, this
	 * feature is not supported.
	 */
	if (mode == MCOPY_ATOMIC_ZEROPAGE) {
		mmap_read_unlock(dst_mm);
		return -EINVAL;
	}

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
	vma_hpagesize = vma_kernel_pagesize(dst_vma);

	/*
	 * Validate alignment based on huge page size
	 */
	err = -EINVAL;
	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
		goto out_unlock;

retry:
	/*
	 * On routine entry dst_vma is set.  If we had to drop mmap_lock and
	 * retry, dst_vma will be set to NULL and we must look it up again.
	 */
	if (!dst_vma) {
		err = -ENOENT;
		dst_vma = find_dst_vma(dst_mm, dst_start, len);
		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
			goto out_unlock;

		err = -EINVAL;
		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
			goto out_unlock;

		vm_shared = dst_vma->vm_flags & VM_SHARED;
	}

	/*
	 * If not shared, ensure the dst_vma has an anon_vma.
	 */
	err = -ENOMEM;
	if (!vm_shared) {
		if (unlikely(anon_vma_prepare(dst_vma)))
			goto out_unlock;
	}

	while (src_addr < src_start + len) {
		BUG_ON(dst_addr >= dst_start + len);

		/*
		 * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
		 * i_mmap_rwsem ensures the dst_pte remains valid even
		 * in the case of shared pmds.  fault mutex prevents
		 * races with other faulting threads.
		 */
		mapping = dst_vma->vm_file->f_mapping;
		i_mmap_lock_read(mapping);
		idx = linear_page_index(dst_vma, dst_addr);
		hash = hugetlb_fault_mutex_hash(mapping, idx);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);

		err = -ENOMEM;
		dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
		if (!dst_pte) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			i_mmap_unlock_read(mapping);
			goto out_unlock;
		}

		if (mode != MCOPY_ATOMIC_CONTINUE &&
		    !huge_pte_none(huge_ptep_get(dst_pte))) {
			err = -EEXIST;
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			i_mmap_unlock_read(mapping);
			goto out_unlock;
		}

		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
					       dst_addr, src_addr, mode, &page);

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		i_mmap_unlock_read(mapping);

		cond_resched();

		if (unlikely(err == -ENOENT)) {
			mmap_read_unlock(dst_mm);
			BUG_ON(!page);

			err = copy_huge_page_from_user(page,
						(const void __user *)src_addr,
						vma_hpagesize / PAGE_SIZE,
						true);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			mmap_read_lock(dst_mm);

			dst_vma = NULL;
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += vma_hpagesize;
			src_addr += vma_hpagesize;
			copied += vma_hpagesize;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	mmap_read_unlock(dst_mm);
out:
	if (page)
		put_page(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
				      struct vm_area_struct *dst_vma,
				      unsigned long dst_start,
				      unsigned long src_start,
				      unsigned long len,
				      enum mcopy_atomic_mode mode);
#endif /* CONFIG_HUGETLB_PAGE */

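/*
 * Dispatch a single-page fill based on the mcopy mode and the type of
 * the destination vma (anonymous/MAP_PRIVATE vs shared shmem).
 */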
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
						pmd_t *dst_pmd,
						struct vm_area_struct *dst_vma,
						unsigned long dst_addr,
						unsigned long src_addr,
						struct page **page,
						enum mcopy_atomic_mode mode,
						bool wp_copy)
{
	ssize_t err;

	if (mode == MCOPY_ATOMIC_CONTINUE) {
		return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
					    wp_copy);
	}

	/*
	 * The normal page fault path for a shmem will invoke the
	 * fault, fill the hole in the file and COW it right away. The
	 * result generates plain anonymous memory. So when we are
	 * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
	 * generate anonymous memory directly without actually filling
	 * the hole. For the MAP_PRIVATE case the robustness check
	 * only happens in the pagetable (to verify it's still none)
	 * and not in the radix tree.
	 */
	if (!(dst_vma->vm_flags & VM_SHARED)) {
		if (mode == MCOPY_ATOMIC_NORMAL)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, page,
					       wp_copy);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd,
						 dst_vma, dst_addr);
	} else {
		VM_WARN_ON_ONCE(wp_copy);
		err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
					     dst_addr, src_addr,
					     mode != MCOPY_ATOMIC_NORMAL,
					     page);
	}

	return err;
}

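/*
 * Common driver for UFFDIO_COPY, UFFDIO_ZEROPAGE and UFFDIO_CONTINUE:
 * validate the parameters and the destination vma under mmap_lock,
 * then fill the range one page at a time, dropping mmap_lock to copy
 * from userspace whenever mfill_atomic_pte() returns -ENOENT.
 */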
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      enum mcopy_atomic_mode mcopy_mode,
					      atomic_t *mmap_changing,
					      __u64 mode)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	bool wp_copy;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
retry:
	mmap_read_lock(dst_mm);

	/*
	 * If memory mappings are changing because of a non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and
	 * request the user to retry later.
	 */
	err = -EAGAIN;
	if (mmap_changing && atomic_read(mmap_changing))
		goto out_unlock;

	/*
	 * Make sure that the dst range is both valid and fully
	 * within a single existing vma.
	 */
	err = -ENOENT;
	dst_vma = find_dst_vma(dst_mm, dst_start, len);
	if (!dst_vma)
		goto out_unlock;

	err = -EINVAL;
	/*
	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
	 */
	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

	/*
	 * validate 'mode' now that we know the dst_vma: don't allow
	 * a wrprotect copy if the userfaultfd didn't register as WP.
	 */
	wp_copy = mode & UFFDIO_COPY_MODE_WP;
	if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
		goto out_unlock;

	/*
	 * If this is a HUGETLB vma, pass off to appropriate routine
	 */
	if (is_vm_hugetlb_page(dst_vma))
		return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
						src_start, len, mcopy_mode);

	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
		goto out_unlock;
	if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
		goto out_unlock;

	/*
	 * Ensure the dst_vma has an anon_vma or this page
	 * would get a NULL anon_vma when moved into the
	 * dst_vma.
	 */
	err = -ENOMEM;
	if (!(dst_vma->vm_flags & VM_SHARED) &&
	    unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmd_read_atomic(dst_pmd);
		/*
		 * If the dst_pmd is mapped as THP don't
		 * override it and just be strict.
		 */
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
			err = -ENOMEM;
			break;
		}
		/* If a huge pmd materialized from under us, fail */
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       src_addr, &page, mcopy_mode, wp_copy);
		cond_resched();

		if (unlikely(err == -ENOENT)) {
			void *page_kaddr;

			mmap_read_unlock(dst_mm);
			BUG_ON(!page);

			page_kaddr = kmap(page);
			err = copy_from_user(page_kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap(page);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	mmap_read_unlock(dst_mm);
out:
	if (page)
		put_page(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}

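/* Entry point for UFFDIO_COPY. */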
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
		     unsigned long src_start, unsigned long len,
		     atomic_t *mmap_changing, __u64 mode)
{
	return __mcopy_atomic(dst_mm, dst_start, src_start, len,
			      MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
}

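/* Entry point for UFFDIO_ZEROPAGE. */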
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, atomic_t *mmap_changing)
{
	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
			      mmap_changing, 0);
}

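/* Entry point for UFFDIO_CONTINUE. */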
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, atomic_t *mmap_changing)
{
	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
			      mmap_changing, 0);
}

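/*
 * UFFDIO_WRITEPROTECT: toggle write protection on an anonymous,
 * uffd-wp registered range via change_protection(), using the
 * MM_CP_UFFD_WP / MM_CP_UFFD_WP_RESOLVE flags.
 */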
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
			unsigned long len, bool enable_wp,
			atomic_t *mmap_changing)
{
	struct vm_area_struct *dst_vma;
	pgprot_t newprot;
	int err;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(start + len <= start);

	mmap_read_lock(dst_mm);

	/*
	 * If memory mappings are changing because of a non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and
	 * request the user to retry later.
	 */
	err = -EAGAIN;
	if (mmap_changing && atomic_read(mmap_changing))
		goto out_unlock;

	err = -ENOENT;
	dst_vma = find_dst_vma(dst_mm, start, len);
	/*
	 * Make sure the vma is not shared, and that the dst range is
	 * both valid and fully within a single existing vma.
	 */
	if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;
	if (!userfaultfd_wp(dst_vma))
		goto out_unlock;
	if (!vma_is_anonymous(dst_vma))
		goto out_unlock;

	if (enable_wp)
		newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
	else
		newprot = vm_get_page_prot(dst_vma->vm_flags);

	change_protection(dst_vma, start, start + len, newprot,
			  enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);

	err = 0;
out_unlock:
	mmap_read_unlock(dst_mm);
	return err;
}