diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 7d0022f82844ea8ba4cfac1ead8b67209bc01d84..c8ec0227f340095163320a0093c878122eebc79e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -27,13 +27,11 @@
 #include <linux/ioctl.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/userswap.h>
 
 int sysctl_unprivileged_userfaultfd __read_mostly = 1;
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
-#ifdef CONFIG_USERSWAP
-int enable_userswap;
-#endif
 
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
@@ -220,6 +218,9 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
 		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
 	if (features & UFFD_FEATURE_THREAD_ID)
 		msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
+#ifdef CONFIG_USERSWAP
+	uswap_get_cpu_id(reason, &msg);
+#endif
 	return msg;
 }
 
@@ -334,8 +335,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	 * changes under us.
 	 */
 #ifdef CONFIG_USERSWAP
-	if ((reason & VM_USWAP) && (!pte_present(*pte)))
-		ret = true;
+	uswap_must_wait(reason, *pte, &ret);
 #endif
 	if (pte_none(*pte))
 		ret = true;
@@ -408,8 +408,12 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
 	BUG_ON(ctx->mm != mm);
 
+#ifdef CONFIG_USERSWAP
+	VM_BUG_ON(uswap_vm_flag_bug_on(reason));
+#else
 	VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
 	VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
+#endif
 
 	if (ctx->features & UFFD_FEATURE_SIGBUS)
 		goto out;
@@ -483,6 +487,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	uwq.wq.private = current;
 	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason,
 			ctx->features);
+#ifdef CONFIG_USERSWAP
+	if (reason & VM_USWAP && pte_none(vmf->orig_pte))
+		uwq.msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_FPF;
+#endif
 	uwq.ctx = ctx;
 	uwq.waken = false;
 
@@ -866,8 +874,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		userfault_flags = VM_UFFD_MISSING | VM_UFFD_WP;
 #ifdef CONFIG_USERSWAP
-		if (enable_userswap)
-			userfault_flags |= VM_USWAP;
+		uswap_release(&userfault_flags);
 #endif
 		cond_resched();
 		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
@@ -1275,6 +1282,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	bool found;
 	bool basic_ioctls;
 	unsigned long start, end, vma_end;
+#ifdef CONFIG_USERSWAP
+	bool uswap_mode = false;
+#endif
 
 	user_uffdio_register = (struct uffdio_register __user *) arg;
 
@@ -1288,26 +1298,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		goto out;
 	vm_flags = 0;
 #ifdef CONFIG_USERSWAP
-	/*
-	 * register the whole vma overlapping with the address range to avoid
-	 * splitting the vma.
-	 */
-	if (enable_userswap && (uffdio_register.mode & UFFDIO_REGISTER_MODE_USWAP)) {
-		uffdio_register.mode &= ~UFFDIO_REGISTER_MODE_USWAP;
-		if (!uffdio_register.mode)
-			goto out;
-		vm_flags |= VM_USWAP;
-		end = uffdio_register.range.start + uffdio_register.range.len - 1;
-		vma = find_vma(mm, uffdio_register.range.start);
-		if (!vma)
-			goto out;
-		uffdio_register.range.start = vma->vm_start;
-
-		vma = find_vma(mm, end);
-		if (!vma)
-			goto out;
-		uffdio_register.range.len = vma->vm_end - uffdio_register.range.start;
-	}
+	if (!uswap_register(&uffdio_register, &uswap_mode))
+		goto out;
 #endif
 	if (uffdio_register.mode & ~(UFFDIO_REGISTER_MODE_MISSING|
 				     UFFDIO_REGISTER_MODE_WP))
@@ -1321,7 +1313,13 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 			     uffdio_register.range.len);
 	if (ret)
 		goto out;
-
+#ifdef CONFIG_USERSWAP
+	if (uswap_mode && !uswap_adjust_uffd_range(&uffdio_register,
+						   &vm_flags, mm)) {
+		ret = -EINVAL;
+		goto out;
+	}
+#endif
 	start = uffdio_register.range.start;
 	end = start + uffdio_register.range.len;
 
@@ -1717,7 +1715,10 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	ret = -EINVAL;
 	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
 		goto out;
-	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
+	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE |
+				 UFFDIO_COPY_MODE_WP |
+				 (IS_ENABLED(CONFIG_USERSWAP) ?
+				  UFFDIO_COPY_MODE_DIRECT_MAP : 0)))
 		goto out;
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
@@ -2029,15 +2030,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	return fd;
 }
 
-#ifdef CONFIG_USERSWAP
-static int __init enable_userswap_setup(char *str)
-{
-	enable_userswap = true;
-	return 1;
-}
-__setup("enable_userswap", enable_userswap_setup);
-#endif
-
 static int __init userfaultfd_init(void)
 {
 	userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache",
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e1cacab86bde2f7931c6c5fc105e5608d1e11e65..e91f31a4c830f89cf7b7b1a9e83fede93a612043 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -31,9 +31,6 @@
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
 extern int sysctl_unprivileged_userfaultfd;
-#ifdef CONFIG_USERSWAP
-extern int enable_userswap;
-#endif
 
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
diff --git a/include/linux/userswap.h b/include/linux/userswap.h
new file mode 100644
index 0000000000000000000000000000000000000000..43b419f9813fc2e6a61c93805ba303a87427fcd3
--- /dev/null
+++ b/include/linux/userswap.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ */
+
+#ifndef _LINUX_USERSWAP_H
+#define _LINUX_USERSWAP_H
+
+#include <linux/mman.h>
+#include <linux/userfaultfd.h>
+
+#ifdef CONFIG_USERSWAP
+
+extern struct static_key_false userswap_enabled;
+
+/*
+ * In the uswap case, bit 0 of the address returned to userspace indicates
+ * whether the swapped-out pages were dirty.
+ */
+#define USWAP_PAGES_DIRTY	1
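+
+/*
+ * Illustration only (userspace sketch, not part of this patch): after a
+ * successful
+ *
+ *	ret = syscall(__NR_mremap, old, len, len, MREMAP_USWAP_SET_PTE, buf);
+ *
+ * the swapped-out pages were dirty iff (ret & USWAP_PAGES_DIRTY), and
+ * (ret & ~(unsigned long)USWAP_PAGES_DIRTY) equals the destination address
+ * 'buf'. A raw syscall is shown because the C library's mremap() wrapper may
+ * not pass the fifth argument through for this flag.
+ */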
+
+int mfill_atomic_pte_nocopy(struct mm_struct *dst_mm,
+			    pmd_t *dst_pmd,
+			    struct vm_area_struct *dst_vma,
+			    unsigned long dst_addr,
+			    unsigned long src_addr);
+
+unsigned long uswap_mremap(unsigned long old_addr, unsigned long old_len,
+			   unsigned long new_addr, unsigned long new_len);
+
+bool uswap_register(struct uffdio_register *uffdio_register, bool *uswap_mode);
+
+bool uswap_adjust_uffd_range(struct uffdio_register *uffdio_register,
+			     unsigned long *vm_flags, struct mm_struct *mm);
+
+bool do_uswap_page(swp_entry_t entry, struct vm_fault *vmf,
+		   struct vm_area_struct *vma, vm_fault_t *ret);
+
+static inline bool uswap_check_copy(struct vm_area_struct *vma,
+				    unsigned long src_addr,
+				    unsigned long len, __u64 mode)
+{
+	if (vma->vm_flags & VM_USWAP) {
+		if (!(mode & UFFDIO_COPY_MODE_DIRECT_MAP))
+			return false;
+		if (offset_in_page(src_addr))
+			return false;
+		if (src_addr > TASK_SIZE || src_addr > TASK_SIZE - len)
+			return false;
+	} else {
+		if (mode & UFFDIO_COPY_MODE_DIRECT_MAP)
+			return false;
+	}
+
+	return true;
+}
+
+static inline bool uswap_validate_mremap_flags(unsigned long flags)
+{
+	if (static_branch_unlikely(&userswap_enabled)) {
+		if (flags & MREMAP_USWAP_SET_PTE &&
+		    flags & ~MREMAP_USWAP_SET_PTE)
+			return false;
+		if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE |
+			      MREMAP_DONTUNMAP | MREMAP_USWAP_SET_PTE))
+			return false;
+	} else {
+		if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE |
+			      MREMAP_DONTUNMAP))
+			return false;
+	}
+	return true;
+}
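+
+/*
+ * For example, with userswap enabled MREMAP_USWAP_SET_PTE is only accepted on
+ * its own: MREMAP_USWAP_SET_PTE | MREMAP_FIXED is rejected by the check above,
+ * while plain MREMAP_FIXED | MREMAP_MAYMOVE keeps working as before.
+ */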
+
+/*
+ * With CONFIG_USERSWAP, VM_UFFD_MISSING | VM_USWAP is also a valid reason;
+ * otherwise exactly one of VM_UFFD_MISSING and VM_UFFD_WP must be set.
+ * Returns true if 'reason' is an invalid combination.
+ */
+static inline bool uswap_vm_flag_bug_on(unsigned long reason)
+{
+	if (reason & ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_USWAP))
+		return true;
+	if (reason & VM_USWAP)
+		return !(reason & VM_UFFD_MISSING) ||
+		       reason & ~(VM_USWAP|VM_UFFD_MISSING);
+	return !(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP);
+}
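+
+/*
+ * For example, VM_UFFD_MISSING, VM_UFFD_WP and VM_UFFD_MISSING | VM_USWAP all
+ * pass the check above, while VM_USWAP alone or VM_UFFD_MISSING | VM_UFFD_WP
+ * trigger the VM_BUG_ON() in handle_userfault().
+ */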
+
+static inline bool uswap_missing(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_USWAP && vma->vm_flags & VM_UFFD_MISSING)
+		return true;
+	return false;
+}
+
+static inline void uswap_get_cpu_id(unsigned long reason, struct uffd_msg *msg)
+{
+	if (reason & VM_USWAP)
+		msg->reserved3 = smp_processor_id();
+}
+
+static inline void uswap_release(unsigned long *userfault_flags)
+{
+	if (static_branch_unlikely(&userswap_enabled))
+		*userfault_flags |= VM_USWAP;
+}
+
+static inline void uswap_must_wait(unsigned long reason, pte_t pte, bool *ret)
+{
+	if ((reason & VM_USWAP) && (!pte_present(pte)))
+		*ret = true;
+}
+
+#endif /* CONFIG_USERSWAP */
+
+#endif /* _LINUX_USERSWAP_H */
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 898ea134b2f398f08a9bed3a7915d470cb94a195..66c408ccc6c662a0bacba4ad8474df19707036f4 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -30,8 +30,6 @@
 #define MAP_SYNC		0x080000 /* perform synchronous page faults for the mapping */
 #define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 
-#define MAP_REPLACE		0x1000000
-
 #define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
 					 * uninitialized */
 
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index f55bc680b5b0a45eefca46732e265a8bfe2f46aa..174a1a2eb041018fde4906ede126d5626fac5353 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -8,6 +8,7 @@
 #define MREMAP_MAYMOVE		1
 #define MREMAP_FIXED		2
 #define MREMAP_DONTUNMAP	4
+#define MREMAP_USWAP_SET_PTE	64	/* userswap: leave swap entries at the old address */
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index ada058f8b94b3699d5f30e0229c62e1785a5f910..b8689050455d992373d8f2308766b1326d90f25b 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -127,6 +127,7 @@ struct uffd_msg {
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
 #define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_FPF		(1<<10) /* If this was the first page fault */
 
 struct uffdio_api {
 	/* userland asks for an API number and the features to enable */
@@ -217,6 +218,7 @@ struct uffdio_copy {
 	 * according to the uffdio_register.ioctls.
 	 */
 #define UFFDIO_COPY_MODE_WP			((__u64)1<<1)
+#define UFFDIO_COPY_MODE_DIRECT_MAP		((__u64)1<<10)	/* userswap: map src pages directly, don't copy */
 	__u64 mode;
 
 	/*
diff --git a/mm/Makefile b/mm/Makefile
index 696ee59c2ac758aaf7d5f5b98291af4522af1128..a014a5e08f7b6a011a27088ed197208f7d4ad442 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -113,6 +113,7 @@ obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
 obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
 obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
 obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
+obj-$(CONFIG_USERSWAP) += userswap.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
diff --git a/mm/memory.c b/mm/memory.c
index 8f7d4531c7634a8f9bb7581720add64182095a95..5941a4f4ea4b154243a368f73f2925814f1cf45d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -73,6 +73,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
+#include <linux/userswap.h>
 
 #include <trace/events/kmem.h>
 
@@ -3395,22 +3396,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 	entry = pte_to_swp_entry(vmf->orig_pte);
 #ifdef CONFIG_USERSWAP
-	if (swp_type(entry) == SWP_USERSWAP_ENTRY) {
-		/* print error if we come across a nested fault */
-		if (!strncmp(current->comm, "uswap", 5)) {
-			pr_err("USWAP: fault %lx is triggered by %s\n",
-					vmf->address, current->comm);
-			return VM_FAULT_SIGBUS;
-		}
-		if (!(vma->vm_flags & VM_UFFD_MISSING)) {
-			pr_err("USWAP: addr %lx flags %lx is not a user swap page",
-					vmf->address, vma->vm_flags);
-			goto skip_uswap;
-		}
-		ret = handle_userfault(vmf, VM_UFFD_MISSING | VM_USWAP);
+	if (!do_uswap_page(entry, vmf, vma, &ret))
 		return ret;
-	}
-skip_uswap:
 #endif
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
@@ -3689,6 +3676,12 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 		if (ret)
 			goto unlock;
 		/* Deliver the page fault to userland, check inside PT lock */
+#ifdef CONFIG_USERSWAP
+		if (uswap_missing(vma)) {
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
+			return handle_userfault(vmf, VM_UFFD_MISSING|VM_USWAP);
+		}
+#endif
 		if (userfaultfd_missing(vma)) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			return handle_userfault(vmf, VM_UFFD_MISSING);
@@ -3731,6 +3724,13 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 		goto release;
 
 	/* Deliver the page fault to userland, check inside PT lock */
+#ifdef CONFIG_USERSWAP
+	if (uswap_missing(vma)) {
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		put_page(page);
+		return handle_userfault(vmf, VM_UFFD_MISSING | VM_USWAP);
+	}
+#endif
 	if (userfaultfd_missing(vma)) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		put_page(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index b3694e09be0f0963679ed79005494a36c0dab7c7..bddd7f0f88b934ec923576c80809a380b7533eee 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1623,205 +1623,6 @@ __do_mmap(struct file *file, unsigned long addr, unsigned long len,
 {
 	return __do_mmap_mm(current->mm, file, addr, len, prot, flags, vm_flags, pgoff, populate, uf);
 }
-#ifdef CONFIG_USERSWAP
-/*
- * Check if pages between 'addr ~ addr+len' can be user swapped. If so, get
- * the reference of the pages and return the pages through input parameters
- * 'ppages'.
- */
-static int pages_can_be_swapped(struct mm_struct *mm, unsigned long addr,
-				unsigned long len, struct page ***ppages)
-{
-	struct vm_area_struct *vma;
-	struct page *page = NULL;
-	struct page **pages = NULL;
-	unsigned long addr_end = addr + len;
-	unsigned long ret;
-	int i, page_num = 0;
-
-	pages = kmalloc(sizeof(struct page *) * (len / PAGE_SIZE), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	while (addr < addr_end) {
-		vma = find_vma(mm, addr);
-		if (!vma || !vma_is_anonymous(vma) || vma->vm_file ||
-		    (vma->vm_flags & VM_LOCKED) || (vma->vm_flags & VM_STACK) ||
-		    (vma->vm_flags & (VM_IO | VM_PFNMAP))) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (!(vma->vm_flags & VM_UFFD_MISSING)) {
-			ret = -EAGAIN;
-			goto out;
-		}
-get_again:
-		/* follow_page will inc page ref, dec the ref after we remap the page */
-		page = follow_page(vma, addr, FOLL_GET);
-		if (IS_ERR_OR_NULL(page)) {
-			ret = -ENODEV;
-			goto out;
-		}
-		pages[page_num++] = page;
-		if (!PageAnon(page) || !PageSwapBacked(page) ||
-		    PageHuge(page) || PageSwapCache(page)) {
-			ret = -EINVAL;
-			goto out;
-		} else if (PageTransCompound(page)) {
-			if (trylock_page(page)) {
-				if (!split_huge_page(page)) {
-					put_page(page);
-					page_num--;
-					unlock_page(page);
-					goto get_again;
-				} else {
-					unlock_page(page);
-					ret = -EINVAL;
-					goto out;
-				}
-			} else {
-				ret = -EINVAL;
-				goto out;
-			}
-		}
-		if (page_mapcount(page) > 1 ||
-		    page_mapcount(page) + 1 != page_count(page)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		addr += PAGE_SIZE;
-	}
-
-	*ppages = pages;
-	return 0;
-
-out:
-	for (i = 0; i < page_num; i++)
-		put_page(pages[i]);
-	if (pages)
-		kfree(pages);
-	*ppages = NULL;
-	return ret;
-}
-
-/*
- * In uswap situation, we use the bit 0 of the returned address to indicate
- * whether the pages are dirty.
- */
-#define USWAP_PAGES_DIRTY	1
-
-/* unmap the pages between 'addr ~ addr+len' and remap them to a new address */
-static unsigned long
-do_user_swap(struct mm_struct *mm, unsigned long addr_start, unsigned long len,
-	     struct page **pages, unsigned long new_addr)
-{
-	struct vm_area_struct *vma;
-	struct page *page;
-	struct mmu_notifier_range range;
-	pmd_t *pmd;
-	pte_t *pte, old_pte;
-	spinlock_t *ptl;
-	unsigned long addr;
-	bool pages_dirty = false;
-	int i = 0;
-
-	addr = addr_start;
-	lru_add_drain();
-	i = 0;
-	while (addr < addr_start + len) {
-		page = pages[i];
-		vma = find_vma(mm, addr);
-		if (!vma)
-			return -EINVAL;
-
-		mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma,
-				vma->vm_mm, addr, addr + PAGE_SIZE);
-		mmu_notifier_invalidate_range_start(&range);
-		pmd = mm_find_pmd(mm, addr);
-		if (!pmd) {
-			mmu_notifier_invalidate_range_end(&range);
-			return -ENXIO;
-		}
-		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-		flush_cache_page(vma, addr, pte_pfn(*pte));
-		old_pte = ptep_clear_flush(vma, addr, pte);
-		if (pte_dirty(old_pte)  || PageDirty(page))
-			pages_dirty = true;
-		set_pte(pte, swp_entry_to_pte(swp_entry(SWP_USERSWAP_ENTRY,
-							page_to_pfn(page))));
-		dec_mm_counter(mm, MM_ANONPAGES);
-		reliable_page_counter(page, mm, -1);
-		page_remove_rmap(page, false);
-		put_page(page);
-
-		pte_unmap_unlock(pte, ptl);
-		mmu_notifier_invalidate_range_end(&range);
-		vma->vm_flags |= VM_USWAP;
-		page->mapping = NULL;
-		addr += PAGE_SIZE;
-		i++;
-	}
-
-	addr = new_addr;
-	vma = find_vma(mm, addr);
-	i = 0;
-	while (addr < new_addr + len) {
-		if (addr > vma->vm_end - 1)
-			vma = find_vma(mm, addr);
-		if (!vma)
-			return -ENODEV;
-
-		page = pages[i++];
-		if (vm_insert_page(vma, addr, page))
-			return -EFAULT;
-
-		addr += PAGE_SIZE;
-	}
-	vma->vm_flags |= VM_USWAP;
-
-	if (pages_dirty)
-		new_addr = new_addr | USWAP_PAGES_DIRTY;
-
-	return new_addr;
-}
-
-static inline unsigned long
-do_uswap_mmap(struct file *file, unsigned long addr, unsigned long len,
-	      unsigned long prot, unsigned long flags, unsigned long pgoff,
-	      unsigned long *populate, struct list_head *uf)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long old_addr = addr;
-	struct page **pages = NULL;
-	unsigned long ret;
-	int i;
-
-	if (!len || offset_in_page(addr) || (len % PAGE_SIZE))
-		return -EINVAL;
-
-	ret = pages_can_be_swapped(mm, addr, len, &pages);
-	if (ret)
-		return ret;
-
-	/* mark the vma as special to avoid merging with other vmas */
-	addr = __do_mmap(file, addr, len, prot, flags, VM_SPECIAL, pgoff,
-			 populate, uf);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto out;
-	}
-
-	ret = do_user_swap(mm, old_addr, len, pages, addr);
-out:
-	/* follow_page() above increased the reference*/
-	for (i = 0; i < len / PAGE_SIZE; i++)
-		put_page(pages[i]);
-	if (pages)
-		kfree(pages);
-
-	return ret;
-}
-#endif
 
 /*
  * The caller must write-lock current->mm->mmap_lock. 
@@ -1831,11 +1632,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long flags, unsigned long pgoff,
 			unsigned long *populate, struct list_head *uf)
 {
-#ifdef CONFIG_USERSWAP
-	if (enable_userswap && (flags & MAP_REPLACE))
-		return do_uswap_mmap(file, addr, len, prot, flags, pgoff,
-				     populate, uf);
-#endif
 	return __do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
 }
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 2f7f3494a990bc003369e3de46c2b6c922bf5931..b8b694be40bdcee5ded1c7b5e29a5cd73e08be2a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,6 +25,7 @@
 #include <linux/mm-arch-hooks.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/share_pool.h>
+#include <linux/userswap.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
@@ -915,8 +916,13 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	 */
 	addr = untagged_addr(addr);
 
+#ifdef CONFIG_USERSWAP
+	if (!uswap_validate_mremap_flags(flags))
+		return ret;
+#else
 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
 		return ret;
+#endif
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
@@ -947,6 +953,11 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (!new_len)
 		return ret;
 
+#ifdef CONFIG_USERSWAP
+	if (flags & MREMAP_USWAP_SET_PTE)
+		return uswap_mremap(addr, old_len, new_addr, new_len);
+#endif
+
 	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 15c46208a2accb1b097f9ff6412b2b63f7400c4a..070359ee383a30490f759f4f77ae39d62fe2ef3e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -15,6 +15,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/hugetlb.h>
 #include <linux/shmem_fs.h>
+#include <linux/userswap.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -90,10 +91,6 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 		*pagep = NULL;
 	}
 
-#ifdef CONFIG_USERSWAP
-	if (dst_vma->vm_flags & VM_USWAP)
-		ClearPageDirty(page);
-#endif
 	/*
 	 * The memory barrier inside __SetPageUptodate makes sure that
 	 * preceding stores to the page contents become visible before
@@ -112,10 +109,6 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 		else
 			_dst_pte = pte_mkwrite(_dst_pte);
 	}
-#ifdef CONFIG_USERSWAP
-	if (dst_vma->vm_flags & VM_USWAP)
-		_dst_pte = pte_mkclean(_dst_pte);
-#endif
 
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 	if (dst_vma->vm_file) {
@@ -128,26 +121,9 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 			goto out_release_uncharge_unlock;
 	}
 
-#ifdef CONFIG_USERSWAP
-	if (!(dst_vma->vm_flags & VM_USWAP)) {
-		ret = -EEXIST;
-		if (!pte_none(*dst_pte))
-			goto out_release_uncharge_unlock;
-	} else {
-		/*
-		 * The userspace may swap in a large area. Part of the area is
-		 * not swapped out. Skip those pages.
-		 */
-		ret = 0;
-		if (swp_type(pte_to_swp_entry(*dst_pte)) != SWP_USERSWAP_ENTRY ||
-		    pte_present(*dst_pte))
-			goto out_release_uncharge_unlock;
-	}
-#else
 	ret = -EEXIST;
 	if (!pte_none(*dst_pte))
 		goto out_release_uncharge_unlock;
-#endif
 
 	inc_mm_counter(dst_mm, MM_ANONPAGES);
 	reliable_page_counter(page, dst_mm, 1);
@@ -535,6 +511,10 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 		goto out_unlock;
 
 	err = -EINVAL;
+#ifdef CONFIG_USERSWAP
+	if (!uswap_check_copy(dst_vma, src_addr, len, mode))
+		goto out_unlock;
+#endif
 	/*
 	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
 	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
@@ -605,8 +585,17 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 		BUG_ON(pmd_none(*dst_pmd));
 		BUG_ON(pmd_trans_huge(*dst_pmd));
 
-		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-				       src_addr, &page, zeropage, wp_copy);
+#ifdef CONFIG_USERSWAP
+		if (static_branch_unlikely(&userswap_enabled) &&
+		    dst_vma->vm_flags & VM_USWAP &&
+		    mode & UFFDIO_COPY_MODE_DIRECT_MAP)
+			err = mfill_atomic_pte_nocopy(dst_mm, dst_pmd, dst_vma,
+						      dst_addr, src_addr);
+		else
+#endif
+			err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
+					       dst_addr, src_addr, &page,
+					       zeropage, wp_copy);
 		cond_resched();
 
 		if (unlikely(err == -ENOENT)) {
diff --git a/mm/userswap.c b/mm/userswap.c
new file mode 100644
index 0000000000000000000000000000000000000000..2d47f6ed9f9165200e0de0c2408e7b96fa6b9b5d
--- /dev/null
+++ b/mm/userswap.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ *
+ * Core userswap implementation: the swap-out and swap-in paths.
+ */
+
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
+#include <linux/userswap.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/security.h>
+
+#include "internal.h"
+
+DEFINE_STATIC_KEY_FALSE(userswap_enabled);
+
+static bool vma_uswap_compatible(struct vm_area_struct *vma)
+{
+	if (!vma || !(vma->vm_flags & VM_USWAP) || !vma_is_anonymous(vma) ||
+	    vma->vm_file || vma->vm_flags & (VM_SHARED | VM_LOCKED | VM_STACK |
+					     VM_IO | VM_PFNMAP))
+		return false;
+	return true;
+}
+
+static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none_or_clear_bad(pgd))
+		return NULL;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none_or_clear_bad(p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none_or_clear_bad(pud))
+		return NULL;
+
+	return pud;
+}
+
+static bool is_thp_or_huge(struct mm_struct *mm, unsigned long addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = get_old_pud(mm, addr);
+	if (!pud)
+		return false;
+	else if (pud_huge(*pud))
+		return true;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd)
+		return false;
+	else if (pmd_huge(*pmd) || pmd_trans_huge(*pmd))
+		return true;
+
+	return false;
+}
+
+/*
+ * Check whether the pages in 'addr ~ addr+len' can be user swapped. If so,
+ * take a reference on each page and return the pages through the output
+ * parameter 'ppages'.
+ */
+static unsigned long pages_can_be_swapped(struct mm_struct *mm,
+					  unsigned long addr,
+					  unsigned long len,
+					  struct page ***ppages)
+{
+	struct vm_area_struct *vma;
+	struct page *page = NULL;
+	struct page **pages = NULL;
+	unsigned long addr_end = addr + len;
+	unsigned long ret;
+	unsigned long i, page_num = 0;
+
+	*ppages = NULL;
+
+	pages = kmalloc(sizeof(struct page *) * (len / PAGE_SIZE), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	while (addr < addr_end) {
+		vma = find_vma(mm, addr);
+		if (!vma || addr < vma->vm_start ||
+		    !vma_uswap_compatible(vma)) {
+			ret = -EINVAL;
+			goto out_err;
+		}
+
+		if (!(vma->vm_flags & VM_UFFD_MISSING)) {
+			ret = -EAGAIN;
+			goto out_err;
+		}
+get_again:
+		/*
+		 * follow_page() takes a reference on the page; the reference
+		 * is dropped after the page has been remapped.
+		 */
+		page = follow_page(vma, addr, FOLL_GET);
+		if (IS_ERR_OR_NULL(page)) {
+			ret = -ENODEV;
+			goto out_err;
+		}
+
+		pages[page_num++] = page;
+		if (!PageAnon(page) || !PageSwapBacked(page) ||
+		    PageHuge(page) || PageSwapCache(page)) {
+			ret = -EINVAL;
+			goto out_err;
+		}
+
+		if (PageTransCompound(page)) {
+			if (trylock_page(page)) {
+				if (!split_huge_page(page)) {
+					unlock_page(page);
+					put_page(page);
+					page_num--;
+					goto get_again;
+				} else
+					unlock_page(page);
+			}
+			ret = -EINVAL;
+			goto out_err;
+		}
+
+		/* The page must be mapped exactly once. */
+		if (page_mapcount(page) > 1) {
+			ret = -EBUSY;
+			goto out_err;
+		}
+		addr += PAGE_SIZE;
+	}
+
+	*ppages = pages;
+	return 0;
+
+out_err:
+	for (i = 0; i < page_num; i++)
+		put_page(pages[i]);
+	kfree(pages);
+	return ret;
+}
+
+static void uswap_unmap_anon_page(struct mm_struct *mm,
+				  struct vm_area_struct *vma,
+				  unsigned long addr, struct page *page,
+				  pmd_t *pmd, pte_t *old_pte,
+				  bool set_to_swp)
+{
+	struct mmu_notifier_range range;
+	spinlock_t *ptl;
+	pte_t *pte, _old_pte;
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma,
+				vma->vm_mm, addr, addr + PAGE_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (pte_none(*pte))
+		goto out_release_unlock;
+	flush_cache_page(vma, addr, pte_pfn(*pte));
+	_old_pte = ptep_clear_flush(vma, addr, pte);
+	if (set_to_swp)
+		set_pte_at(mm, addr, pte, swp_entry_to_pte(swp_entry(
+			   SWP_USERSWAP_ENTRY, page_to_pfn(page))));
+
+	dec_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, -1);
+	page_remove_rmap(page, false);
+
+out_release_unlock:
+	pte_unmap_unlock(pte, ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	page->mapping = NULL;
+	if (old_pte)
+		*old_pte = _old_pte;
+}
+
+static void uswap_map_anon_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				unsigned long addr,
+				struct page *page,
+				pmd_t *pmd,
+				pte_t old_pte)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	flush_cache_page(vma, addr, pte_pfn(*pte));
+	set_pte_at(mm, addr, pte, old_pte);
+	inc_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, 1);
+	page_add_new_anon_rmap(page, vma, addr, false);
+	pte_unmap_unlock(pte, ptl);
+}
+
+static unsigned long vm_insert_anon_page(struct vm_area_struct *vma,
+					 unsigned long addr, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int ret = 0;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
+
+	flush_dcache_page(page);
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+	if (!pte_none(*pte)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	inc_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, 1);
+	page_add_new_anon_rmap(page, vma, addr, false);
+	set_pte_at(mm, addr, pte, mk_pte(page, vma->vm_page_prot));
+
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+	return ret;
+}
+
+static void uswapout_recover(struct mm_struct *mm,
+			     unsigned long old_addr_start, unsigned long len,
+			     struct page **pages, unsigned long new_addr_start,
+			     pte_t *ptes)
+{
+	unsigned long unmap_old_addr = old_addr_start;
+	unsigned long unmap_new_addr = new_addr_start;
+	struct page *page;
+	pmd_t *old_pmd, *new_pmd;
+	pte_t pte;
+	unsigned long i;
+
+	for (i = 0; i < len; i++) {
+		page = pages[i];
+		pte = ptes[i];
+		new_pmd = mm_find_pmd(mm, unmap_new_addr);
+		old_pmd = mm_find_pmd(mm, unmap_old_addr);
+
+		uswap_unmap_anon_page(mm, find_vma(mm, unmap_new_addr),
+				      unmap_new_addr, page, new_pmd, NULL,
+				      false);
+		uswap_map_anon_page(mm, find_vma(mm, unmap_old_addr),
+				    unmap_old_addr, page, old_pmd, pte);
+		unmap_old_addr += PAGE_SIZE;
+		unmap_new_addr += PAGE_SIZE;
+	}
+	if (pte_val(ptes[len]) != 0) {
+		page = pages[len];
+		pte = ptes[len];
+		old_pmd = mm_find_pmd(mm, unmap_old_addr);
+
+		uswap_map_anon_page(mm, find_vma(mm, unmap_old_addr),
+				    unmap_old_addr, page, old_pmd, pte);
+		get_page(page);
+	}
+}
+
+/* unmap the pages between 'addr ~ addr+len' and remap them to a new address */
+static unsigned long do_user_swap(struct mm_struct *mm,
+				  unsigned long old_addr_start,
+				  unsigned long len, struct page **pages,
+				  unsigned long new_addr_start)
+{
+	struct vm_area_struct *old_vma, *new_vma;
+	unsigned long old_addr = old_addr_start;
+	unsigned long new_addr = new_addr_start;
+	struct page *page;
+	pmd_t *pmd;
+	pte_t old_pte, *ptes;
+	bool pages_dirty = false;
+	unsigned long i = 0, j;
+	int ret;
+
+	ptes = kmalloc(sizeof(pte_t) * (len / PAGE_SIZE), GFP_KERNEL);
+	if (!ptes)
+		return -ENOMEM;
+	memset(ptes, 0, sizeof(pte_t) * (len / PAGE_SIZE));
+	lru_add_drain();
+	for (j = 0; j < len; j += PAGE_SIZE) {
+		page = pages[i];
+		ret = -EINVAL;
+		if (!page)
+			goto out_recover;
+		if (is_thp_or_huge(mm, new_addr))
+			goto out_recover;
+		old_vma = find_vma(mm, old_addr);
+		if (!old_vma || old_addr < old_vma->vm_start)
+			goto out_recover;
+		new_vma = find_vma(mm, new_addr);
+		if (!new_vma || new_addr < new_vma->vm_start)
+			goto out_recover;
+
+		ret = -EACCES;
+		if (!(old_vma->vm_flags & VM_WRITE) &&
+		    (new_vma->vm_flags & VM_WRITE))
+			goto out_recover;
+
+		ret = -ENXIO;
+		pmd = mm_find_pmd(mm, old_addr);
+		if (!pmd)
+			goto out_recover;
+		uswap_unmap_anon_page(mm, old_vma, old_addr, page, pmd,
+				      &old_pte, true);
+		ptes[i] = old_pte;
+		if (pte_dirty(old_pte) || PageDirty(page))
+			pages_dirty = true;
+		put_page(page);
+
+		ret = vm_insert_anon_page(new_vma, new_addr, page);
+		if (ret)
+			goto out_recover;
+		get_page(page);
+
+		old_addr += PAGE_SIZE;
+		new_addr += PAGE_SIZE;
+		i++;
+	}
+
+	if (pages_dirty)
+		new_addr_start = new_addr_start | USWAP_PAGES_DIRTY;
+	kfree(ptes);
+	return new_addr_start;
+
+out_recover:
+	uswapout_recover(mm, old_addr_start, i, pages, new_addr_start, ptes);
+	kfree(ptes);
+	return ret;
+}
+
+/*
+ * uswap_mremap() is called from the mremap() syscall when flags contains
+ * MREMAP_USWAP_SET_PTE. It unmaps the pages in 'addr ~ addr+old_len', remaps
+ * them to 'new_addr ~ new_addr+new_len' and sets the PTEs of the old range to
+ * SWP_USERSWAP_ENTRY swap entries.
+ */
+unsigned long uswap_mremap(unsigned long old_addr, unsigned long old_len,
+			   unsigned long new_addr, unsigned long new_len)
+{
+	struct page **pages = NULL;
+	struct mm_struct *mm = current->mm;
+	unsigned long len = old_len;
+	unsigned long ret = -EINVAL;
+	unsigned long i;
+
+	if (!len || old_len != new_len || offset_in_page(old_addr) ||
+	    offset_in_page(new_addr) || (len % PAGE_SIZE))
+		return ret;
+
+	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len ||
+	    old_addr > TASK_SIZE - old_len)
+		return ret;
+
+	/* Ensure the old/new locations do not overlap */
+	if (old_addr + old_len > new_addr && new_addr + new_len > old_addr)
+		return ret;
+
+	down_read(&mm->mmap_lock);
+	ret = pages_can_be_swapped(mm, old_addr, len, &pages);
+	if (ret) {
+		up_read(&mm->mmap_lock);
+		return ret;
+	}
+
+	ret = do_user_swap(mm, old_addr, len, pages, new_addr);
+	up_read(&mm->mmap_lock);
+	/* Drop the references taken by follow_page() in pages_can_be_swapped() */
+	for (i = 0; i < len / PAGE_SIZE; i++)
+		if (pages[i])
+			put_page(pages[i]);
+	kfree(pages);
+	return ret;
+}
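+
+/*
+ * Rough swap-out sequence as seen from userspace (illustration only; the
+ * variable names are made up and not part of this patch): the region is first
+ * registered through userfaultfd with UFFDIO_REGISTER_MODE_MISSING |
+ * UFFDIO_REGISTER_MODE_USWAP, and cold pages are then pulled out into a
+ * previously mmap()ed, still unpopulated private anonymous buffer:
+ *
+ *	unsigned long ret = syscall(__NR_mremap, cold_addr, len, len,
+ *				    MREMAP_USWAP_SET_PTE, buf);
+ *
+ * On success 'buf' holds the page contents (they only need to be written back
+ * if ret & USWAP_PAGES_DIRTY) and the PTEs under 'cold_addr' hold
+ * SWP_USERSWAP_ENTRY entries, so the next access faults into userfaultfd.
+ */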
+
+int mfill_atomic_pte_nocopy(struct mm_struct *mm,
+			    pmd_t *dst_pmd,
+			    struct vm_area_struct *dst_vma,
+			    unsigned long dst_addr,
+			    unsigned long src_addr)
+{
+	struct vm_area_struct *src_vma;
+	pte_t dst_pte, *pte, src_pte;
+	pmd_t *src_pmd;
+	spinlock_t *ptl;
+	int ret = 0;
+	struct page *page;
+
+	src_vma = find_vma(mm, src_addr);
+	if (!src_vma || src_addr < src_vma->vm_start)
+		return -ENOENT;
+
+	if (src_vma->vm_flags & VM_LOCKED)
+		return -EINVAL;
+
+	page = follow_page(src_vma, src_addr, FOLL_GET | FOLL_MIGRATION);
+	if (!page)
+		return -ENODEV;
+
+	src_pmd = mm_find_pmd(mm, src_addr);
+	if (!src_pmd) {
+		ret = -ENXIO;
+		goto out_put_page;
+	}
+	uswap_unmap_anon_page(mm, src_vma, src_addr, page, src_pmd, &src_pte,
+			      false);
+
+	if (dst_vma->vm_flags & VM_USWAP)
+		ClearPageDirty(page);
+	/*
+	 * The memory barrier inside __SetPageUptodate makes sure that
+	 * preceding stores to the page contents become visible before
+	 * the set_pte_at() write.
+	 */
+	__SetPageUptodate(page);
+
+	dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	if (dst_vma->vm_flags & VM_WRITE)
+		dst_pte = pte_mkwrite(pte_mkdirty(dst_pte));
+	if (dst_vma->vm_flags & VM_USWAP)
+		dst_pte = pte_mkclean(dst_pte);
+
+	pte = pte_offset_map_lock(mm, dst_pmd, dst_addr, &ptl);
+
+	/*
+	 * Userspace may swap in a large area of which parts were never swapped
+	 * out, or another thread may have faulted the page in concurrently, so
+	 * the destination PTE can already be present. Only two states are
+	 * handled here: the first page fault (pte_none) and a page swapped out
+	 * by userswap (SWP_USERSWAP_ENTRY); anything else restores the source
+	 * mapping and fails with -EEXIST.
+	 */
+	if (pte_present(*pte) || (!pte_none(*pte) &&
+	    swp_type(pte_to_swp_entry(*pte)) != SWP_USERSWAP_ENTRY)) {
+		pte_unmap_unlock(pte, ptl);
+		uswap_map_anon_page(mm, src_vma, src_addr, page, src_pmd,
+				    src_pte);
+		ret = -EEXIST;
+		goto out_put_page;
+	}
+
+	inc_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, 1);
+	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
+	set_pte_at(mm, dst_addr, pte, dst_pte);
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(dst_vma, dst_addr, pte);
+	pte_unmap_unlock(pte, ptl);
+
+out_put_page:
+	put_page(page);
+	return ret;
+}
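+
+/*
+ * Rough swap-in sequence as seen from userspace (illustration only; names are
+ * made up): on a userfault for a VM_USWAP range the daemon reads the saved
+ * page into an anonymous, page-aligned buffer and resolves the fault with
+ *
+ *	struct uffdio_copy copy = {
+ *		.dst = (unsigned long)fault_addr,
+ *		.src = (unsigned long)buf,
+ *		.len = PAGE_SIZE,
+ *		.mode = UFFDIO_COPY_MODE_DIRECT_MAP,
+ *	};
+ *	ioctl(uffd, UFFDIO_COPY, &copy);
+ *
+ * which ends up in mfill_atomic_pte_nocopy() above and maps 'buf' into the
+ * faulting address instead of copying it.
+ */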
+
+bool uswap_register(struct uffdio_register *uffdio_register, bool *uswap_mode)
+{
+	if (!static_branch_unlikely(&userswap_enabled))
+		return true;
+	if (!(uffdio_register->mode & UFFDIO_REGISTER_MODE_USWAP))
+		return true;
+	uffdio_register->mode &= ~UFFDIO_REGISTER_MODE_USWAP;
+	if (!uffdio_register->mode)
+		return false;
+	*uswap_mode = true;
+	return true;
+}
+
+/*
+ * Register the whole of each VMA overlapping the requested address range, so
+ * that no VMA has to be split; this avoids VMA fragmentation.
+ */
+bool uswap_adjust_uffd_range(struct uffdio_register *uffdio_register,
+			     unsigned long *vm_flags, struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	unsigned long end;
+	bool ret = false;
+
+	if (!static_branch_unlikely(&userswap_enabled))
+		return true;
+	end = uffdio_register->range.start + uffdio_register->range.len - 1;
+
+	mmap_read_lock(mm);
+	vma = find_vma(mm, uffdio_register->range.start);
+	if (!vma || vma->vm_start >= end)
+		goto out_unlock;
+	uffdio_register->range.start = vma->vm_start;
+	vma = find_vma(mm, end);
+	if (vma && end >= vma->vm_start)
+		uffdio_register->range.len = vma->vm_end - uffdio_register->range.start;
+
+	*vm_flags |= VM_USWAP;
+
+	ret = true;
+out_unlock:
+	mmap_read_unlock(mm);
+	return ret;
+}
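+
+/*
+ * For example, if a VMA spans [0x10000, 0x30000) and userspace registers
+ * [0x1c000, 0x1e000) in USWAP mode, the range is widened to [0x10000, 0x30000)
+ * so that registration never has to split the VMA.
+ */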
+
+bool do_uswap_page(swp_entry_t entry, struct vm_fault *vmf,
+		   struct vm_area_struct *vma, vm_fault_t *ret)
+{
+	if (!static_branch_unlikely(&userswap_enabled))
+		return true;
+
+	if (swp_type(entry) != SWP_USERSWAP_ENTRY)
+		return true;
+
+	/* print error if we come across a nested fault */
+	if (!strncmp(current->comm, "uswap", 5)) {
+		pr_err("USWAP: fault %lx is triggered by %s\n", vmf->address,
+		       current->comm);
+		*ret = VM_FAULT_SIGBUS;
+		return false;
+	}
+
+	if (!(vma->vm_flags & VM_UFFD_MISSING)) {
+		pr_err("USWAP: addr %lx flags %lx is not a user swap page\n",
+				vmf->address, vma->vm_flags);
+		return true;
+	}
+
+	*ret = handle_userfault(vmf, VM_UFFD_MISSING | VM_USWAP);
+	return false;
+}
+
+static int __init enable_userswap_setup(char *str)
+{
+	static_branch_enable(&userswap_enabled);
+	return 1;
+}
+__setup("enable_userswap", enable_userswap_setup);