diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index a73702704f38d05c1b60a87c77ff69a19d845551..d3e04208d53c23e36eee552e89e484ef81d8f2c5 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -48,6 +48,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u
 
 #define flush_icache_page(vma, page)		do {} while (0);
 #define flush_icache_range(start, end)		cache_wbinv_range(start, end)
+#define flush_icache_mm_range(mm, start, end)	cache_wbinv_range(start, end)
+#define flush_icache_deferred(mm)		do {} while (0);
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 do { \
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index ba469953a16e6ee9daf879aba2faa34e8baada27..790f1ebfba44ba925ccd94f1927b61e03137dbc6 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -28,3 +28,58 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 
 	kunmap_atomic((void *) addr);
 }
+
+void flush_icache_deferred(struct mm_struct *mm)
+{
+	unsigned int cpu = smp_processor_id();
+	cpumask_t *mask = &mm->context.icache_stale_mask;
+
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
+		/*
+		 * Ensure the remote hart's writes are visible to this hart.
+		 * This pairs with a barrier in flush_icache_mm.
+		 */
+		smp_mb();
+		local_icache_inv_all(NULL);
+	}
+}
+
+void flush_icache_mm_range(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
+{
+	unsigned int cpu;
+	cpumask_t others, *mask;
+
+	preempt_disable();
+
+#ifdef CONFIG_CPU_HAS_ICACHE_INS
+	if (mm == current->mm) {
+		icache_inv_range(start, end);
+		preempt_enable();
+		return;
+	}
+#endif
+
+	/* Mark every hart's icache as needing a flush for this MM. */
+	mask = &mm->context.icache_stale_mask;
+	cpumask_setall(mask);
+
+	/* Flush this hart's I$ now, and mark it as flushed. */
+	cpu = smp_processor_id();
+	cpumask_clear_cpu(cpu, mask);
+	local_icache_inv_all(NULL);
+
+	/*
+	 * Flush the I$ of other harts concurrently executing, and mark them as
+	 * flushed.
+	 */
+	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+
+	if (mm != current->active_mm || !cpumask_empty(&others)) {
+		on_each_cpu_mask(&others, local_icache_inv_all, NULL, 1);
+		cpumask_clear(mask);
+	}
+
+	preempt_enable();
+}
diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h
index 28b7c32331757e0352d1b4681ef8ef0e802538d2..a565e00c3f70b2e51420cf61650c59cf68c199dc 100644
--- a/arch/csky/abiv2/inc/abi/cacheflush.h
+++ b/arch/csky/abiv2/inc/abi/cacheflush.h
@@ -31,15 +31,23 @@ static inline void flush_dcache_page(struct page *page)
 
 #define flush_icache_range(start, end)		cache_wbinv_range(start, end)
 
+void flush_icache_mm_range(struct mm_struct *mm,
+			unsigned long start, unsigned long end);
+void flush_icache_deferred(struct mm_struct *mm);
+
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 do { \
 	memcpy(dst, src, len); \
-	if (vma->vm_flags & VM_EXEC) \
-		cache_wbinv_range((unsigned long)dst, \
-				  (unsigned long)dst + len); \
+	if (vma->vm_flags & VM_EXEC) { \
+		dcache_wb_range((unsigned long)dst, \
+				(unsigned long)dst + len); \
+		flush_icache_mm_range(current->mm, \
+				(unsigned long)dst, \
+				(unsigned long)dst + len); \
+		} \
 } while (0)
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
diff --git a/arch/csky/include/asm/cacheflush.h b/arch/csky/include/asm/cacheflush.h
index a96da67261ae51fd67f48291489ef709874ac148..f0b8f25429a27f723c191888f624b53c0d778529 100644
--- a/arch/csky/include/asm/cacheflush.h
+++ b/arch/csky/include/asm/cacheflush.h
@@ -4,6 +4,7 @@
 #ifndef __ASM_CSKY_CACHEFLUSH_H
 #define __ASM_CSKY_CACHEFLUSH_H
 
+#include <linux/mm.h>
 #include <abi/cacheflush.h>
 
 #endif /* __ASM_CSKY_CACHEFLUSH_H */
diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h
index b382a14ea4ec72535393b4a6962b1f1020ab0b7e..26fbb1d15df08ea711f73efe26b39bc356312cc0 100644
--- a/arch/csky/include/asm/mmu.h
+++ b/arch/csky/include/asm/mmu.h
@@ -7,6 +7,7 @@
 typedef struct {
 	atomic64_t	asid;
 	void *vdso;
+	cpumask_t	icache_stale_mask;
 } mm_context_t;
 
 #endif /* __ASM_CSKY_MMU_H */
diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index 0285b0ad18b6f2bce9276396461f9a1ffc2ed2f2..abdf1f1cb6ec991248ac17d7dc63881c479616ef 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -43,5 +43,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 	TLBMISS_HANDLER_SETUP_PGD(next->pgd);
 	write_mmu_entryhi(next->context.asid.counter);
+
+	flush_icache_deferred(next);
 }
 #endif /* __ASM_CSKY_MMU_CONTEXT_H */
diff --git a/arch/csky/mm/syscache.c b/arch/csky/mm/syscache.c
index c4645e4e97f4b70d4cc23be29f20cd3aaaff5db0..ffade2f9a4c87914af5c8322fdfc1b9600a204c1 100644
--- a/arch/csky/mm/syscache.c
+++ b/arch/csky/mm/syscache.c
@@ -3,7 +3,7 @@
 
 #include <linux/syscalls.h>
 #include <asm/page.h>
-#include <asm/cache.h>
+#include <asm/cacheflush.h>
 #include <asm/cachectl.h>
 
 SYSCALL_DEFINE3(cacheflush,
@@ -13,17 +13,14 @@ SYSCALL_DEFINE3(cacheflush,
 {
 	switch (cache) {
 	case ICACHE:
-		icache_inv_range((unsigned long)addr,
-				 (unsigned long)addr + bytes);
-		break;
+	case BCACHE:
+		flush_icache_mm_range(current->mm,
+				(unsigned long)addr,
+				(unsigned long)addr + bytes);
 	case DCACHE:
 		dcache_wb_range((unsigned long)addr,
 				(unsigned long)addr + bytes);
 		break;
-	case BCACHE:
-		cache_wbinv_range((unsigned long)addr,
-				  (unsigned long)addr + bytes);
-		break;
 	default:
 		return -EINVAL;
 	}