Merge branch 'topic/ppc-kvm' into next

Merge in some commits we're sharing with the KVM tree. I manually propagated the change from commit d3d4ffaa ("powerpc/powernv/ioda2: Reduce upper limit for DMA window size") into pci-ioda-tce.c. Conflicts: arch/powerpc/include/asm/cputable.h arch/powerpc/platforms/powernv/pci-ioda.c arch/powerpc/platforms/powernv/pci.h

Merge branch 'topic/ppc-kvm' into next
Merge in some commits we're sharing with the KVM tree. I manually propagated the change from commit d3d4ffaa ("powerpc/powernv/ioda2: Reduce upper limit for DMA window size") into pci-ioda-tce.c. Conflicts: arch/powerpc/include/asm/cputable.h arch/powerpc/platforms/powernv/pci-ioda.c arch/powerpc/platforms/powernv/pci.h
ce57c661 · Michael Ellerman · 9c3250a1 · a68bd126 · ce57c661 · ce57c661
41 changed file
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -32,26 +32,6 @@ static inline int hstate_get_psize(struct hstate *hstate)
 	}
 }

-#define arch_make_huge_pte arch_make_huge_pte
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
-				       struct page *page, int writable)
-{
-	unsigned long page_shift;
-
-	if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
-		return entry;
-
-	page_shift = huge_page_shift(hstate_vma(vma));
-	/*
-	 * We don't support 1G hugetlb pages yet.
-	 */
-	VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift);
-	if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift)
-		return __pte(pte_val(entry) | R_PAGE_LARGE);
-	else
-		return entry;
-}
-
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static inline bool gigantic_page_supported(void)
 {

--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -479,9 +479,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 {
 	if (full && radix_enabled()) {
 		/*
-		 * Let's skip the DD1 style pte update here. We know that
-		 * this is a full mm pte clear and hence can be sure there is
-		 * no parallel set_pte.
+		 * We know that this is a full mm pte clear and
+		 * hence can be sure there is no parallel set_pte.
 		 */
 		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
 	}

--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -12,12 +12,6 @@
 #include <asm/book3s/64/radix-4k.h>
 #endif

-/*
- * For P9 DD1 only, we need to track whether the pte's huge.
- */
-#define R_PAGE_LARGE	_RPAGE_RSV1
-
-
 #ifndef __ASSEMBLY__
 #include <asm/book3s/64/tlbflush-radix.h>
 #include <asm/cpu_has_feature.h>
@@ -154,20 +148,7 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 {
 	unsigned long old_pte;

-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-
-		unsigned long new_pte;
-
-		old_pte = __radix_pte_update(ptep, ~0ul, 0);
-		/*
-		 * new value of pte
-		 */
-		new_pte = (old_pte | set) & ~clr;
-		radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr);
-		if (new_pte)
-			__radix_pte_update(ptep, 0, new_pte);
-	} else
-		old_pte = __radix_pte_update(ptep, clr, set);
+	old_pte = __radix_pte_update(ptep, clr, set);
 	if (!huge)
 		assert_pte_locked(mm, addr);

@@ -253,8 +234,6 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)

 static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
 {
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE);
 	return __pmd(pmd_val(pmd) | _PAGE_PTE);
 }

@@ -285,18 +264,14 @@ static inline unsigned long radix__get_tree_size(void)
 	unsigned long rts_field;
 	/*
 	 * We support 52 bits, hence:
-	 *  DD1    52-28 = 24, 0b11000
-	 *  Others 52-31 = 21, 0b10101
+	 * bits 52 - 31 = 21, 0b10101
 	 * RTS encoding details
 	 * bits 0 - 3 of rts -> bits 6 - 8 unsigned long
 	 * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
 	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		rts_field = (0x3UL << 61);
-	else {
-		rts_field = (0x5UL << 5); /* 6 - 8 bits */
-		rts_field |= (0x2UL << 61);
-	}
+	rts_field = (0x5UL << 5); /* 6 - 8 bits */
+	rts_field |= (0x2UL << 61);
+
 	return rts_field;
 }


--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -48,8 +48,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad
 extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
 extern void radix__flush_tlb_all(void);
-extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
-					unsigned long address);

 extern void radix__flush_tlb_lpid_page(unsigned int lpid,
 					unsigned long addr,

--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -210,7 +210,6 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_DAWR			LONG_ASM_CONST(0x0000008000000000)
 #define CPU_FTR_DABRX			LONG_ASM_CONST(0x0000010000000000)
 #define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x0000040000000000)
 #define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x0000080000000000)
 #define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
@@ -463,8 +462,6 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
 	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
 	    CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
-#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
-			     (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
 #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
@@ -488,16 +485,14 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
 	     CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \
-	     CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
-	     CPU_FTRS_POWER9_DD2_2)
+	     CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2)
 #else
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
 	     CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
 	     CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
 	     CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \
-	     CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
-	     CPU_FTRS_POWER9_DD2_2)
+	     CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2)
 #endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
@@ -565,17 +560,15 @@ enum {
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define CPU_FTRS_ALWAYS \
 	    (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
-	     CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \
-	     CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
-	     CPU_FTRS_DT_CPU_BASE)
+	     CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER9 & \
+	     CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_DT_CPU_BASE)
 #else
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
 	     CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
 	     CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
-	     ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
-	     CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
-	     CPU_FTRS_DT_CPU_BASE)
+	     ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & CPU_FTRS_POWER9 & \
+	     CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_DT_CPU_BASE)
 #endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else

--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -69,6 +69,8 @@ struct iommu_table_ops {
 			long index,
 			unsigned long *hpa,
 			enum dma_data_direction *direction);
+
+	__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
 #endif
 	void (*clear)(struct iommu_table *tbl,
 			long index, long npages);
@@ -117,15 +119,16 @@ struct iommu_table {
 	unsigned long *it_map;       /* A simple allocation bitmap for now */
 	unsigned long  it_page_shift;/* table iommu page size */
 	struct list_head it_group_list;/* List of iommu_table_group_link */
-	unsigned long *it_userspace; /* userspace view of the table */
+	__be64 *it_userspace; /* userspace view of the table */
 	struct iommu_table_ops *it_ops;
 	struct kref    it_kref;
+	int it_nid;
 };

+#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
+		((tbl)->it_ops->useraddrptr((tbl), (entry), false))
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
-		((tbl)->it_userspace ? \
-			&((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
-			NULL)
+		((tbl)->it_ops->useraddrptr((tbl), (entry), true))

 /* Pure 2^n version of get_order */
 static inline __attribute_const__

--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -187,11 +187,6 @@ struct paca_struct {
 	u8 subcore_sibling_mask;
 	/* Flag to request this thread not to stop */
 	atomic_t dont_stop;
-	/*
-	 * Pointer to an array which contains pointer
-	 * to the sibling threads' paca.
-	 */
-	struct paca_struct **thread_sibling_pacas;
 	/* The PSSCR value that the kernel requested before going to stop */
 	u64 requested_psscr;


--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -766,7 +766,6 @@ int main(void)
 	OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
 	OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
 	OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
-	OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
 	OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
 	OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
 #define STOP_SPR(x, f)	OFFSET(x, paca_struct, stop_sprs.f)

--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -466,25 +466,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check_early	= __machine_check_early_realmode_p8,
 		.platform		= "power8",
 	},
-	{	/* Power9 DD1*/
-		.pvr_mask		= 0xffffff00,
-		.pvr_value		= 0x004e0100,
-		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD1,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power9",
-		.oprofile_type		= PPC_OPROFILE_INVALID,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.machine_check_early	= __machine_check_early_realmode_p9,
-		.platform		= "power9",
-	},
 	{	/* Power9 DD2.0 */
 		.pvr_mask		= 0xffffefff,
 		.pvr_value		= 0x004e0200,

--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -701,9 +701,7 @@ static __init void cpufeatures_cpu_quirks(void)
 	/*
 	 * Not all quirks can be derived from the cpufeatures device tree.
 	 */
-	if ((version & 0xffffff00) == 0x004e0100)
-		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
-	else if ((version & 0xffffefff) == 0x004e0200)
+	if ((version & 0xffffefff) == 0x004e0200)
 		; /* DD2.0 has no feature flag */
 	else if ((version & 0xffffefff) == 0x004e0201)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;

--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -276,9 +276,7 @@ BEGIN_FTR_SECTION
 	 *
 	 * This interrupt can wake directly from idle. If that is the case,
 	 * the machine check is handled then the idle wakeup code is called
-	 * to restore state. In that case, the POWER9 DD1 idle PACA workaround
-	 * is not applied in the early machine check code, which will cause
-	 * bugs.
+	 * to restore state.
 	 */
 	mr	r11,r1			/* Save r1 */
 	lhz	r10,PACA_IN_MCE(r13)

--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -466,43 +466,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
 	blr		/* return 0 for wakeup cause / SRR1 value */
 #endif

-/*
- * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
- * HSPRG0 will be set to the HSPRG0 value of one of the
- * threads in this core. Thus the value we have in r13
- * may not be this thread's paca pointer.
- *
- * Fortunately, the TIR remains invariant. Since this thread's
- * paca pointer is recorded in all its sibling's paca, we can
- * correctly recover this thread's paca pointer if we
- * know the index of this thread in the core.
- *
- * This index can be obtained from the TIR.
- *
- * i.e, thread's position in the core = TIR.
- * If this value is i, then this thread's paca is
- * paca->thread_sibling_pacas[i].
- */
-power9_dd1_recover_paca:
-	mfspr	r4, SPRN_TIR
-	/*
-	 * Since each entry in thread_sibling_pacas is 8 bytes
-	 * we need to left-shift by 3 bits. Thus r4 = i * 8
-	 */
-	sldi	r4, r4, 3
-	/* Get &paca->thread_sibling_pacas[0] in r5 */
-	ld	r5, PACA_SIBLING_PACA_PTRS(r13)
-	/* Load paca->thread_sibling_pacas[i] into r13 */
-	ldx	r13, r4, r5
-	SET_PACA(r13)
-	/*
-	 * Indicate that we have lost NVGPR state
-	 * which needs to be restored from the stack.
-	 */
-	li	r3, 1
-	stb	r3,PACA_NAPSTATELOST(r13)
-	blr
-
 /*
 * Called from machine check handler for powersave wakeups.
 * Low level machine check processing has already been done. Now just
@@ -537,9 +500,6 @@ pnv_powersave_wakeup:
 	ld	r2, PACATOC(r13)

 BEGIN_FTR_SECTION
-BEGIN_FTR_SECTION_NESTED(70)
-	bl	power9_dd1_recover_paca
-END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
 	bl	pnv_restore_hyp_resource_arch300
 FTR_SECTION_ELSE
 	bl	pnv_restore_hyp_resource_arch207
@@ -602,22 +562,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
 	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)

-BEGIN_FTR_SECTION_NESTED(71)
-	/*
-	 * Assume that we are waking up from the state
-	 * same as the Requested Level (RL) in the PSSCR
-	 * which are Bits 60-63
-	 */
-	ld	r5,PACA_REQ_PSSCR(r13)
-	rldicl  r5,r5,0,60
-FTR_SECTION_ELSE_NESTED(71)
 	/*
 	 * 0-3 bits correspond to Power-Saving Level Status
 	 * which indicates the idle state we are waking up from
 	 */
 	mfspr	r5, SPRN_PSSCR
 	rldicl  r5,r5,4,60
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
 	li	r0, 0		/* clear requested_psscr to say we're awake */
 	std	r0, PACA_REQ_PSSCR(r13)
 	cmpd	cr4,r5,r4

--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1250,17 +1250,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
 		 * mappings. If the new process has the foreign real address
 		 * mappings, we must issue a cp_abort to clear any state and
 		 * prevent snooping, corruption or a covert channel.
-		 *
-		 * DD1 allows paste into normal system memory so we do an
-		 * unpaired copy, rather than cp_abort, to clear the buffer,
-		 * since cp_abort is quite expensive.
 		 */
-		if (current_thread_info()->task->thread.used_vas) {
+		if (current_thread_info()->task->thread.used_vas)
 			asm volatile(PPC_CP_ABORT);
-		} else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-			asm volatile(PPC_COPY(%0, %1)
-					: : "r"(dummy_copy_buffer), "r"(0));
-		}
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */


--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -66,10 +66,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	bits = root & RPDS_MASK;
 	root = root & RPDB_MASK;

-	/* P9 DD1 interprets RTS (radix tree size) differently */
 	offset = rts + 31;
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		offset -= 3;

 	/* current implementations only support 52-bit space */
 	if (offset != 52)
@@ -160,17 +157,7 @@ static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
 				      unsigned long clr, unsigned long set,
 				      unsigned long addr, unsigned int shift)
 {
-	unsigned long old = 0;
-
-	if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
-	    pte_present(*ptep)) {
-		/* have to invalidate it first */
-		old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
-		kvmppc_radix_tlbie_page(kvm, addr, shift);
-		set |= _PAGE_PRESENT;
-		old &= _PAGE_PRESENT;
-	}
-	return __radix_pte_update(ptep, clr, set) | old;
+	return __radix_pte_update(ptep, clr, set);
 }

 void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,

--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -378,19 +378,19 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
 {
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

 	if (!pua)
 		/* it_userspace allocation might be delayed */
 		return H_TOO_HARD;

-	mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
+	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
 	if (!mem)
 		return H_TOO_HARD;

 	mm_iommu_mapped_dec(mem);

-	*pua = 0;
+	*pua = cpu_to_be64(0);

 	return H_SUCCESS;
 }
@@ -437,7 +437,8 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 		enum dma_data_direction dir)
 {
 	long ret;
-	unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	unsigned long hpa;
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
 	struct mm_iommu_table_group_mem_t *mem;

 	if (!pua)
@@ -464,7 +465,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (dir != DMA_NONE)
 		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);

-	*pua = ua;
+	*pua = cpu_to_be64(ua);

 	return 0;
 }

--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -200,23 +200,19 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
 {
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);

 	if (!pua)
 		/* it_userspace allocation might be delayed */
 		return H_TOO_HARD;

-	pua = (void *) vmalloc_to_phys(pua);
-	if (WARN_ON_ONCE_RM(!pua))
-		return H_HARDWARE;
-
-	mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+	mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
 	if (!mem)
 		return H_TOO_HARD;

 	mm_iommu_mapped_dec(mem);

-	*pua = 0;
+	*pua = cpu_to_be64(0);

 	return H_SUCCESS;
 }
@@ -268,7 +264,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 {
 	long ret;
 	unsigned long hpa = 0;
-	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
 	struct mm_iommu_table_group_mem_t *mem;

 	if (!pua)
@@ -282,10 +278,6 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
 		return H_HARDWARE;

-	pua = (void *) vmalloc_to_phys(pua);
-	if (WARN_ON_ONCE_RM(!pua))
-		return H_HARDWARE;
-
 	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
 		return H_CLOSED;

@@ -302,7 +294,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (dir != DMA_NONE)
 		kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);

-	*pua = ua;
+	*pua = cpu_to_be64(ua);

 	return 0;
 }

--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1693,14 +1693,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
-		/*
-		 * POWER9 DD1 has an erratum where writing TBU40 causes
-		 * the timebase to lose ticks.  So we don't let the
-		 * timebase offset be changed on P9 DD1.  (It is
-		 * initialized to zero.)
-		 */
-		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2026,8 +2018,6 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	/*
 	 * Set the default HFSCR for the guest from the host value.
 	 * This value is only used on POWER9.
-	 * On POWER9 DD1, TM doesn't work, so we make sure to
-	 * prevent the guest from using it.
 	 * On POWER9, we want to virtualize the doorbell facility, so we
 	 * turn off the HFSCR bit, which causes those instructions to trap.
 	 */

--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -916,9 +916,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
 	mtspr	SPRN_BESCR, r6
 	mtspr	SPRN_PID, r7
 	mtspr	SPRN_WORT, r8
-BEGIN_FTR_SECTION
-	PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
 BEGIN_FTR_SECTION
 	/* POWER8-only registers */
 	ld	r5, VCPU_TCSCR(r4)
@@ -1912,7 +1909,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r5, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r5)
 	cmpwi	cr2, r0, 0
-	beq	cr2, 4f
+	beq	cr2, 2f

 	/*
 	 * Radix: do eieio; tlbsync; ptesync sequence in case we
@@ -1952,11 +1949,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	bdnz	1b
 	ptesync

-2:	/* Flush the ERAT on radix P9 DD1 guest exit */
-BEGIN_FTR_SECTION
-	PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-4:
+2:
 #endif /* CONFIG_PPC_RADIX_MMU */

 	/*
@@ -3367,11 +3360,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_CIABR, r0
 	mtspr	SPRN_DAWRX, r0

-	/* Flush the ERAT on radix P9 DD1 guest exit */
-BEGIN_FTR_SECTION
-	PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-
 BEGIN_MMU_FTR_SECTION
 	b	4f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)

--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -25,18 +25,6 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 	 */
 	eieio();

-	/*
-	 * DD1 bug workaround: If PIPR is less favored than CPPR
-	 * ignore the interrupt or we might incorrectly lose an IPB
-	 * bit.
-	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		__be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
-		u8 pipr = be64_to_cpu(qw1) & 0xff;
-		if (pipr >= xc->hw_cppr)
-			return;
-	}
-
 	/* Perform the acknowledge OS to register cycle. */
 	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));

@@ -89,8 +77,15 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
-	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
+	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW)
 		opal_int_eoi(hw_irq);
+	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		/*
+		 * For LSIs the HW EOI cycle is used rather than PQ bits,
+		 * as they are automatically re-triggred in HW when still
+		 * pending.
+		 */
+		__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
 	} else {
 		uint64_t eoi_val;

@@ -102,20 +97,12 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 		 *
 		 * This allows us to then do a re-trigger if Q was set
 		 * rather than synthetizing an interrupt in software
-		 *
-		 * For LSIs, using the HW EOI cycle works around a problem
-		 * on P9 DD1 PHBs where the other ESB accesses don't work
-		 * properly.
 		 */
-		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
-		else {
-			eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
-
-			/* Re-trigger if needed */
-			if ((eoi_val & 1) && __x_trig_page(xd))
-				__x_writeq(0, __x_trig_page(xd));
-		}
+		eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
+
+		/* Re-trigger if needed */
+		if ((eoi_val & 1) && __x_trig_page(xd))
+			__x_writeq(0, __x_trig_page(xd));
 	}
 }


--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -808,31 +808,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */

-static void update_hid_for_hash(void)
-{
-	unsigned long hid0;
-	unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
-	asm volatile("ptesync": : :"memory");
-	/* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
-	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-		     : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
-	asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
-	trace_tlbie(0, 0, rb, 0, 2, 0, 0);
-
-	/*
-	 * now switch the HID
-	 */
-	hid0  = mfspr(SPRN_HID0);
-	hid0 &= ~HID0_POWER9_RADIX;
-	mtspr(SPRN_HID0, hid0);
-	asm volatile("isync": : :"memory");
-
-	/* Wait for it to happen */
-	while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
-		cpu_relax();
-}
-
 static void __init hash_init_partition_table(phys_addr_t hash_table,
 					     unsigned long htab_size)
 {
@@ -845,8 +820,6 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	htab_size =  __ilog2(htab_size) - 18;
 	mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
 	pr_info("Partition table %p\n", partition_tb);
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		update_hid_for_hash();
 }

 static void __init htab_initialize(void)
@@ -1077,9 +1050,6 @@ void hash__early_init_mmu_secondary(void)
 	/* Initialize hash table for that CPU */
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {

-		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-			update_hid_for_hash();
-
 		if (!cpu_has_feature(CPU_FTR_ARCH_300))
 			mtspr(SPRN_SDR1, _SDR1);
 		else

--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -621,15 +621,12 @@ static int __init add_huge_page_size(unsigned long long size)
 	 * firmware we only add hugetlb support for page sizes that can be
 	 * supported by linux page table layout.
 	 * For now we have
-	 * Radix: 2M
+	 * Radix: 2M and 1G
 	 * Hash: 16M and 16G
 	 */
 	if (radix_enabled()) {
-		if (mmu_psize != MMU_PAGE_2M) {
-			if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
-			    (mmu_psize != MMU_PAGE_1G))
-				return -EINVAL;
-		}
+		if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+			return -EINVAL;
 	} else {
 		if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
 			return -EINVAL;

--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -273,15 +273,7 @@ void arch_exit_mmap(struct mm_struct *mm)
 #ifdef CONFIG_PPC_RADIX_MMU
 void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 {
-
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		isync();
-		mtspr(SPRN_PID, next->context.id);
-		isync();
-		asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
-	} else {
-		mtspr(SPRN_PID, next->context.id);
-		isync();
-	}
+	mtspr(SPRN_PID, next->context.id);
+	isync();
 }
 #endif
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -226,16 +226,6 @@ void radix__mark_rodata_ro(void)
 {
 	unsigned long start, end;

-	/*
-	 * mark_rodata_ro() will mark itself as !writable at some point.
-	 * Due to DD1 workaround in radix__pte_update(), we'll end up with
-	 * an invalid pte and the system will crash quite severly.
-	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
-		return;
-	}
-
 	start = (unsigned long)_stext;
 	end = (unsigned long)__init_begin;

@@ -533,35 +523,6 @@ void __init radix__early_init_devtree(void)
 	return;
 }

-static void update_hid_for_radix(void)
-{
-	unsigned long hid0;
-	unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
-	asm volatile("ptesync": : :"memory");
-	/* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
-	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-		     : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
-	/* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
-	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-		     : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
-	asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
-	trace_tlbie(0, 0, rb, 0, 2, 0, 1);
-	trace_tlbie(0, 0, rb, 0, 2, 1, 1);
-
-	/*
-	 * now switch the HID
-	 */
-	hid0  = mfspr(SPRN_HID0);
-	hid0 |= HID0_POWER9_RADIX;
-	mtspr(SPRN_HID0, hid0);
-	asm volatile("isync": : :"memory");
-
-	/* Wait for it to happen */
-	while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
-		cpu_relax();
-}
-
 static void radix_init_amor(void)
 {
 	/*
@@ -576,22 +537,12 @@ static void radix_init_amor(void)

 static void radix_init_iamr(void)
 {
-	unsigned long iamr;
-
-	/*
-	 * The IAMR should set to 0 on DD1.
-	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		iamr = 0;
-	else
-		iamr = (1ul << 62);
-
 	/*
 	 * Radix always uses key0 of the IAMR to determine if an access is
 	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
 	 * fetch.
 	 */
-	mtspr(SPRN_IAMR, iamr);
+	mtspr(SPRN_IAMR, (1ul << 62));
 }

 void __init radix__early_init_mmu(void)
@@ -644,8 +595,6 @@ void __init radix__early_init_mmu(void)

 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		radix_init_native();
-		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-			update_hid_for_radix();
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 		radix_init_partition_table();
@@ -671,10 +620,6 @@ void radix__early_init_mmu_secondary(void)
 	 * update partition table control register and UPRT
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
-
-		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-			update_hid_for_radix();
-
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

@@ -1095,8 +1040,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	 * To avoid NMMU hang while relaxing access, we need mark
 	 * the pte invalid in between.
 	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
-	    atomic_read(&mm->context.copros) > 0) {
+	if (atomic_read(&mm->context.copros) > 0) {
 		unsigned long old_pte, new_pte;

 		old_pte = __radix_pte_update(ptep, ~0, 0);

--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -1048,24 +1048,6 @@ void radix__flush_tlb_all(void)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }

-void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
-				 unsigned long address)
-{
-	/*
-	 * We track page size in pte only for DD1, So we can
-	 * call this only on DD1.
-	 */
-	if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		VM_WARN_ON(1);
-		return;
-	}
-
-	if (old_pte & R_PAGE_LARGE)
-		radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
-	else
-		radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
-}
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 {

--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -128,10 +128,6 @@ static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
 static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
-static bool use_ic(u64 event)
-{
-	return false;
-}
 #endif /* CONFIG_PPC32 */

 static bool regs_use_siar(struct pt_regs *regs)
@@ -714,14 +710,6 @@ static void pmao_restore_workaround(bool ebb)
 	mtspr(SPRN_PMC6, pmcs[5]);
 }

-static bool use_ic(u64 event)
-{
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1) &&
-			(event == 0x200f2 || event == 0x300f2))
-		return true;
-
-	return false;
-}
 #endif /* CONFIG_PPC64 */

 static void perf_event_interrupt(struct pt_regs *regs);
@@ -1046,7 +1034,6 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
 static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
-	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);

 	if (event->hw.state & PERF_HES_STOPPED)
 		return;
@@ -1056,13 +1043,6 @@ static void power_pmu_read(struct perf_event *event)

 	if (is_ebb_event(event)) {
 		val = read_pmc(event->hw.idx);
-		if (use_ic(event->attr.config)) {
-			val = mfspr(SPRN_IC);
-			if (val > cpuhw->ic_init)
-				val = val - cpuhw->ic_init;
-			else
-				val = val + (0 - cpuhw->ic_init);
-		}
 		local64_set(&event->hw.prev_count, val);
 		return;
 	}
@@ -1076,13 +1056,6 @@ static void power_pmu_read(struct perf_event *event)
 		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
-		if (use_ic(event->attr.config)) {
-			val = mfspr(SPRN_IC);
-			if (val > cpuhw->ic_init)
-				val = val - cpuhw->ic_init;
-			else
-				val = val + (0 - cpuhw->ic_init);
-		}
 		delta = check_and_compute_delta(prev, val);
 		if (!delta)
 			return;
@@ -1535,13 +1508,6 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 					event->attr.branch_sample_type);
 	}

-	/*
-	 * Workaround for POWER9 DD1 to use the Instruction Counter
-	 * register value for instruction counting
-	 */
-	if (use_ic(event->attr.config))
-		cpuhw->ic_init = mfspr(SPRN_IC);
-
 	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	return ret;

--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -59,7 +59,7 @@ static bool is_event_valid(u64 event)
 {
 	u64 valid_mask = EVENT_VALID_MASK;

-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		valid_mask = p9_EVENT_VALID_MASK;

 	return !(event & ~valid_mask);
@@ -86,8 +86,6 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 	 * Incase of Power9:
 	 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
 	 *               or if group already have any marked events.
-	 * Non-Marked events (for DD1):
-	 *	MMCRA[SDAR_MODE] will be set to 0b01
 	 * For rest
 	 *	MMCRA[SDAR_MODE] will be set from event code.
 	 *      If sdar_mode from event is zero, default to 0b01. Hardware
@@ -96,7 +94,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
 			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
-		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
+		else if (p9_SDAR_MODE(event))
 			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
 		else
 			*mmcra |= MMCRA_SDAR_MODE_DCACHE;
@@ -106,7 +104,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)

 static u64 thresh_cmp_val(u64 value)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return value << p9_MMCRA_THR_CMP_SHIFT;

 	return value << MMCRA_THR_CMP_SHIFT;
@@ -114,7 +112,7 @@ static u64 thresh_cmp_val(u64 value)

 static unsigned long combine_from_event(u64 event)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return p9_EVENT_COMBINE(event);

 	return EVENT_COMBINE(event);
@@ -122,7 +120,7 @@ static unsigned long combine_from_event(u64 event)

 static unsigned long combine_shift(unsigned long pmc)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return p9_MMCR1_COMBINE_SHIFT(pmc);

 	return MMCR1_COMBINE_SHIFT(pmc);

--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -158,11 +158,6 @@
 	CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
 	CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL

-/*
- * Lets restrict use of PMC5 for instruction counting.
- */
-#define P9_DD1_TEST_ADDER	(ISA207_TEST_ADDER | CNST_PMC_VAL(5))
-
 /* Bits in MMCR1 for PowerISA v2.07 */
 #define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))
 #define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))

--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -219,12 +219,6 @@ static struct attribute_group power9_pmu_events_group = {
 	.attrs = power9_events_attr,
 };

-static const struct attribute_group *power9_isa207_pmu_attr_groups[] = {
-	&isa207_pmu_format_group,
-	&power9_pmu_events_group,
-	NULL,
-};
-
 PMU_FORMAT_ATTR(event,		"config:0-51");
 PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
 PMU_FORMAT_ATTR(mark,		"config:8");
@@ -267,17 +261,6 @@ static const struct attribute_group *power9_pmu_attr_groups[] = {
 	NULL,
 };

-static int power9_generic_events_dd1[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_ICT_NOSLOT_CYC,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_DISP,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL_ALT,
-	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
-	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
-	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1_FIN,
-};
-
 static int power9_generic_events[] = {
 	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_ICT_NOSLOT_CYC,
@@ -439,25 +422,6 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {

 #undef C

-static struct power_pmu power9_isa207_pmu = {
-	.name			= "POWER9",
-	.n_counter		= MAX_PMU_COUNTERS,
-	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= P9_DD1_TEST_ADDER,
-	.compute_mmcr		= isa207_compute_mmcr,
-	.config_bhrb		= power9_config_bhrb,
-	.bhrb_filter_map	= power9_bhrb_filter_map,
-	.get_constraint		= isa207_get_constraint,
-	.get_alternatives	= power9_get_alternatives,
-	.disable_pmc		= isa207_disable_pmc,
-	.flags			= PPMU_NO_SIAR | PPMU_ARCH_207S,
-	.n_generic		= ARRAY_SIZE(power9_generic_events_dd1),
-	.generic_events		= power9_generic_events_dd1,
-	.cache_events		= &power9_cache_events,
-	.attr_groups		= power9_isa207_pmu_attr_groups,
-	.bhrb_nr		= 32,
-};
-
 static struct power_pmu power9_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
@@ -500,23 +464,7 @@ static int __init init_power9_pmu(void)
 		}
 	}

-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		/*
-		 * Since PM_INST_CMPL may not provide right counts in all
-		 * sampling scenarios in power9 DD1, instead use PM_INST_DISP.
-		 */
-		EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP;
-		/*
-		 * Power9 DD1 should use PM_BR_CMPL_ALT event code for
-		 * "branches" to provide correct counter value.
-		 */
-		EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT;
-		EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT;
-		rc = register_power_pmu(&power9_isa207_pmu);
-	} else {
-		rc = register_power_pmu(&power9_pmu);
-	}
-
+	rc = register_power_pmu(&power9_pmu);
 	if (rc)
 		return rc;


--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,7 +6,7 @@ obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o

 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o

--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -177,11 +177,6 @@ static void pnv_alloc_idle_core_states(void)
 			paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
 			paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
 			paca_ptrs[cpu]->thread_mask = 1 << j;
-			if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
-				continue;
-			paca_ptrs[cpu]->thread_sibling_pacas =
-				kmalloc_node(paca_ptr_array_size,
-					     GFP_KERNEL, node);
 		}
 	}

@@ -805,29 +800,6 @@ static int __init pnv_init_idle_states(void)

 	pnv_alloc_idle_core_states();

-	/*
-	 * For each CPU, record its PACA address in each of it's
-	 * sibling thread's PACA at the slot corresponding to this
-	 * CPU's index in the core.
-	 */
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		int cpu;
-
-		pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
-		for_each_present_cpu(cpu) {
-			int base_cpu = cpu_first_thread_sibling(cpu);
-			int idx = cpu_thread_in_core(cpu);
-			int i;
-
-			for (i = 0; i < threads_per_core; i++) {
-				int j = base_cpu + i;
-
-				paca_ptrs[j]->thread_sibling_pacas[idx] =
-					paca_ptrs[cpu];
-			}
-		}
-	}
-
 	if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
 		ppc_md.power_save = power7_idle;


--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * TCE helpers for IODA PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/iommu.h>
+
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include "pci.h"
+
+void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+		void *tce_mem, u64 tce_size,
+		u64 dma_offset, unsigned int page_shift)
+{
+	tbl->it_blocksize = 16;
+	tbl->it_base = (unsigned long)tce_mem;
+	tbl->it_page_shift = page_shift;
+	tbl->it_offset = dma_offset >> tbl->it_page_shift;
+	tbl->it_index = 0;
+	tbl->it_size = tce_size >> 3;
+	tbl->it_busno = 0;
+	tbl->it_type = TCE_PCI;
+}
+
+static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+{
+	struct page *tce_mem = NULL;
+	__be64 *addr;
+
+	tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+	if (!tce_mem) {
+		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
+				shift);
+		return NULL;
+	}
+	addr = page_address(tce_mem);
+	memset(addr, 0, 1UL << shift);
+
+	return addr;
+}
+
+static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+{
+	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+	int  level = tbl->it_indirect_levels;
+	const long shift = ilog2(tbl->it_level_size);
+	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+	while (level) {
+		int n = (idx & mask) >> (level * shift);
+		unsigned long tce;
+
+		if (tmp[n] == 0) {
+			__be64 *tmp2;
+
+			if (!alloc)
+				return NULL;
+
+			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
+					ilog2(tbl->it_level_size) + 3);
+			if (!tmp2)
+				return NULL;
+
+			tmp[n] = cpu_to_be64(__pa(tmp2) |
+					TCE_PCI_READ | TCE_PCI_WRITE);
+		}
+		tce = be64_to_cpu(tmp[n]);
+
+		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+		idx &= ~mask;
+		mask >>= shift;
+		--level;
+	}
+
+	return tmp + idx;
+}
+
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	u64 proto_tce = iommu_direction_to_tce_perm(direction);
+	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+	long i;
+
+	if (proto_tce & TCE_PCI_WRITE)
+		proto_tce |= TCE_PCI_READ;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long newtce = proto_tce |
+			((rpn + i) << tbl->it_page_shift);
+		unsigned long idx = index - tbl->it_offset + i;
+
+		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction,
+		bool alloc)
+{
+	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+	unsigned long newtce = *hpa | proto_tce, oldtce;
+	unsigned long idx = index - tbl->it_offset;
+	__be64 *ptce = NULL;
+
+	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+	if (*direction == DMA_NONE) {
+		ptce = pnv_tce(tbl, false, idx, false);
+		if (!ptce) {
+			*hpa = 0;
+			return 0;
+		}
+	}
+
+	if (!ptce) {
+		ptce = pnv_tce(tbl, false, idx, alloc);
+		if (!ptce)
+			return alloc ? H_HARDWARE : H_TOO_HARD;
+	}
+
+	if (newtce & TCE_PCI_WRITE)
+		newtce |= TCE_PCI_READ;
+
+	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
+	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+	*direction = iommu_tce_direction(oldtce);
+
+	return 0;
+}
+
+__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
+{
+	if (WARN_ON_ONCE(!tbl->it_userspace))
+		return NULL;
+
+	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
+}
+#endif
+
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+{
+	long i;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long idx = index - tbl->it_offset + i;
+		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
+
+		if (ptce)
+			*ptce = cpu_to_be64(0);
+	}
+}
+
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
+
+	if (!ptce)
+		return 0;
+
+	return be64_to_cpu(*ptce);
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+		unsigned long size, unsigned int levels)
+{
+	const unsigned long addr_ul = (unsigned long) addr &
+			~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+	if (levels) {
+		long i;
+		u64 *tmp = (u64 *) addr_ul;
+
+		for (i = 0; i < size; ++i) {
+			unsigned long hpa = be64_to_cpu(tmp[i]);
+
+			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+				continue;
+
+			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+					levels - 1);
+		}
+	}
+
+	free_pages(addr_ul, get_order(size << 3));
+}
+
+void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+	const unsigned long size = tbl->it_indirect_levels ?
+			tbl->it_level_size : tbl->it_size;
+
+	if (!tbl->it_size)
+		return;
+
+	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+			tbl->it_indirect_levels);
+	if (tbl->it_userspace) {
+		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
+				tbl->it_indirect_levels);
+	}
+}
+
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+		unsigned int levels, unsigned long limit,
+		unsigned long *current_offset, unsigned long *total_allocated)
+{
+	__be64 *addr, *tmp;
+	unsigned long allocated = 1UL << shift;
+	unsigned int entries = 1UL << (shift - 3);
+	long i;
+
+	addr = pnv_alloc_tce_level(nid, shift);
+	*total_allocated += allocated;
+
+	--levels;
+	if (!levels) {
+		*current_offset += allocated;
+		return addr;
+	}
+
+	for (i = 0; i < entries; ++i) {
+		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+				levels, limit, current_offset, total_allocated);
+		if (!tmp)
+			break;
+
+		addr[i] = cpu_to_be64(__pa(tmp) |
+				TCE_PCI_READ | TCE_PCI_WRITE);
+
+		if (*current_offset >= limit)
+			break;
+	}
+
+	return addr;
+}
+
+long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		bool alloc_userspace_copy, struct iommu_table *tbl)
+{
+	void *addr, *uas = NULL;
+	unsigned long offset = 0, level_shift, total_allocated = 0;
+	unsigned long total_allocated_uas = 0;
+	const unsigned int window_shift = ilog2(window_size);
+	unsigned int entries_shift = window_shift - page_shift;
+	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+			PAGE_SHIFT);
+	const unsigned long tce_table_size = 1UL << table_shift;
+	unsigned int tmplevels = levels;
+
+	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+		return -EINVAL;
+
+	if (!is_power_of_2(window_size))
+		return -EINVAL;
+
+	if (alloc_userspace_copy && (window_size > (1ULL << 32)))
+		tmplevels = 1;
+
+	/* Adjust direct table size from window_size and levels */
+	entries_shift = (entries_shift + levels - 1) / levels;
+	level_shift = entries_shift + 3;
+	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
+
+	if ((level_shift - 3) * levels + page_shift >= 60)
+		return -EINVAL;
+
+	/* Allocate TCE table */
+	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+			tmplevels, tce_table_size, &offset, &total_allocated);
+
+	/* addr==NULL means that the first level allocation failed */
+	if (!addr)
+		return -ENOMEM;
+
+	/*
+	 * First level was allocated but some lower level failed as
+	 * we did not allocate as much as we wanted,
+	 * release partially allocated table.
+	 */
+	if (tmplevels == levels && offset < tce_table_size)
+		goto free_tces_exit;
+
+	/* Allocate userspace view of the TCE table */
+	if (alloc_userspace_copy) {
+		offset = 0;
+		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+				levels, tce_table_size, &offset,
+				&total_allocated_uas);
+		if (!uas)
+			goto free_tces_exit;
+		if (tmplevels == levels && (offset < tce_table_size ||
+				total_allocated_uas != total_allocated))
+			goto free_uas_exit;
+	}
+
+	/* Setup linux iommu table */
+	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+			page_shift);
+	tbl->it_level_size = 1ULL << (level_shift - 3);
+	tbl->it_indirect_levels = levels - 1;
+	tbl->it_allocated_size = total_allocated;
+	tbl->it_userspace = uas;
+	tbl->it_nid = nid;
+
+	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
+			window_size, tce_table_size, bus_offset, tbl->it_base,
+			tbl->it_userspace, tmplevels, levels);
+
+	return 0;
+
+free_uas_exit:
+	pnv_pci_ioda2_table_do_free_pages(uas,
+			1ULL << (level_shift - 3), levels - 1);
+free_tces_exit:
+	pnv_pci_ioda2_table_do_free_pages(addr,
+			1ULL << (level_shift - 3), levels - 1);
+
+	return -ENOMEM;
+}
+
+static void pnv_iommu_table_group_link_free(struct rcu_head *head)
+{
+	struct iommu_table_group_link *tgl = container_of(head,
+			struct iommu_table_group_link, rcu);
+
+	kfree(tgl);
+}
+
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
+{
+	long i;
+	bool found;
+	struct iommu_table_group_link *tgl;
+
+	if (!tbl || !table_group)
+		return;
+
+	/* Remove link to a group from table's list of attached groups */
+	found = false;
+	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+		if (tgl->table_group == table_group) {
+			list_del_rcu(&tgl->next);
+			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+			found = true;
+			break;
+		}
+	}
+	if (WARN_ON(!found))
+		return;
+
+	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+	found = false;
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		if (table_group->tables[i] == tbl) {
+			table_group->tables[i] = NULL;
+			found = true;
+			break;
+		}
+	}
+	WARN_ON(!found);
+}
+
+long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
+{
+	struct iommu_table_group_link *tgl = NULL;
+
+	if (WARN_ON(!tbl || !table_group))
+		return -EINVAL;
+
+	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+			node);
+	if (!tgl)
+		return -ENOMEM;
+
+	tgl->table_group = table_group;
+	list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+	table_group->tables[num] = tbl;
+
+	return 0;
+}
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -51,12 +51,8 @@
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
 #define PNV_IODA1_DMA32_SEGSIZE	0x10000000

-#define POWERNV_IOMMU_DEFAULT_LEVELS	1
-#define POWERNV_IOMMU_MAX_LEVELS	5
-
 static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
 					      "NPU_OCAPI" };
-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);

 void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 			    const char *fmt, ...)
@@ -2007,7 +2003,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
 static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
 		unsigned long *hpa, enum dma_data_direction *direction)
 {
-	long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+	long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);

 	if (!ret)
 		pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
@@ -2018,7 +2014,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
 static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
 		unsigned long *hpa, enum dma_data_direction *direction)
 {
-	long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+	long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);

 	if (!ret)
 		pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
@@ -2040,6 +2036,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
 #ifdef CONFIG_IOMMU_API
 	.exchange = pnv_ioda1_tce_xchg,
 	.exchange_rm = pnv_ioda1_tce_xchg_rm,
+	.useraddrptr = pnv_tce_useraddrptr,
 #endif
 	.clear = pnv_ioda1_tce_free,
 	.get = pnv_tce_get,
@@ -2171,7 +2168,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
 		unsigned long *hpa, enum dma_data_direction *direction)
 {
-	long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+	long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);

 	if (!ret)
 		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
@@ -2182,7 +2179,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
 static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
 		unsigned long *hpa, enum dma_data_direction *direction)
 {
-	long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+	long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);

 	if (!ret)
 		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
@@ -2199,20 +2196,16 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
 	pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
 }

-static void pnv_ioda2_table_free(struct iommu_table *tbl)
-{
-	pnv_pci_ioda2_table_free_pages(tbl);
-}
-
 static struct iommu_table_ops pnv_ioda2_iommu_ops = {
 	.set = pnv_ioda2_tce_build,
 #ifdef CONFIG_IOMMU_API
 	.exchange = pnv_ioda2_tce_xchg,
 	.exchange_rm = pnv_ioda2_tce_xchg_rm,
+	.useraddrptr = pnv_tce_useraddrptr,
 #endif
 	.clear = pnv_ioda2_tce_free,
 	.get = pnv_tce_get,
-	.free = pnv_ioda2_table_free,
+	.free = pnv_pci_ioda2_table_free_pages,
 };

 static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
@@ -2462,13 +2455,9 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
 		pe->tce_bypass_enabled = enable;
 }

-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
-		__u32 page_shift, __u64 window_size, __u32 levels,
-		struct iommu_table *tbl);
-
 static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
 		int num, __u32 page_shift, __u64 window_size, __u32 levels,
-		struct iommu_table **ptbl)
+		bool alloc_userspace_copy, struct iommu_table **ptbl)
 {
 	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
 			table_group);
@@ -2485,7 +2474,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,

 	ret = pnv_pci_ioda2_table_alloc_pages(nid,
 			bus_offset, page_shift, window_size,
-			levels, tbl);
+			levels, alloc_userspace_copy, tbl);
 	if (ret) {
 		iommu_tce_table_put(tbl);
 		return ret;
@@ -2518,7 +2507,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
 	rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
 			IOMMU_PAGE_SHIFT_4K,
 			window_size,
-			POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+			POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
 	if (rc) {
 		pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
 				rc);
@@ -2605,7 +2594,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
 				tce_table_size, direct_table_size);
 	}

-	return bytes;
+	return bytes + bytes; /* one for HW table, one for userspace copy */
+}
+
+static long pnv_pci_ioda2_create_table_userspace(
+		struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	return pnv_pci_ioda2_create_table(table_group,
+			num, page_shift, window_size, levels, true, ptbl);
 }

 static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
@@ -2634,7 +2632,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)

 static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
 	.get_table_size = pnv_pci_ioda2_get_table_size,
-	.create_table = pnv_pci_ioda2_create_table,
+	.create_table = pnv_pci_ioda2_create_table_userspace,
 	.set_window = pnv_pci_ioda2_set_window,
 	.unset_window = pnv_pci_ioda2_unset_window,
 	.take_ownership = pnv_ioda2_take_ownership,
@@ -2739,7 +2737,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)

 static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
 	.get_table_size = pnv_pci_ioda2_get_table_size,
-	.create_table = pnv_pci_ioda2_create_table,
+	.create_table = pnv_pci_ioda2_create_table_userspace,
 	.set_window = pnv_pci_ioda2_npu_set_window,
 	.unset_window = pnv_pci_ioda2_npu_unset_window,
 	.take_ownership = pnv_ioda2_npu_take_ownership,
@@ -2773,144 +2771,6 @@ static void pnv_pci_ioda_setup_iommu_api(void)
 static void pnv_pci_ioda_setup_iommu_api(void) { };
 #endif

-static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
-		unsigned levels, unsigned long limit,
-		unsigned long *current_offset, unsigned long *total_allocated)
-{
-	struct page *tce_mem = NULL;
-	__be64 *addr, *tmp;
-	unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
-	unsigned long allocated = 1UL << (order + PAGE_SHIFT);
-	unsigned entries = 1UL << (shift - 3);
-	long i;
-
-	tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
-	if (!tce_mem) {
-		pr_err("Failed to allocate a TCE memory, order=%d\n", order);
-		return NULL;
-	}
-	addr = page_address(tce_mem);
-	memset(addr, 0, allocated);
-	*total_allocated += allocated;
-
-	--levels;
-	if (!levels) {
-		*current_offset += allocated;
-		return addr;
-	}
-
-	for (i = 0; i < entries; ++i) {
-		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
-				levels, limit, current_offset, total_allocated);
-		if (!tmp)
-			break;
-
-		addr[i] = cpu_to_be64(__pa(tmp) |
-				TCE_PCI_READ | TCE_PCI_WRITE);
-
-		if (*current_offset >= limit)
-			break;
-	}
-
-	return addr;
-}
-
-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
-		unsigned long size, unsigned level);
-
-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
-		__u32 page_shift, __u64 window_size, __u32 levels,
-		struct iommu_table *tbl)
-{
-	void *addr;
-	unsigned long offset = 0, level_shift, total_allocated = 0;
-	const unsigned window_shift = ilog2(window_size);
-	unsigned entries_shift = window_shift - page_shift;
-	unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
-	const unsigned long tce_table_size = 1UL << table_shift;
-
-	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
-		return -EINVAL;
-
-	if (!is_power_of_2(window_size))
-		return -EINVAL;
-
-	/* Adjust direct table size from window_size and levels */
-	entries_shift = (entries_shift + levels - 1) / levels;
-	level_shift = entries_shift + 3;
-	level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
-
-	if ((level_shift - 3) * levels + page_shift >= 55)
-		return -EINVAL;
-
-	/* Allocate TCE table */
-	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
-			levels, tce_table_size, &offset, &total_allocated);
-
-	/* addr==NULL means that the first level allocation failed */
-	if (!addr)
-		return -ENOMEM;
-
-	/*
-	 * First level was allocated but some lower level failed as
-	 * we did not allocate as much as we wanted,
-	 * release partially allocated table.
-	 */
-	if (offset < tce_table_size) {
-		pnv_pci_ioda2_table_do_free_pages(addr,
-				1ULL << (level_shift - 3), levels - 1);
-		return -ENOMEM;
-	}
-
-	/* Setup linux iommu table */
-	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
-			page_shift);
-	tbl->it_level_size = 1ULL << (level_shift - 3);
-	tbl->it_indirect_levels = levels - 1;
-	tbl->it_allocated_size = total_allocated;
-
-	pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
-			window_size, tce_table_size, bus_offset);
-
-	return 0;
-}
-
-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
-		unsigned long size, unsigned level)
-{
-	const unsigned long addr_ul = (unsigned long) addr &
-			~(TCE_PCI_READ | TCE_PCI_WRITE);
-
-	if (level) {
-		long i;
-		u64 *tmp = (u64 *) addr_ul;
-
-		for (i = 0; i < size; ++i) {
-			unsigned long hpa = be64_to_cpu(tmp[i]);
-
-			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
-				continue;
-
-			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
-					level - 1);
-		}
-	}
-
-	free_pages(addr_ul, get_order(size << 3));
-}
-
-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
-{
-	const unsigned long size = tbl->it_indirect_levels ?
-			tbl->it_level_size : tbl->it_size;
-
-	if (!tbl->it_size)
-		return;
-
-	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
-			tbl->it_indirect_levels);
-}
-
 static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
 {
 	struct pci_controller *hose = phb->hose;

--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -802,85 +802,6 @@ struct pci_ops pnv_pci_ops = {
 	.write = pnv_pci_write_config,
 };

-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
-{
-	__be64 *tmp = ((__be64 *)tbl->it_base);
-	int  level = tbl->it_indirect_levels;
-	const long shift = ilog2(tbl->it_level_size);
-	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
-
-	while (level) {
-		int n = (idx & mask) >> (level * shift);
-		unsigned long tce = be64_to_cpu(tmp[n]);
-
-		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
-		idx &= ~mask;
-		mask >>= shift;
-		--level;
-	}
-
-	return tmp + idx;
-}
-
-int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
-		unsigned long uaddr, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	u64 proto_tce = iommu_direction_to_tce_perm(direction);
-	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
-	long i;
-
-	if (proto_tce & TCE_PCI_WRITE)
-		proto_tce |= TCE_PCI_READ;
-
-	for (i = 0; i < npages; i++) {
-		unsigned long newtce = proto_tce |
-			((rpn + i) << tbl->it_page_shift);
-		unsigned long idx = index - tbl->it_offset + i;
-
-		*(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_IOMMU_API
-int pnv_tce_xchg(struct iommu_table *tbl, long index,
-		unsigned long *hpa, enum dma_data_direction *direction)
-{
-	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
-	unsigned long newtce = *hpa | proto_tce, oldtce;
-	unsigned long idx = index - tbl->it_offset;
-
-	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
-
-	if (newtce & TCE_PCI_WRITE)
-		newtce |= TCE_PCI_READ;
-
-	oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
-	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
-	*direction = iommu_tce_direction(oldtce);
-
-	return 0;
-}
-#endif
-
-void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
-{
-	long i;
-
-	for (i = 0; i < npages; i++) {
-		unsigned long idx = index - tbl->it_offset + i;
-
-		*(pnv_tce(tbl, idx)) = cpu_to_be64(0);
-	}
-}
-
-unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
-{
-	return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
-}
-
 struct iommu_table *pnv_pci_table_alloc(int nid)
 {
 	struct iommu_table *tbl;
@@ -895,85 +816,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
 	return tbl;
 }

-long pnv_pci_link_table_and_group(int node, int num,
-		struct iommu_table *tbl,
-		struct iommu_table_group *table_group)
-{
-	struct iommu_table_group_link *tgl = NULL;
-
-	if (WARN_ON(!tbl || !table_group))
-		return -EINVAL;
-
-	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
-			node);
-	if (!tgl)
-		return -ENOMEM;
-
-	tgl->table_group = table_group;
-	list_add_rcu(&tgl->next, &tbl->it_group_list);
-
-	table_group->tables[num] = tbl;
-
-	return 0;
-}
-
-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
-	struct iommu_table_group_link *tgl = container_of(head,
-			struct iommu_table_group_link, rcu);
-
-	kfree(tgl);
-}
-
-void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
-		struct iommu_table_group *table_group)
-{
-	long i;
-	bool found;
-	struct iommu_table_group_link *tgl;
-
-	if (!tbl || !table_group)
-		return;
-
-	/* Remove link to a group from table's list of attached groups */
-	found = false;
-	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
-		if (tgl->table_group == table_group) {
-			list_del_rcu(&tgl->next);
-			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
-			found = true;
-			break;
-		}
-	}
-	if (WARN_ON(!found))
-		return;
-
-	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
-	found = false;
-	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-		if (table_group->tables[i] == tbl) {
-			table_group->tables[i] = NULL;
-			found = true;
-			break;
-		}
-	}
-	WARN_ON(!found);
-}
-
-void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
-			       void *tce_mem, u64 tce_size,
-			       u64 dma_offset, unsigned page_shift)
-{
-	tbl->it_blocksize = 16;
-	tbl->it_base = (unsigned long)tce_mem;
-	tbl->it_page_shift = page_shift;
-	tbl->it_offset = dma_offset >> tbl->it_page_shift;
-	tbl->it_index = 0;
-	tbl->it_size = tce_size >> 3;
-	tbl->it_busno = 0;
-	tbl->it_type = TCE_PCI;
-}
-
 void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 {
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);

--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -197,13 +197,6 @@ struct pnv_phb {
 };

 extern struct pci_ops pnv_pci_ops;
-extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
-		unsigned long uaddr, enum dma_data_direction direction,
-		unsigned long attrs);
-extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
-extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
-		unsigned long *hpa, enum dma_data_direction *direction);
-extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);

 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
 				unsigned char *log_buff);
@@ -213,14 +206,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
 		      int where, int size, u32 val);
 extern struct iommu_table *pnv_pci_table_alloc(int nid);

-extern long pnv_pci_link_table_and_group(int node, int num,
-		struct iommu_table *tbl,
-		struct iommu_table_group *table_group);
-extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
-		struct iommu_table_group *table_group);
-extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
-				      void *tce_mem, u64 tce_size,
-				      u64 dma_offset, unsigned page_shift);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_init_npu_phb(struct device_node *np);
@@ -257,4 +242,33 @@ extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
 extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
 extern int pnv_npu2_init(struct pnv_phb *phb);

+/* pci-ioda-tce.c */
+#define POWERNV_IOMMU_DEFAULT_LEVELS	1
+#define POWERNV_IOMMU_MAX_LEVELS	5
+
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction,
+		bool alloc);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
+		bool alloc);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		bool alloc_userspace_copy, struct iommu_table *tbl);
+extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+		struct iommu_table_group *table_group);
+extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+		void *tce_mem, u64 tce_size,
+		u64 dma_offset, unsigned int page_shift);
+
 #endif /* __POWERNV_PCI_H */
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -283,23 +283,6 @@ static void pnv_cause_ipi(int cpu)
 	ic_cause_ipi(cpu);
 }

-static void pnv_p9_dd1_cause_ipi(int cpu)
-{
-	int this_cpu = get_cpu();
-
-	/*
-	 * POWER9 DD1 has a global addressed msgsnd, but for now we restrict
-	 * IPIs to same core, because it requires additional synchronization
-	 * for inter-core doorbells which we do not implement.
-	 */
-	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu)))
-		doorbell_global_ipi(cpu);
-	else
-		ic_cause_ipi(cpu);
-
-	put_cpu();
-}
-
 static void __init pnv_smp_probe(void)
 {
 	if (xive_enabled())
@@ -311,14 +294,10 @@ static void __init pnv_smp_probe(void)
 		ic_cause_ipi = smp_ops->cause_ipi;
 		WARN_ON(!ic_cause_ipi);

-		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-			if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-				smp_ops->cause_ipi = pnv_p9_dd1_cause_ipi;
-			else
-				smp_ops->cause_ipi = doorbell_global_ipi;
-		} else {
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			smp_ops->cause_ipi = doorbell_global_ipi;
+		else
 			smp_ops->cause_ipi = pnv_cause_ipi;
-		}
 	}
 }


--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -319,7 +319,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 		 * The FW told us to call it. This happens for some
 		 * interrupt sources that need additional HW whacking
 		 * beyond the ESB manipulation. For example LPC interrupts
-		 * on P9 DD1.0 need a latch to be clared in the LPC bridge
+		 * on P9 DD1.0 needed a latch to be clared in the LPC bridge
 		 * itself. The Firmware will take care of it.
 		 */
 		if (WARN_ON_ONCE(!xive_ops->eoi))
@@ -337,9 +337,9 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 		 * This allows us to then do a re-trigger if Q was set
 		 * rather than synthesizing an interrupt in software
 		 *
-		 * For LSIs, using the HW EOI cycle works around a problem
-		 * on P9 DD1 PHBs where the other ESB accesses don't work
-		 * properly.
+		 * For LSIs the HW EOI cycle is used rather than PQ bits,
+		 * as they are automatically re-triggred in HW when still
+		 * pending.
 		 */
 		if (xd->flags & XIVE_IRQ_FLAG_LSI)
 			xive_esb_read(xd, XIVE_ESB_LOAD_EOI);

--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2429,7 +2429,6 @@ static void dump_one_paca(int cpu)
 	DUMP(p, thread_idle_state, "%#-*x");
 	DUMP(p, thread_mask, "%#-*x");
 	DUMP(p, subcore_sibling_mask, "%#-*x");
-	DUMP(p, thread_sibling_pacas, "%-*px");
 	DUMP(p, requested_psscr, "%#-*llx");
 	DUMP(p, stop_sprs.pid, "%#-*llx");
 	DUMP(p, stop_sprs.ldbar, "%#-*llx");

--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -850,14 +850,6 @@ static inline bool cxl_is_power9(void)
 	return false;
 }

-static inline bool cxl_is_power9_dd1(void)
-{
-	if ((pvr_version_is(PVR_POWER9)) &&
-	    cpu_has_feature(CPU_FTR_POWER9_DD1))
-		return true;
-	return false;
-}
-
 ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
 				loff_t off, size_t count);


--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -102,10 +102,6 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
 	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
 	if (rc)
 		return rc;
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-		/* workaround for DD1 - nbwind = capiind */
-		cfg->dsnctl |= ((u64)0x02 << (63-47));
-	}

 	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
 	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;

--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -463,23 +463,21 @@ int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 	/* nMMU_ID Defaults to: b’000001001’*/
 	xsl_dsnctl |= ((u64)0x09 << (63-28));

-	if (!(cxl_is_power9_dd1())) {
-		/*
-		 * Used to identify CAPI packets which should be sorted into
-		 * the Non-Blocking queues by the PHB. This field should match
-		 * the PHB PBL_NBW_CMPM register
-		 * nbwind=0x03, bits [57:58], must include capi indicator.
-		 * Not supported on P9 DD1.
-		 */
-		xsl_dsnctl |= (nbwind << (63-55));
+	/*
+	 * Used to identify CAPI packets which should be sorted into
+	 * the Non-Blocking queues by the PHB. This field should match
+	 * the PHB PBL_NBW_CMPM register
+	 * nbwind=0x03, bits [57:58], must include capi indicator.
+	 * Not supported on P9 DD1.
+	 */
+	xsl_dsnctl |= (nbwind << (63-55));

-		/*
-		 * Upper 16b address bits of ASB_Notify messages sent to the
-		 * system. Need to match the PHB’s ASN Compare/Mask Register.
-		 * Not supported on P9 DD1.
-		 */
-		xsl_dsnctl |= asnind;
-	}
+	/*
+	 * Upper 16b address bits of ASB_Notify messages sent to the
+	 * system. Need to match the PHB’s ASN Compare/Mask Register.
+	 * Not supported on P9 DD1.
+	 */
+	xsl_dsnctl |= asnind;

 	*reg = xsl_dsnctl;
 	return 0;
@@ -537,15 +535,8 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
 	/* Snoop machines */
 	cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL);

-	if (cxl_is_power9_dd1()) {
-		/* Disabling deadlock counter CAR */
-		cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
-		/* Enable NORST */
-		cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
-	} else {
-		/* Enable NORST and DD2 features */
-		cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
-	}
+	/* Enable NORST and DD2 features */
+	cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);

 	/*
 	 * Check if PSL has data-cache. We need to flush adapter datacache

--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container,
 	return 0;
 }

-static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
-		struct mm_struct *mm)
-{
-	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
-			tbl->it_size, PAGE_SIZE);
-	unsigned long *uas;
-	long ret;
-
-	BUG_ON(tbl->it_userspace);
-
-	ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
-	if (ret)
-		return ret;
-
-	uas = vzalloc(cb);
-	if (!uas) {
-		decrement_locked_vm(mm, cb >> PAGE_SHIFT);
-		return -ENOMEM;
-	}
-	tbl->it_userspace = uas;
-
-	return 0;
-}
-
-static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
-		struct mm_struct *mm)
-{
-	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
-			tbl->it_size, PAGE_SIZE);
-
-	if (!tbl->it_userspace)
-		return;
-
-	vfree(tbl->it_userspace);
-	tbl->it_userspace = NULL;
-	decrement_locked_vm(mm, cb >> PAGE_SHIFT);
-}
-
 static bool tce_page_is_contained(struct page *page, unsigned page_shift)
 {
 	/*
@@ -482,20 +444,20 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container,
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	int ret;
 	unsigned long hpa = 0;
-	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

 	if (!pua)
 		return;

-	ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
-			&hpa, &mem);
+	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
+			IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
 	if (ret)
-		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
-				__func__, *pua, entry, ret);
+		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
+				__func__, be64_to_cpu(*pua), entry, ret);
 	if (mem)
 		mm_iommu_mapped_dec(mem);

-	*pua = 0;
+	*pua = cpu_to_be64(0);
 }

 static int tce_iommu_clear(struct tce_container *container,
@@ -599,16 +561,9 @@ static long tce_iommu_build_v2(struct tce_container *container,
 	unsigned long hpa;
 	enum dma_data_direction dirtmp;

-	if (!tbl->it_userspace) {
-		ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
-		if (ret)
-			return ret;
-	}
-
 	for (i = 0; i < pages; ++i) {
 		struct mm_iommu_table_group_mem_t *mem = NULL;
-		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
-				entry + i);
+		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

 		ret = tce_iommu_prereg_ua_to_hpa(container,
 				tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
@@ -642,7 +597,7 @@ static long tce_iommu_build_v2(struct tce_container *container,
 		if (dirtmp != DMA_NONE)
 			tce_iommu_unuse_page_v2(container, tbl, entry + i);

-		*pua = tce;
+		*pua = cpu_to_be64(tce);

 		tce += IOMMU_PAGE_SIZE(tbl);
 	}
@@ -676,7 +631,7 @@ static long tce_iommu_create_table(struct tce_container *container,
 			page_shift, window_size, levels, ptbl);

 	WARN_ON(!ret && !(*ptbl)->it_ops->free);
-	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
+	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

 	return ret;
 }
@@ -686,7 +641,6 @@ static void tce_iommu_free_table(struct tce_container *container,
 {
 	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

-	tce_iommu_userspace_view_free(tbl, container->mm);
 	iommu_tce_table_put(tbl);
 	decrement_locked_vm(container->mm, pages);
 }
@@ -1201,7 +1155,6 @@ static void tce_iommu_release_ownership(struct tce_container *container,
 			continue;

 		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
-		tce_iommu_userspace_view_free(tbl, container->mm);
 		if (tbl->it_map)
 			iommu_release_ownership(tbl);