#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H

#include <asm-generic/pgtable-nopud.h>

#define PTE_INDEX_SIZE  8
#define PMD_INDEX_SIZE  10
#define PUD_INDEX_SIZE	0
#define PGD_INDEX_SIZE  12

#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)

/* With 64k base page size, the minimum hugepage PTE shift is the base page shift */
#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT

/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))

/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
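/*
 * A quick sketch of what the above gives us, assuming the usual 64K base
 * page (PAGE_SHIFT = 16):
 *
 *	PMD_SHIFT   = 16 + 8  = 24   -> a PMD entry maps 16MB
 *	PGDIR_SHIFT = 24 + 10 = 34   -> a PGD entry maps 16GB
 *	PGDIR_SHIFT + PGD_INDEX_SIZE = 46 bits of virtual address space
 */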

/* Bits to mask out from a PMD to get to the PTE page */
/* PMDs point to PTE table fragments which are 4K aligned.  */
#define PMD_MASKED_BITS		0xfff
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS		0x1ff

#define _PAGE_COMBO	0x00020000 /* this is a combo 4k page */
#define _PAGE_4K_PFN	0x00040000 /* PFN is for a single 4k page */
/*
 * Used to track validity of the subpage group when _PAGE_COMBO is set.
 * This overloads _PAGE_F_GIX and _PAGE_F_SECOND.
 */
#define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)

/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)

/* Shift to put page number into pte.
 *
 * That gives us a max RPN of 34 bits, which means a max of 50 bits
 * of addressable physical space, or 46 bits for the special 4k PFNs.
 */
#define PTE_RPN_SHIFT	(30)
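/*
 * Worked out, assuming 64-bit PTEs and PAGE_SHIFT = 16: the RPN field is
 * at most 64 - PTE_RPN_SHIFT = 34 bits, giving 34 + 16 = 50 bits of
 * physical address for normal 64K mappings and 34 + 12 = 46 bits when
 * _PAGE_4K_PFN marks a single 4K PFN.
 */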

#ifndef __ASSEMBLY__

/*
 * With 64K pages on the hash MMU, we have a special PTE format that
 * uses a second "half" of the page table to encode sub-page information,
 * in order to deal with 64K pages made up of 4K HW pages. Thus we
 * override the generic accessors and iterators here.
 */
#define __real_pte __real_pte
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
{
	real_pte_t rpte;
	unsigned long *hidxp;

	rpte.pte = pte;
	rpte.hidx = 0;
	if (pte_val(pte) & _PAGE_COMBO) {
		/*
		 * Make sure we order the hidx load against the _PAGE_COMBO
		 * check. The store side ordering is done in __hash_page_4K
		 */
		smp_rmb();
		hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
		rpte.hidx = *hidxp;
	}
	return rpte;
}

static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
{
	/* For combo pages, each 4K subpage gets 4 bits in the second half */
	if ((pte_val(rpte.pte) & _PAGE_COMBO))
		return (rpte.hidx >> (index<<2)) & 0xf;
	/* Otherwise the single hidx lives in _PAGE_F_SECOND/_PAGE_F_GIX */
	return (pte_val(rpte.pte) >> 12) & 0xf;
}

#define __rpte_to_pte(r)	((r).pte)
extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
/*
 * Trick: we set __end to va + 64k, which happens to work for
 * a 16M page as well, since we want only one iteration in that case.
 */
#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
	do {								\
		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
		unsigned __split = (psize == MMU_PAGE_4K ||		\
				    psize == MMU_PAGE_64K_AP);		\
		shift = mmu_psize_defs[psize].shift;			\
		for (index = 0; vpn < __end; index++,			\
			     vpn += (1L << (shift - VPN_SHIFT))) {	\
			if (!__split || __rpte_sub_valid(rpte, index))	\
				do {

#define pte_iterate_hashed_end() } while(0); } } while(0)
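/*
 * Illustrative sketch (not taken from the kernel sources) of how a caller
 * might combine the iterator with __rpte_to_hidx() to visit every valid
 * 4K subpage HPTE of one Linux PTE; flush_one_hpte() is a hypothetical
 * helper and rpte/psize/ssize/vpn come from the caller:
 *
 *	pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) {
 *		unsigned long hidx = __rpte_to_hidx(rpte, index);
 *		flush_one_hpte(vpn, shift, ssize, hidx);
 *	} pte_iterate_hashed_end();
 */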

#define pte_pagesize_index(mm, addr, pte)	\
	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)

#define remap_4k_pfn(vma, addr, pfn, prot)				\
	(WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL :	\
		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
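/*
 * Illustrative sketch (not taken from the kernel sources) of a driver
 * mmap() handler using remap_4k_pfn() to expose one 4K hardware page on a
 * 64K-page kernel; struct foo_dev and its regs_pfn field are hypothetical:
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct foo_dev *dev = file->private_data;
 *
 *		return remap_4k_pfn(vma, vma->vm_start, dev->regs_pfn,
 *				    vma->vm_page_prot);
 *	}
 */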

#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)

#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
#define pte_pgd(pte)	((pgd_t)pte_pud(pte))

#ifdef CONFIG_HUGETLB_PAGE
/*
 * With PTE_INDEX_SIZE = 8 and PMD_INDEX_SIZE = 10 we get PMD_SHIFT = 24
 * and PGDIR_SHIFT = 34, so we can have a 16GB hugepage PTE at the PGD
 * level and a 16MB hugepage PTE at the PMD level.
 *
 * Defined in such a way that the code blocks can be optimized away at
 * build time if CONFIG_HUGETLB_PAGE=n.
 */
static inline int pmd_huge(pmd_t pmd)
{
	/*
	 * A leaf PTE at the PMD level marks a huge page
	 */
	return !!(pmd_val(pmd) & _PAGE_PTE);
}

static inline int pud_huge(pud_t pud)
{
	/*
	 * A leaf PTE at the PUD level marks a huge page
	 */
	return !!(pud_val(pud) & _PAGE_PTE);
}

static inline int pgd_huge(pgd_t pgd)
{
	/*
	 * A leaf PTE at the PGD level marks a huge page
	 */
	return !!(pgd_val(pgd) & _PAGE_PTE);
}
#define pgd_huge pgd_huge

#ifdef CONFIG_DEBUG_VM
extern int hugepd_ok(hugepd_t hpd);
#define is_hugepd(hpd)               (hugepd_ok(hpd))
#else
/*
 * With 64k page size, we have hugepage PTEs in the PGD and PMD entries. We
 * don't need to set up a hugepage directory for them; our PTE and page
 * directory formats allow this directly.
 */
static inline int hugepd_ok(hugepd_t hpd)
{
	return 0;
}
#define is_hugepd(pdep)			0
#endif /* CONFIG_DEBUG_VM */

#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
					 unsigned long addr,
					 pmd_t *pmdp,
					 unsigned long clr,
					 unsigned long set);
static inline char *get_hpte_slot_array(pmd_t *pmdp)
{
	/*
	 * The hpte hindex is stored in the pgtable whose address is in the
	 * second half of the PMD
	 *
	 * Order this load with the test for pmd_trans_huge in the caller
	 */
	smp_rmb();
	return *(char **)(pmdp + PTRS_PER_PMD);
}
/*
 * The Linux hugepage PMD now includes the pmd entries followed by the
 * address of the stashed pgtable_t. The stashed pgtable_t contains the
 * hpte bits: [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000 ]. We use
 * one byte per HPTE entry. With a 16MB hugepage and 64K HPTEs we need 256
 * entries, and with 4K HPTEs we need 4096 entries. Both fit in a 4K
 * pgtable_t.
 *
 * The last three bits are intentionally left as zero. This memory location
 * is also used as a normal page PTE pointer, so if we have any such
 * pointers left around while we collapse a hugepage, we need to make sure
 * the _PAGE_PRESENT bit of them is zero when we look at them.
 */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	return (hpte_slot_array[index] >> 3) & 0x1;
}

static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	return hpte_slot_array[index] >> 4;
}

static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
}
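/*
 * A round-trip sketch of the slot byte encoding used by the helpers above,
 * i.e. [ 4 bit hidx (secondary + group index) | 1 bit valid | 000 ]:
 *
 *	mark_hpte_slot_valid(hpte_slot_array, i, 0x5);
 *	//  hpte_slot_array[i] == (0x5 << 4) | (1 << 3) == 0x58
 *	//  hpte_valid(hpte_slot_array, i)      == 1
 *	//  hpte_hash_index(hpte_slot_array, i) == 0x5
 */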

/*
 * For core kernel code, by design pmd_trans_huge() is never run on any
 * hugetlbfs page. The hugetlbfs page table walking and mangling paths are
 * totally separated from the core VM paths, and they're differentiated by
 * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge() could
 * run.
 *
 * pmd_trans_huge() is defined as false at build time if
 * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
 * time in that case.
 *
 * For ppc64 we need to differentiate explicit hugepages from THP, because
 * for THP we also track the subpage details at the pmd level. We don't do
 * that for explicit huge pages.
 */
static inline int pmd_trans_huge(pmd_t pmd)
{
	return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
		  (_PAGE_PTE | _PAGE_THP_HUGE));
}

static inline int pmd_trans_splitting(pmd_t pmd)
{
	if (pmd_trans_huge(pmd))
		return pmd_val(pmd) & _PAGE_SPLITTING;
	return 0;
}

static inline int pmd_large(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_PTE);
}

static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
}

static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
}

#define __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
}

static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
					      unsigned long addr, pmd_t *pmdp)
{
	unsigned long old;

	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
		return 0;
	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
	return ((old & _PAGE_ACCESSED) != 0);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp)
{
	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
		return;

	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
}

#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
#endif	/* __ASSEMBLY__ */

#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */