#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H

#include <asm-generic/pgtable-nopud.h>

#define PTE_INDEX_SIZE  8
#define PMD_INDEX_SIZE  10
#define PUD_INDEX_SIZE	0
#define PGD_INDEX_SIZE  12

#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)

/* With 64K base page size, hugepage PTEs can go all the way down to the PTE level */
#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT

/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))

/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
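
/*
 * Worked geometry with 64K base pages (PAGE_SHIFT = 16):
 *	PMD_SHIFT   = 16 + 8  = 24, so one PMD entry maps 16MB
 *	PGDIR_SHIFT = 24 + 10 = 34, so one PGD entry maps 16GB
 * and 34 + PGD_INDEX_SIZE (12) = 46 bits of virtual address space.
 */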

#define _PAGE_COMBO	0x00040000 /* this is a combo 4k page */
#define _PAGE_4K_PFN	0x00080000 /* PFN is for a single 4k page */
/*
 * Used to track subpage group validity if _PAGE_COMBO is set.
 * This overloads _PAGE_F_GIX and _PAGE_F_SECOND.
 */
#define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)

/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)

/* Shift to put page number into pte.
 *
 * That gives us a max RPN of 34 bits, which means a max of 50 bits
 * of addressable physical space, or 46 bits for the special 4k PFNs.
 */
#define PTE_RPN_SHIFT	(30)
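
/*
 * Worked example: with a 64-bit pte, 64 - 30 leaves the 34 RPN bits noted
 * above; 34 + PAGE_SHIFT (16) = 50 bits of physical address space, and
 * 34 + 12 = 46 bits when the pte carries a single 4K PFN (_PAGE_4K_PFN).
 */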
/*
 * We support 16 fragments per PTE page of 64K size.
 */
#define PTE_FRAG_NR	16
/*
 * Each fragment is 4K: a 2K PTE page fragment plus another 2K for storing
 * the real_pte_t hash index words.
 */
#define PTE_FRAG_SIZE_SHIFT  12
#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
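
/*
 * Fragment arithmetic: PTRS_PER_PTE (256) ptes * 8 bytes = 2K of ptes plus
 * 2K of hash-index words = the 4K fragment above; PTE_FRAG_NR (16) such
 * fragments fill exactly one 64K page.
 */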

/*
 * Bits to mask out from a PMD to get to the PTE page
 * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
 */
#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS		0x1ff

#ifndef __ASSEMBLY__

/*
 * With 64K pages on hash table, we have a special PTE format that
 * uses a second "half" of the page table to encode sub-page information
 * in order to deal with 64K made of 4K HW pages. Thus we override the
 * generic accessors and iterators here
 */
#define __real_pte __real_pte
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
{
	real_pte_t rpte;
	unsigned long *hidxp;

	rpte.pte = pte;
	rpte.hidx = 0;
	if (pte_val(pte) & _PAGE_COMBO) {
		/*
		 * Make sure we order the hidx load against the _PAGE_COMBO
		 * check. The store side ordering is done in __hash_page_4K
		 */
		smp_rmb();
		hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
		rpte.hidx = *hidxp;
	}
	return rpte;
}
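
/*
 * Note on the pointer arithmetic above: ptep + PTRS_PER_PTE steps over the
 * 256 pte entries (2K, with 8-byte ptes) and lands in the second half of
 * the 4K fragment, where __hash_page_4K stores the hash index word.
 */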

static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
{
	if (pte_val(rpte.pte) & _PAGE_COMBO)
		return (rpte.hidx >> (index << 2)) & 0xf;
	return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf;
}
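
/*
 * Each 4K subpage owns a 4-bit nibble of rpte.hidx (hence index << 2 above):
 * 16 subpages * 4 bits fill exactly one unsigned long.
 */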

#define __rpte_to_pte(r)	((r).pte)
extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
/*
 * Trick: we set __end to va + 64k, which happens to work for
 * a 16M page as well, since we want only one iteration
 */
#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
	do {								\
		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
		unsigned __split = (psize == MMU_PAGE_4K ||		\
				    psize == MMU_PAGE_64K_AP);		\
		shift = mmu_psize_defs[psize].shift;			\
		for (index = 0; vpn < __end; index++,			\
			     vpn += (1L << (shift - VPN_SHIFT))) {	\
			if (!__split || __rpte_sub_valid(rpte, index))	\
				do {

#define pte_iterate_hashed_end() } while(0); } } while(0)
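
/*
 * Illustrative sketch of a caller (hash-flush style code; the slot lookup
 * and invalidate steps are elided since they are not part of this header):
 *
 *	pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) {
 *		hash = hpt_hash(vpn, shift, ssize);
 *		// look up the HPTE slot for (rpte, index) and invalidate it
 *	} pte_iterate_hashed_end();
 *
 * For a 64K pte backed by 4K hash pages this visits each valid subpage;
 * for a native 64K (or 16M) mapping the body runs exactly once.
 */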

#define pte_pagesize_index(mm, addr, pte)	\
	(((pte) & _PAGE_COMBO) ? MMU_PAGE_4K : MMU_PAGE_64K)

#define remap_4k_pfn(vma, addr, pfn, prot)				\
	(WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL :	\
		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
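
/*
 * The WARN_ON above enforces the RPN limit discussed earlier: a 4K pfn
 * must fit in 64 - PTE_RPN_SHIFT = 34 bits, i.e. at most 46 bits of
 * physical address; otherwise remap_4k_pfn() fails with -EINVAL.
 */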

#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
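
/*
 * Resulting sizes: the PMD table is 1024 entries * 8 bytes = 8K (16K with
 * THP, which reserves a second pointer-sized slot per entry; see
 * get_hpte_slot_array() below); the PGD is 4096 * 8 bytes = 32K.
 */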

#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
#define pte_pgd(pte)	((pgd_t)pte_pud(pte))

#ifdef CONFIG_HUGETLB_PAGE
/*
 * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
 * 16GB hugepage ptes in the PGD and 16MB hugepage ptes at the PMD.
 *
 * Defined in such a way that the code block can be optimized away at
 * build time if CONFIG_HUGETLB_PAGE=n.
 */
static inline int pmd_huge(pmd_t pmd)
{
	/*
	 * leaf pte for huge page
	 */
	return !!(pmd_val(pmd) & _PAGE_PTE);
}

static inline int pud_huge(pud_t pud)
{
	/*
	 * leaf pte for huge page
	 */
	return !!(pud_val(pud) & _PAGE_PTE);
}

static inline int pgd_huge(pgd_t pgd)
{
	/*
	 * leaf pte for huge page
	 */
	return !!(pgd_val(pgd) & _PAGE_PTE);
}
#define pgd_huge pgd_huge

#ifdef CONFIG_DEBUG_VM
extern int hugepd_ok(hugepd_t hpd);
#define is_hugepd(hpd)               (hugepd_ok(hpd))
#else
/*
 * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We
 * don't need to set up a hugepage directory for them. Our pte and page
 * directory format makes this possible.
 */
static inline int hugepd_ok(hugepd_t hpd)
{
	return 0;
}
#define is_hugepd(pdep)			0
#endif /* CONFIG_DEBUG_VM */

#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
					 unsigned long addr,
					 pmd_t *pmdp,
					 unsigned long clr,
					 unsigned long set);
static inline char *get_hpte_slot_array(pmd_t *pmdp)
{
	/*
	 * The hpte hash index is stored in the pgtable whose address is in the
	 * second half of the PMD
	 *
	 * Order this load with the test for pmd_trans_huge in the caller
	 */
	smp_rmb();
	return *(char **)(pmdp + PTRS_PER_PMD);
}
/*
 * The linux hugepage PMD now includes the pmd entries followed by the address
 * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits:
 * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000 ]. We use one byte per
 * HPTE entry. With a 16MB hugepage and 64K HPTEs we need 256 entries, and
 * with 4K HPTEs we need 4096 entries. Both fit in a 4K pgtable_t.
 *
 * The last three bits are intentionally left as zero. This memory location
 * is also used as a normal page PTE pointer. So if we have any pointers
 * left around while we collapse a hugepage, we need to make sure the
 * _PAGE_PRESENT bit of that is zero when we look at them.
 */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	return (hpte_slot_array[index] >> 3) & 0x1;
}

static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	return hpte_slot_array[index] >> 4;
}

static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
}
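
/*
 * Worked example: mark_hpte_slot_valid(arr, 3, 0xd) stores 0xd << 4 | 1 << 3
 * = 0xd8 into arr[3]; hpte_valid(arr, 3) then returns (0xd8 >> 3) & 1 = 1,
 * and hpte_hash_index(arr, 3) returns 0xd8 >> 4 = 0xd.
 */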

/*
 * For core kernel code, by design, pmd_trans_huge is never run on any
 * hugetlbfs page. The hugetlbfs page table walking and mangling paths are
 * totally separated from the core VM paths, and they're differentiated by
 * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
 *
 * pmd_trans_huge() is defined as false at build time if
 * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
 * time in such a case.
 *
 * For ppc64 we need to differentiate explicit hugepages from THP, because
 * for THP we also track the subpage details at the pmd level. We don't do
 * that for explicit huge pages.
 */
static inline int pmd_trans_huge(pmd_t pmd)
{
	return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
		  (_PAGE_PTE | _PAGE_THP_HUGE));
}

static inline int pmd_trans_splitting(pmd_t pmd)
{
	if (pmd_trans_huge(pmd))
		return pmd_val(pmd) & _PAGE_SPLITTING;
	return 0;
}

static inline int pmd_large(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_PTE);
}

static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
}

static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
}

#define __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
}

static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
					      unsigned long addr, pmd_t *pmdp)
{
	unsigned long old;

	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
		return 0;
	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
	return ((old & _PAGE_ACCESSED) != 0);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp)
{
	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
		return;

	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
}

#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
#endif	/* __ASSEMBLY__ */

#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */