提交 4f93d21d 编写于 作者: David S. Miller

sparc64: Support 2GB and 16GB page sizes for kernel linear mappings.

SPARC-T4 supports 2GB pages.

So convert kpte_linear_bitmap into an array of 2-bit values which
index into kern_linear_pte_xor.

Now kern_linear_pte_xor has four entries, used for regions aligned to
the 4MB, 256MB, 2GB, and 16GB page sizes respectively.

Enabling 2GB pages is currently hardcoded using a check against
sun4v_chip_type.  In the future this will be done more cleanly
by interrogating the machine description which is the correct
way to determine this kind of thing.
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 699871bc
...@@ -188,31 +188,26 @@ valid_addr_bitmap_patch: ...@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
be,pn %xcc, kvmap_dtlb_longpath be,pn %xcc, kvmap_dtlb_longpath
2: sethi %hi(kpte_linear_bitmap), %g2 2: sethi %hi(kpte_linear_bitmap), %g2
or %g2, %lo(kpte_linear_bitmap), %g2
/* Get the 256MB physical address index. */ /* Get the 256MB physical address index. */
sllx %g4, 21, %g5 sllx %g4, 21, %g5
mov 1, %g7 or %g2, %lo(kpte_linear_bitmap), %g2
srlx %g5, 21 + 28, %g5 srlx %g5, 21 + 28, %g5
and %g5, (32 - 1), %g7
/* Don't try this at home kids... this depends upon srlx /* Divide by 32 to get the offset into the bitmask. */
* only taking the low 6 bits of the shift count in %g5. srlx %g5, 5, %g5
*/ add %g7, %g7, %g7
sllx %g7, %g5, %g7
/* Divide by 64 to get the offset into the bitmask. */
srlx %g5, 6, %g5
sllx %g5, 3, %g5 sllx %g5, 3, %g5
/* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ /* kern_linear_pte_xor[(mask >> shift) & 3] */
ldx [%g2 + %g5], %g2 ldx [%g2 + %g5], %g2
andcc %g2, %g7, %g0 srlx %g2, %g7, %g7
sethi %hi(kern_linear_pte_xor), %g5 sethi %hi(kern_linear_pte_xor), %g5
and %g7, 3, %g7
or %g5, %lo(kern_linear_pte_xor), %g5 or %g5, %lo(kern_linear_pte_xor), %g5
bne,a,pt %xcc, 1f sllx %g7, 3, %g7
add %g5, 8, %g5 ldx [%g5 + %g7], %g2
1: ldx [%g5], %g2
.globl kvmap_linear_patch .globl kvmap_linear_patch
kvmap_linear_patch: kvmap_linear_patch:
......
...@@ -51,18 +51,34 @@ ...@@ -51,18 +51,34 @@
#include "init_64.h" #include "init_64.h"
unsigned long kern_linear_pte_xor[2] __read_mostly; unsigned long kern_linear_pte_xor[4] __read_mostly;
/* A bitmap, one bit for every 256MB of physical memory. If the bit /* A bitmap, two bits for every 256MB of physical memory. These two
* is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else * bits determine what page size we use for kernel linear
* if set we should use a 256MB page (via kern_linear_pte_xor[1]). * translations. They form an index into kern_linear_pte_xor[]. The
* value in the indexed slot is XOR'd with the TLB miss virtual
* address to form the resulting TTE. The mapping is:
*
* 0 ==> 4MB
* 1 ==> 256MB
* 2 ==> 2GB
* 3 ==> 16GB
*
* All sun4v chips support 256MB pages. Only SPARC-T4 and later
* support 2GB pages, and hopefully future cpus will support the 16GB
* pages as well. For slots 2 and 3, we encode a 256MB TTE xor there
* if these larger page sizes are not supported by the cpu.
*
* It would be nice to determine this from the machine description
* 'cpu' properties, but we need to have this table setup before the
* MDESC is initialized.
*/ */
unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
#ifndef CONFIG_DEBUG_PAGEALLOC #ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB and 256MB linear mappings. /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
* Space is allocated for this right after the trap table * Space is allocated for this right after the trap table in
* in arch/sparc64/kernel/head.S * arch/sparc64/kernel/head.S
*/ */
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif #endif
...@@ -1358,32 +1374,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, ...@@ -1358,32 +1374,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
extern unsigned int kvmap_linear_patch[1]; extern unsigned int kvmap_linear_patch[1];
#endif /* CONFIG_DEBUG_PAGEALLOC */ #endif /* CONFIG_DEBUG_PAGEALLOC */
static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) static void __init kpte_set_val(unsigned long index, unsigned long val)
{ {
const unsigned long shift_256MB = 28; unsigned long *ptr = kpte_linear_bitmap;
const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
const unsigned long size_256MB = (1UL << shift_256MB);
while (start < end) { val <<= ((index % (BITS_PER_LONG / 2)) * 2);
long remains; ptr += (index / (BITS_PER_LONG / 2));
remains = end - start; *ptr |= val;
if (remains < size_256MB) }
break;
if (start & mask_256MB) { static const unsigned long kpte_shift_min = 28; /* 256MB */
start = (start + size_256MB) & ~mask_256MB; static const unsigned long kpte_shift_max = 34; /* 16GB */
continue; static const unsigned long kpte_shift_incr = 3;
}
while (remains >= size_256MB) { static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
unsigned long index = start >> shift_256MB; unsigned long shift)
{
unsigned long size = (1UL << shift);
unsigned long mask = (size - 1UL);
unsigned long remains = end - start;
unsigned long val;
__set_bit(index, kpte_linear_bitmap); if (remains < size || (start & mask))
return start;
start += size_256MB; /* VAL maps:
remains -= size_256MB; *
* shift 28 --> kern_linear_pte_xor index 1
* shift 31 --> kern_linear_pte_xor index 2
* shift 34 --> kern_linear_pte_xor index 3
*/
val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
remains &= ~mask;
if (shift != kpte_shift_max)
remains = size;
while (remains) {
unsigned long index = start >> kpte_shift_min;
kpte_set_val(index, val);
start += 1UL << kpte_shift_min;
remains -= 1UL << kpte_shift_min;
}
return start;
}
static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
{
unsigned long smallest_size, smallest_mask;
unsigned long s;
smallest_size = (1UL << kpte_shift_min);
smallest_mask = (smallest_size - 1UL);
while (start < end) {
unsigned long orig_start = start;
for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
start = kpte_mark_using_shift(start, end, s);
if (start != orig_start)
break;
} }
if (start == orig_start)
start = (start + smallest_size) & ~smallest_mask;
} }
} }
...@@ -1577,13 +1636,15 @@ static void __init sun4v_ktsb_init(void) ...@@ -1577,13 +1636,15 @@ static void __init sun4v_ktsb_init(void)
ktsb_descr[0].resv = 0; ktsb_descr[0].resv = 0;
#ifndef CONFIG_DEBUG_PAGEALLOC #ifndef CONFIG_DEBUG_PAGEALLOC
/* Second KTSB for 4MB/256MB mappings. */ /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */
ktsb_pa = (kern_base + ktsb_pa = (kern_base +
((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
HV_PGSZ_MASK_256MB); HV_PGSZ_MASK_256MB);
if (sun4v_chip_type == SUN4V_CHIP_NIAGARA4)
ktsb_descr[1].pgsz_mask |= HV_PGSZ_MASK_2GB;
ktsb_descr[1].assoc = 1; ktsb_descr[1].assoc = 1;
ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
ktsb_descr[1].ctx_idx = 0; ktsb_descr[1].ctx_idx = 0;
...@@ -2110,6 +2171,7 @@ static void __init sun4u_pgprot_init(void) ...@@ -2110,6 +2171,7 @@ static void __init sun4u_pgprot_init(void)
{ {
unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit; unsigned long page_exec_bit;
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
_PAGE_CACHE_4U | _PAGE_P_4U | _PAGE_CACHE_4U | _PAGE_P_4U |
...@@ -2138,7 +2200,8 @@ static void __init sun4u_pgprot_init(void) ...@@ -2138,7 +2200,8 @@ static void __init sun4u_pgprot_init(void)
_PAGE_P_4U | _PAGE_W_4U); _PAGE_P_4U | _PAGE_W_4U);
/* XXX Should use 256MB on Panther. XXX */ /* XXX Should use 256MB on Panther. XXX */
kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
_PAGE_SZBITS = _PAGE_SZBITS_4U; _PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
...@@ -2164,6 +2227,7 @@ static void __init sun4v_pgprot_init(void) ...@@ -2164,6 +2227,7 @@ static void __init sun4v_pgprot_init(void)
{ {
unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit; unsigned long page_exec_bit;
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
_PAGE_CACHE_4V | _PAGE_P_4V | _PAGE_CACHE_4V | _PAGE_P_4V |
...@@ -2195,6 +2259,25 @@ static void __init sun4v_pgprot_init(void) ...@@ -2195,6 +2259,25 @@ static void __init sun4v_pgprot_init(void)
kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V); _PAGE_P_4V | _PAGE_W_4V);
i = 2;
if (sun4v_chip_type == SUN4V_CHIP_NIAGARA4) {
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
0xfffff80000000000UL;
#else
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
0xfffff80000000000UL;
#endif
kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
i = 3;
}
for (; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[i - 1];
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V); __ACCESS_BITS_4V | _PAGE_E_4V);
......
...@@ -8,12 +8,12 @@ ...@@ -8,12 +8,12 @@
#define MAX_PHYS_ADDRESS (1UL << 41UL) #define MAX_PHYS_ADDRESS (1UL << 41UL)
#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
#define KPTE_BITMAP_BYTES \ #define KPTE_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL)
#define VALID_ADDR_BITMAP_BYTES \ #define VALID_ADDR_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
extern unsigned long kern_linear_pte_xor[2]; extern unsigned long kern_linear_pte_xor[4];
extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned int sparc64_highest_unlocked_tlb_ent;
extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_context;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册