libcpu/aarch64/cortex-a53: add cache maintenance support

Add cache.S (data cache maintenance by set/way and by address, instruction
cache invalidate) and the rt_hw_dcache_*/rt_hw_icache_* wrappers in mmu.c.

Notes for whoever applies this:
- The operands of the three #include lines in the first mmu.c hunk are missing
  from this patch text; restore them from the tree first.
- The BUS_ADDRESS change in mmu.h is whitespace-only and the spacing is not
  preserved here, so both sides read the same.
- The index lines were not regenerated for the post-image blobs.

diff --git a/libcpu/aarch64/cortex-a53/SConscript b/libcpu/aarch64/cortex-a53/SConscript
index 57a5accc5c95f0967fc55ac86f66032d49fb9d2e..75e170a0cfde303a148c4e8842b371b17cdd52df 100644
--- a/libcpu/aarch64/cortex-a53/SConscript
+++ b/libcpu/aarch64/cortex-a53/SConscript
@@ -7,6 +7,7 @@ context_gcc.S
 vector_gcc.S
 entry_point.S
 cpu_gcc.S
+cache.S
 ''')
 
 CPPPATH = [cwd]
diff --git a/libcpu/aarch64/cortex-a53/cache.S b/libcpu/aarch64/cortex-a53/cache.S
new file mode 100644
index 0000000000000000000000000000000000000000..7f295a2b02f7758886f81d4cee9e7f70dcf83f66
--- /dev/null
+++ b/libcpu/aarch64/cortex-a53/cache.S
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2006-2020, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2020-03-17     bigmagic     first version
+ */
+
+/*
+ * void __asm_dcache_level(level, invalidate_only)
+ *
+ * flush or invalidate one level cache.
+ *
+ * x0: cache level
+ * x1: 0 clean & invalidate, 1 invalidate only
+ * x2~x7, x9, x12: clobbered
+ */
+.globl __asm_dcache_level
+__asm_dcache_level:
+    lsl     x12, x0, #1
+    msr     csselr_el1, x12         /* select cache level */
+    isb                             /* sync change of ccsidr_el1 */
+    mrs     x6, ccsidr_el1          /* read the new ccsidr_el1 */
+    and     x2, x6, #7              /* x2 <- log2(cache line size)-4 */
+    add     x2, x2, #4              /* x2 <- log2(cache line size) */
+    mov     x3, #0x3ff
+    and     x3, x3, x6, lsr #3      /* x3 <- max number of #ways */
+    clz     w5, w3                  /* bit position of #ways */
+    mov     x4, #0x7fff
+    and     x4, x4, x6, lsr #13     /* x4 <- max number of #sets */
+    /* x12 <- cache level << 1 */
+    /* x2 <- line length offset */
+    /* x3 <- number of cache ways - 1 */
+    /* x4 <- number of cache sets - 1 */
+    /* x5 <- bit position of #ways */
+
+loop_set:
+    mov     x6, x3                  /* x6 <- working copy of #ways */
+loop_way:
+    lsl     x7, x6, x5
+    orr     x9, x12, x7             /* map way and level to cisw value */
+    lsl     x7, x4, x2
+    orr     x9, x9, x7              /* map set number to cisw value */
+    tbz     w1, #0, 1f
+    dc      isw, x9
+    b       2f
+1:  dc      cisw, x9                /* clean & invalidate by set/way */
+2:  subs    x6, x6, #1              /* decrement the way */
+    b.ge    loop_way
+    subs    x4, x4, #1              /* decrement the set */
+    b.ge    loop_set
+
+    ret
+
+/*
+ * void __asm_dcache_all(int invalidate_only)
+ *
+ * x0: 0 clean & invalidate, 1 invalidate only
+ *
+ * flush or invalidate all data cache by SET/WAY.
+ */
+.globl __asm_dcache_all
+__asm_dcache_all:
+    mov     x1, x0
+    dsb     sy
+    mrs     x10, clidr_el1          /* read clidr_el1 */
+    lsr     x11, x10, #24
+    and     x11, x11, #0x7          /* x11 <- loc */
+    cbz     x11, finished           /* if loc is 0, exit */
+    mov     x15, lr
+    mov     x0, #0                  /* start flush at cache level 0 */
+    /* x0  <- cache level */
+    /* x10 <- clidr_el1 */
+    /* x11 <- loc */
+    /* x15 <- return address */
+
+loop_level:
+    lsl     x12, x0, #1
+    add     x12, x12, x0            /* x12 <- tripled cache level */
+    lsr     x12, x10, x12
+    and     x12, x12, #7            /* x12 <- cache type */
+    cmp     x12, #2
+    b.lt    skip                    /* skip if no cache or icache */
+    bl      __asm_dcache_level      /* x1 = 0 flush, 1 invalidate */
+skip:
+    add     x0, x0, #1              /* increment cache level */
+    cmp     x11, x0
+    b.gt    loop_level
+
+    mov     x0, #0
+    msr     csselr_el1, x0          /* restore csselr_el1 */
+    dsb     sy
+    isb
+    mov     lr, x15
+
+finished:
+    ret
+
+/*
+ * void __asm_flush_dcache_all(void)
+ *
+ * clean & invalidate all data cache by SET/WAY.
+ */
+.globl __asm_flush_dcache_all
+__asm_flush_dcache_all:
+    mov     x0, #0
+    b       __asm_dcache_all
+
+/*
+ * void __asm_invalidate_dcache_all(void)
+ *
+ * invalidate all data cache by SET/WAY.
+ */
+.globl __asm_invalidate_dcache_all
+__asm_invalidate_dcache_all:
+    mov     x0, #0x1
+    b       __asm_dcache_all
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range
+ *
+ * x0: start address
+ * x1: end address (exclusive)
+ */
+.globl __asm_flush_dcache_range
+__asm_flush_dcache_range:
+    mrs     x3, ctr_el0
+    lsr     x3, x3, #16
+    and     x3, x3, #0xf
+    mov     x2, #4
+    lsl     x2, x2, x3              /* cache line size */
+
+    /* x2 <- minimal cache line size in cache system */
+    sub     x3, x2, #1
+    bic     x0, x0, x3
+1:  dc      civac, x0               /* clean & invalidate data or unified cache */
+    add     x0, x0, x2
+    cmp     x0, x1
+    b.lo    1b
+    dsb     sy
+    ret
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate all instruction cache entries.
+ */
+.globl __asm_invalidate_icache_all
+__asm_invalidate_icache_all:
+    ic      ialluis
+    isb     sy
+    ret
+
+/*
+ * int __asm_flush_l3_cache(void)
+ *
+ * stub: performs no cache operation and always returns 0 (success).
+ */
+.globl __asm_flush_l3_cache
+__asm_flush_l3_cache:
+    mov     x0, #0                  /* return status as success */
+    ret
diff --git a/libcpu/aarch64/cortex-a53/mmu.c b/libcpu/aarch64/cortex-a53/mmu.c
index aab6bf9ebe05a0cc5e069df4e7bcf2669e8e7b2f..b799143084ccb58cd66899212ff8a2effb56d225 100644
--- a/libcpu/aarch64/cortex-a53/mmu.c
+++ b/libcpu/aarch64/cortex-a53/mmu.c
@@ -9,6 +9,7 @@
  */
 #include
 #include
+#include
 
 #define TTBR_CNP 1
 
@@ -35,6 +36,13 @@ static unsigned long main_tbl[512 * 20] __attribute__((aligned (4096)));
 
 int free_idx = 1;
 
+/* Cache maintenance primitives implemented in cache.S. */
+void __asm_invalidate_icache_all(void);
+void __asm_flush_dcache_all(void);
+int __asm_flush_l3_cache(void);
+void __asm_flush_dcache_range(unsigned long long start, unsigned long long end);
+void __asm_invalidate_dcache_all(void);
+
 void mmu_memset(char *dst, char v, size_t len)
 {
     while (len--)
@@ -50,6 +58,25 @@ static unsigned long get_free_page(void)
     return (unsigned long)(main_tbl + __page_off);
 }
 
+/*
+ * Read SCTLR_EL1. An unsigned long is used so the C operand has the same
+ * width as the system register.
+ */
+static inline unsigned long get_sctlr(void)
+{
+    unsigned long val;
+
+    asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
+    return val;
+}
+
+/* Write val to SCTLR_EL1, then issue an ISB. */
+static inline void set_sctlr(unsigned long val)
+{
+    asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
+    asm volatile("isb");
+}
+
 void mmu_init(void)
 {
     unsigned long val64;
@@ -101,6 +128,8 @@ void mmu_enable(void)
     __asm__ volatile("mrs %0, SCTLR_EL1\n":"=r"(val32));
     val32 |= 0x1005; //enable mmu, I C M
     __asm__ volatile("dmb sy\n msr SCTLR_EL1, %0\nisb sy\n"::"r"(val32));
+    rt_hw_icache_enable();
+    rt_hw_dcache_enable();
 }
 
 static int map_single_page_2M(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
@@ -271,3 +300,98 @@ void armv8_map(unsigned long va, unsigned long pa, unsigned long size, unsigned
     map_region(va, pa, size, attr);
 }
 
+/*
+ * Enable the data cache by setting SCTLR_EL1.C.
+ * Only prints a hint and leaves the cache off when the MMU bit is not set.
+ */
+void rt_hw_dcache_enable(void)
+{
+    if (!(get_sctlr() & CR_M))
+    {
+        rt_kprintf("please init mmu!\n");
+    }
+    else
+    {
+        set_sctlr(get_sctlr() | CR_C);
+    }
+}
+
+/*
+ * Clean & invalidate the whole data cache by set/way, then call
+ * __asm_flush_l3_cache() and print its status.
+ */
+void rt_hw_dcache_flush_all(void)
+{
+    int ret;
+
+    __asm_flush_dcache_all();
+    ret = __asm_flush_l3_cache();
+    if (ret)
+    {
+        rt_kprintf("flushing dcache returns 0x%x\n", ret);
+    }
+    else
+    {
+        rt_kprintf("flushing dcache successfully.\n");
+    }
+}
+
+/* Clean & invalidate the cache lines covering [start_addr, start_addr + size). */
+void rt_hw_dcache_flush_range(unsigned long start_addr, unsigned long size)
+{
+    __asm_flush_dcache_range(start_addr, start_addr + size);
+}
+
+/*
+ * Invalidate the cache lines covering [start_addr, start_addr + size).
+ * This uses the clean & invalidate primitive, so dirty lines in the range
+ * are written back to memory rather than discarded.
+ */
+void rt_hw_dcache_invalidate_range(unsigned long start_addr,unsigned long size)
+{
+    __asm_flush_dcache_range(start_addr, start_addr + size);
+}
+
+/* Invalidate the whole data cache by set/way without cleaning it. */
+void rt_hw_dcache_invalidate_all(void)
+{
+    __asm_invalidate_dcache_all();
+}
+
+/*
+ * Disable the data cache by clearing SCTLR_EL1.C, then clean & invalidate
+ * it so that data left in dirty lines reaches memory, which is where
+ * accesses are served from once the cache is off.
+ */
+void rt_hw_dcache_disable(void)
+{
+    /* if cache isn't enabled no need to disable */
+    if(!(get_sctlr() & CR_C))
+    {
+        rt_kprintf("need enable cache!\n");
+        return;
+    }
+    set_sctlr(get_sctlr() & ~CR_C);
+    __asm_flush_dcache_all();
+}
+
+/* icache */
+
+/* Invalidate the instruction cache, then enable it by setting SCTLR_EL1.I. */
+void rt_hw_icache_enable(void)
+{
+    __asm_invalidate_icache_all();
+    set_sctlr(get_sctlr() | CR_I);
+}
+
+/* Invalidate the whole instruction cache. */
+void rt_hw_icache_invalidate_all(void)
+{
+    __asm_invalidate_icache_all();
+}
+
+/* Disable the instruction cache by clearing SCTLR_EL1.I. */
+void rt_hw_icache_disable(void)
+{
+    set_sctlr(get_sctlr() & ~CR_I);
+}
diff --git a/libcpu/aarch64/cortex-a53/mmu.h b/libcpu/aarch64/cortex-a53/mmu.h
index 61d64164b1e52677046481d67b0581b298c8858e..6a66472c790e0a7505e1d07a49aed8491980a457 100644
--- a/libcpu/aarch64/cortex-a53/mmu.h
+++ b/libcpu/aarch64/cortex-a53/mmu.h
@@ -11,6 +11,42 @@
 #ifndef __MMU_H__
 #define __MMU_H__
 
+/*
+ * Control register bit names.
+ *
+ * mmu.c uses CR_M, CR_C and CR_I on SCTLR_EL1; they are the same three bits
+ * that mmu_enable() sets with 0x1005. The remaining names and descriptions
+ * follow the 32-bit CP#15 CR1 layout and should be checked against the
+ * SCTLR_EL1 definition before being used.
+ */
+#define CR_M    (1 << 0)    /* MMU enable */
+#define CR_A    (1 << 1)    /* Alignment abort enable */
+#define CR_C    (1 << 2)    /* Dcache enable */
+#define CR_W    (1 << 3)    /* Write buffer enable */
+#define CR_P    (1 << 4)    /* 32-bit exception handler */
+#define CR_D    (1 << 5)    /* 32-bit data address range */
+#define CR_L    (1 << 6)    /* Implementation defined */
+#define CR_B    (1 << 7)    /* Big endian */
+#define CR_S    (1 << 8)    /* System MMU protection */
+#define CR_R    (1 << 9)    /* ROM MMU protection */
+#define CR_F    (1 << 10)   /* Implementation defined */
+#define CR_Z    (1 << 11)   /* Implementation defined */
+#define CR_I    (1 << 12)   /* Icache enable */
+#define CR_V    (1 << 13)   /* Vectors relocated to 0xffff0000 */
+#define CR_RR   (1 << 14)   /* Round Robin cache replacement */
+#define CR_L4   (1 << 15)   /* LDR pc can set T bit */
+#define CR_DT   (1 << 16)
+#define CR_IT   (1 << 18)
+#define CR_ST   (1 << 19)
+#define CR_FI   (1 << 21)   /* Fast interrupt (lower latency mode) */
+#define CR_U    (1 << 22)   /* Unaligned access operation */
+#define CR_XP   (1 << 23)   /* Extended page tables */
+#define CR_VE   (1 << 24)   /* Vectored interrupts */
+#define CR_EE   (1 << 25)   /* Exception (Big) Endian */
+#define CR_TRE  (1 << 28)   /* TEX remap enable */
+#define CR_AFE  (1 << 29)   /* Access flag enable */
+#define CR_TE   (1 << 30)   /* Thumb exception enable */
+
 #define MMU_LEVEL_MASK 0x1ffUL
 #define MMU_MAP_ERROR_VANOTALIGN -1
 #define MMU_MAP_ERROR_PANOTALIGN -2
@@ -20,7 +56,7 @@
 #define MEM_ATTR_MEMORY ((0x1UL << 10) | (0x2UL << 8) | (0x0UL << 6) | (0x1UL << 2))
 #define MEM_ATTR_IO ((0x1UL << 10) | (0x2UL << 8) | (0x0UL << 6) | (0x2UL << 2))
 
-#define BUS_ADDRESS(phys) (((phys) & ~0xC0000000) | 0xC0000000)
+#define BUS_ADDRESS(phys) (((phys) & ~0xC0000000) | 0xC0000000)
 
 void mmu_init(void);
 
@@ -30,4 +66,17 @@ int armv8_map_2M(unsigned long va, unsigned long pa, int count, unsigned long at
 
 void armv8_map(unsigned long va, unsigned long pa, unsigned long size, unsigned long attr);
 
+/* dcache */
+void rt_hw_dcache_enable(void);
+void rt_hw_dcache_flush_all(void);
+void rt_hw_dcache_flush_range(unsigned long start_addr, unsigned long size);
+void rt_hw_dcache_invalidate_range(unsigned long start_addr,unsigned long size);
+void rt_hw_dcache_invalidate_all(void);
+void rt_hw_dcache_disable(void);
+
+/* icache */
+void rt_hw_icache_enable(void);
+void rt_hw_icache_invalidate_all(void);
+void rt_hw_icache_disable(void);
+
 #endif /*__MMU_H__*/