Unverified commit 68224d52 authored by openeuler-ci-bot, committed by Gitee

!99 update patches for sw64 architecture

Merge Pull Request from: @guzitao 
 
These patches generally cover the following tasks:

1. Optimize the kernel core: remove unused code, simplify interfaces, and fix compile errors.
2. Fixes for perf: fix perf_get_regs_user.
3. Add eBPF JIT compiler support.
4. Modify interfaces: reimplement die_if_kernel and fix sys_rt_sigaction.
5. Fixes for KVM: expand the number of SWVM_IRQS, fix the wrong info print of KVM_MEMHOTPLUG, and turn off
   the clock timer of the guest OS.
6. Fix dynamic CPU frequency scaling bugs.
 
 
Link: https://gitee.com/openeuler/kernel/pulls/99 
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
...@@ -14,7 +14,6 @@ config SW64 ...@@ -14,7 +14,6 @@ config SW64
select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_NO_PREEMPT select ARCH_NO_PREEMPT
select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_CMPXCHG_LOCKREF
select GENERIC_SMP_IDLE_THREAD select GENERIC_SMP_IDLE_THREAD
...@@ -24,7 +23,6 @@ config SW64 ...@@ -24,7 +23,6 @@ config SW64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SECCOMP_FILTER
select OLD_SIGACTION
select OLD_SIGSUSPEND select OLD_SIGSUSPEND
select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER select GENERIC_STRNLEN_USER
...@@ -92,11 +90,14 @@ config SW64 ...@@ -92,11 +90,14 @@ config SW64
select ACPI_REDUCED_HARDWARE_ONLY select ACPI_REDUCED_HARDWARE_ONLY
select GENERIC_TIME_VSYSCALL select GENERIC_TIME_VSYSCALL
select SET_FS select SET_FS
select HAVE_PCI
select GENERIC_PCI_IOMAP if PCI
select PCI_MSI_ARCH_FALLBACKS select PCI_MSI_ARCH_FALLBACKS
select DMA_OPS if PCI select DMA_OPS if PCI
select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_REGS_AND_STACK_ACCESS_API
select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_PTE_SPECIAL
select HARDIRQS_SW_RESEND select HARDIRQS_SW_RESEND
select MEMORY_HOTPLUG_SPARSE if MEMORY_HOTPLUG
config LOCKDEP_SUPPORT config LOCKDEP_SUPPORT
def_bool y def_bool y
...@@ -141,6 +142,10 @@ config ARCH_HAS_ILOG2_U64 ...@@ -141,6 +142,10 @@ config ARCH_HAS_ILOG2_U64
config GENERIC_GPIO config GENERIC_GPIO
bool bool
config GENERIC_CALIBRATE_DELAY
bool
default y
config ZONE_DMA32 config ZONE_DMA32
bool bool
default y default y
...@@ -240,6 +245,11 @@ config PLATFORM_XUELANG ...@@ -240,6 +245,11 @@ config PLATFORM_XUELANG
endchoice endchoice
config MIGHT_HAVE_PC_SERIO
bool "Use PC serio device i8042"
select ARCH_MIGHT_HAVE_PC_SERIO
default n
endmenu endmenu
config LOCK_MEMB config LOCK_MEMB
...@@ -509,17 +519,6 @@ config ISA_DMA_API ...@@ -509,17 +519,6 @@ config ISA_DMA_API
bool bool
default y default y
config PCI
bool "PCI Support"
depends on SW64
select GENERIC_PCI_IOMAP
default y
help
Find out whether you have a PCI motherboard. PCI is the name of a
bus system, i.e. the way the CPU talks to the other stuff inside
your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
VESA. If you have PCI, say Y, otherwise N.
config PCI_DOMAINS config PCI_DOMAINS
def_bool PCI def_bool PCI
...@@ -724,7 +723,6 @@ config HZ ...@@ -724,7 +723,6 @@ config HZ
int "HZ of the short timer" int "HZ of the short timer"
default 500 default 500
source "drivers/pci/Kconfig"
source "drivers/eisa/Kconfig" source "drivers/eisa/Kconfig"
source "drivers/pcmcia/Kconfig" source "drivers/pcmcia/Kconfig"
......
...@@ -393,7 +393,6 @@ static void chip3_set_rc_piu(unsigned long node, unsigned long index) ...@@ -393,7 +393,6 @@ static void chip3_set_rc_piu(unsigned long node, unsigned long index)
/* set DMA offset value PCITODMA_OFFSET */ /* set DMA offset value PCITODMA_OFFSET */
write_piu_ior0(node, index, EPDMABAR, PCITODMA_OFFSET); write_piu_ior0(node, index, EPDMABAR, PCITODMA_OFFSET);
if (IS_ENABLED(CONFIG_PCI_MSI)) { if (IS_ENABLED(CONFIG_PCI_MSI)) {
write_piu_ior0(node, index, PIUCONFIG0, 0x38076);
write_piu_ior0(node, index, MSIADDR, MSIX_MSG_ADDR); write_piu_ior0(node, index, MSIADDR, MSIX_MSG_ADDR);
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
write_piu_ior0(node, index, MSICONFIG0 + (i << 7), 0); write_piu_ior0(node, index, MSICONFIG0 + (i << 7), 0);
...@@ -656,8 +655,8 @@ static void handle_dev_int(struct pt_regs *regs) ...@@ -656,8 +655,8 @@ static void handle_dev_int(struct pt_regs *regs)
sw64_io_write(node, DEV_INT_CONFIG, config_val); sw64_io_write(node, DEV_INT_CONFIG, config_val);
} }
void handle_chip_irq(unsigned long type, unsigned long vector, asmlinkage void do_entInt(unsigned long type, unsigned long vector,
unsigned long irq_arg, struct pt_regs *regs) unsigned long irq_arg, struct pt_regs *regs)
{ {
struct pt_regs *old_regs; struct pt_regs *old_regs;
...@@ -738,6 +737,7 @@ void handle_chip_irq(unsigned long type, unsigned long vector, ...@@ -738,6 +737,7 @@ void handle_chip_irq(unsigned long type, unsigned long vector,
} }
pr_crit("PC = %016lx PS = %04lx\n", regs->pc, regs->ps); pr_crit("PC = %016lx PS = %04lx\n", regs->pc, regs->ps);
} }
EXPORT_SYMBOL(do_entInt);
/* /*
* Early fix up the chip3 Root Complex settings * Early fix up the chip3 Root Complex settings
......
...@@ -2,94 +2,12 @@ ...@@ -2,94 +2,12 @@
#ifndef _ASM_SW64_CACHEFLUSH_H #ifndef _ASM_SW64_CACHEFLUSH_H
#define _ASM_SW64_CACHEFLUSH_H #define _ASM_SW64_CACHEFLUSH_H
#include <linux/mm.h> /*
#include <asm/hw_init.h> * DCache: PIPT
* ICache:
/* Caches aren't brain-dead on the sw64. */ * - C3A/B is VIVT with ICTAG, support coherence.
#define flush_cache_all() do { } while (0) * - C4 is VIPT
#define flush_cache_mm(mm) do { } while (0)
#define flush_cache_dup_mm(mm) do { } while (0)
#define flush_cache_range(vma, start, end) do { } while (0)
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
#define flush_dcache_page(page) do { } while (0)
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
/* Note that the following two definitions are _highly_ dependent
* on the contexts in which they are used in the kernel. I personally
* think it is criminal how loosely defined these macros are.
*/ */
/* We need to flush the kernel's icache after loading modules. The
* only other use of this macro is in load_aout_interp which is not
* used on sw64.
* Note that this definition should *not* be used for userspace
* icache flushing. While functional, it is _way_ overkill. The
* icache is tagged with ASNs and it suffices to allocate a new ASN
* for the process.
*/
#ifndef CONFIG_SMP
static inline void
flush_icache_range(unsigned long start, unsigned long end)
{
if (icache_is_vivt_no_ictag())
imb();
}
#define flush_icache_range flush_icache_range
#else
extern void smp_imb(void);
static inline void
flush_icache_range(unsigned long start, unsigned long end)
{
if (icache_is_vivt_no_ictag())
smp_imb();
}
#define flush_icache_range flush_icache_range
#endif
/* We need to flush the userspace icache after setting breakpoints in
* ptrace.
* Instead of indiscriminately using imb, take advantage of the fact
* that icache entries are tagged with the ASN and load a new mm context.
*/
/* ??? Ought to use this in arch/sw_64/kernel/signal.c too. */
#ifndef CONFIG_SMP
#include <linux/sched.h>
extern void __load_new_mm_context(struct mm_struct *);
static inline void
flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
if ((vma->vm_flags & VM_EXEC) && icache_is_vivt_no_ictag())
imb();
}
#define flush_icache_user_page flush_icache_user_page
#else
extern void flush_icache_user_page(struct vm_area_struct *vma,
struct page *page,
unsigned long addr, int len);
#define flush_icache_user_page flush_icache_user_page
#endif
/* This is used only in __do_fault and do_swap_page. */
#define flush_icache_page(vma, page) \
flush_icache_user_page((vma), (page), 0, 0)
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
flush_icache_user_page(vma, page, vaddr, len); \
} while (0)
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
#include <asm-generic/cacheflush.h> #include <asm-generic/cacheflush.h>
#endif /* _ASM_SW64_CACHEFLUSH_H */ #endif /* _ASM_SW64_CACHEFLUSH_H */
...@@ -44,13 +44,13 @@ struct clk { ...@@ -44,13 +44,13 @@ struct clk {
int clk_init(void); int clk_init(void);
int sw64_set_rate(int index, unsigned long rate); void sw64_set_rate(unsigned long rate);
struct clk *sw64_clk_get(struct device *dev, const char *id); struct clk *sw64_clk_get(struct device *dev, const char *id);
unsigned long sw64_clk_get_rate(struct clk *clk);
void sw64_update_clockevents(unsigned long cpu, u32 freq); void sw64_update_clockevents(unsigned long cpu, u32 freq);
void sw64_store_policy(struct cpufreq_policy *policy); void sw64_store_policy(struct cpufreq_policy *policy);
unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy);
#endif /* _ASM_SW64_CLOCK_H */ #endif /* _ASM_SW64_CLOCK_H */
...@@ -17,16 +17,12 @@ ...@@ -17,16 +17,12 @@
#define HMC_wrksp 0x0E #define HMC_wrksp 0x0E
#define HMC_mtinten 0x0F #define HMC_mtinten 0x0F
#define HMC_load_mm 0x11 #define HMC_load_mm 0x11
#define HMC_rdpcbb 0x12
#define HMC_wrpcbb 0x13
#define HMC_tbisasn 0x14 #define HMC_tbisasn 0x14
#define HMC_tbivpn 0x19 #define HMC_tbivpn 0x19
#define HMC_ret 0x1A #define HMC_ret 0x1A
#define HMC_wrvpcr 0x29 #define HMC_wrvpcr 0x29
#define HMC_wrfen 0x2B #define HMC_wrfen 0x2B
#define HMC_kvcpucb 0x2C
#define HMC_sflush 0x2F #define HMC_sflush 0x2F
#define HMC_swpctx 0x30
#define HMC_entervm 0x31 #define HMC_entervm 0x31
#define HMC_hcall 0x32 #define HMC_hcall 0x32
#define HMC_tbi 0x33 #define HMC_tbi 0x33
...@@ -45,23 +41,27 @@ ...@@ -45,23 +41,27 @@
/* 0x80 - 0xBF : User Level HMC routine */ /* 0x80 - 0xBF : User Level HMC routine */
#define HMC_bpt 0x80 #include <uapi/asm/hmcall.h>
#define HMC_callsys 0x83
#define HMC_imb 0x86 /* Following will be deprecated from user level invocation */
#define HMC_rwreg 0x87 #define HMC_rwreg 0x87
#define HMC_rdunique 0x9E
#define HMC_wrunique 0x9F
#define HMC_sz_uflush 0xA8 #define HMC_sz_uflush 0xA8
#define HMC_gentrap 0xAA
#define HMC_wrperfmon 0xB0
#define HMC_longtime 0xB1 #define HMC_longtime 0xB1
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/init.h>
extern void __init fixup_hmcall(void);
extern void halt(void) __attribute__((noreturn)); extern void halt(void) __attribute__((noreturn));
#define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt)) #define __halt() __asm__ __volatile__ ("sys_call %0 #halt" : : "i" (HMC_halt))
#define fpu_enable() \
{ \
__asm__ __volatile__("sys_call %0" : : "i" (HMC_wrfen));\
}
#define imb() \ #define imb() \
__asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory") __asm__ __volatile__ ("sys_call %0 #imb" : : "i" (HMC_imb) : "memory")
...@@ -156,8 +156,6 @@ __CALL_HMC_R0(rdksp, unsigned long); ...@@ -156,8 +156,6 @@ __CALL_HMC_R0(rdksp, unsigned long);
__CALL_HMC_W1(wrksp, unsigned long); __CALL_HMC_W1(wrksp, unsigned long);
__CALL_HMC_W2(load_mm, unsigned long, unsigned long); __CALL_HMC_W2(load_mm, unsigned long, unsigned long);
__CALL_HMC_R0(rdpcbb, unsigned long);
__CALL_HMC_W1(wrpcbb, unsigned long);
__CALL_HMC_R0(rdptbr, unsigned long); __CALL_HMC_R0(rdptbr, unsigned long);
__CALL_HMC_W1(wrptbr, unsigned long); __CALL_HMC_W1(wrptbr, unsigned long);
...@@ -166,7 +164,6 @@ __CALL_HMC_RW1(swpipl, unsigned long, unsigned long); ...@@ -166,7 +164,6 @@ __CALL_HMC_RW1(swpipl, unsigned long, unsigned long);
__CALL_HMC_R0(whami, unsigned long); __CALL_HMC_R0(whami, unsigned long);
__CALL_HMC_RW1(rdio64, unsigned long, unsigned long); __CALL_HMC_RW1(rdio64, unsigned long, unsigned long);
__CALL_HMC_RW1(rdio32, unsigned int, unsigned long); __CALL_HMC_RW1(rdio32, unsigned int, unsigned long);
__CALL_HMC_R0(kvcpucb, unsigned long);
__CALL_HMC_R0(sleepen, unsigned long); __CALL_HMC_R0(sleepen, unsigned long);
__CALL_HMC_R0(mtinten, unsigned long); __CALL_HMC_R0(mtinten, unsigned long);
__CALL_HMC_W2(wrent, void*, unsigned long); __CALL_HMC_W2(wrent, void*, unsigned long);
...@@ -178,6 +175,7 @@ __CALL_HMC_W1(wrtimer, unsigned long); ...@@ -178,6 +175,7 @@ __CALL_HMC_W1(wrtimer, unsigned long);
__CALL_HMC_RW3(tbivpn, unsigned long, unsigned long, unsigned long, unsigned long); __CALL_HMC_RW3(tbivpn, unsigned long, unsigned long, unsigned long, unsigned long);
__CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long);
__CALL_HMC_W1(wrtp, unsigned long);
/* /*
* TB routines.. * TB routines..
*/ */
...@@ -193,12 +191,28 @@ __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long); ...@@ -193,12 +191,28 @@ __CALL_HMC_RW2(cpuid, unsigned long, unsigned long, unsigned long);
}) })
#define tbi(x, y) __tbi(x, __r17 = (y), "1" (__r17)) #define tbi(x, y) __tbi(x, __r17 = (y), "1" (__r17))
#define tbisi(x) __tbi(1, __r17 = (x), "1" (__r17))
#define tbisd(x) __tbi(2, __r17 = (x), "1" (__r17)) /* Invalidate all TLB, only used by hypervisor */
#define tbis(x) __tbi(3, __r17 = (x), "1" (__r17))
#define tbiap() __tbi(-1, /* no second argument */)
#define tbia() __tbi(-2, /* no second argument */) #define tbia() __tbi(-2, /* no second argument */)
/* Invalidate TLB for all processes with current VPN */
#define tbivp() __tbi(-1, /* no second argument */)
/* Invalidate all TLB with current VPN */
#define tbiv() __tbi(0, /* no second argument */)
/* Invalidate ITLB of addr with current UPN and VPN */
#define tbisi(addr) __tbi(1, __r17 = (addr), "1" (__r17))
/* Invalidate DTLB of addr with current UPN and VPN */
#define tbisd(addr) __tbi(2, __r17 = (addr), "1" (__r17))
/* Invalidate TLB of addr with current UPN and VPN */
#define tbis(addr) __tbi(3, __r17 = (addr), "1" (__r17))
/* Invalidate all user TLB with current UPN and VPN */
#define tbiu() __tbi(4, /* no second argument */)
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -18,16 +18,8 @@ struct cache_desc { ...@@ -18,16 +18,8 @@ struct cache_desc {
}; };
struct cpuinfo_sw64 { struct cpuinfo_sw64 {
unsigned long loops_per_jiffy;
unsigned long last_asn; unsigned long last_asn;
int need_new_asn;
int asn_lock;
unsigned long ipi_count; unsigned long ipi_count;
unsigned long prof_multiplier;
unsigned long prof_counter;
unsigned char mcheck_expected;
unsigned char mcheck_taken;
unsigned char mcheck_extra;
struct cache_desc icache; /* Primary I-cache */ struct cache_desc icache; /* Primary I-cache */
struct cache_desc dcache; /* Primary D or combined I/D cache */ struct cache_desc dcache; /* Primary D or combined I/D cache */
struct cache_desc scache; /* Secondary cache */ struct cache_desc scache; /* Secondary cache */
...@@ -45,7 +37,6 @@ struct cpu_desc_t { ...@@ -45,7 +37,6 @@ struct cpu_desc_t {
char vendor_id[16]; char vendor_id[16];
char model_id[64]; char model_id[64];
unsigned long frequency; unsigned long frequency;
__u8 run_mode;
} __randomize_layout; } __randomize_layout;
#define MAX_NUMSOCKETS 8 #define MAX_NUMSOCKETS 8
...@@ -74,6 +65,8 @@ struct memmap_entry { ...@@ -74,6 +65,8 @@ struct memmap_entry {
}; };
extern struct cpuinfo_sw64 cpu_data[NR_CPUS]; extern struct cpuinfo_sw64 cpu_data[NR_CPUS];
extern void store_cpu_data(int cpu);
extern struct cpu_desc_t cpu_desc; extern struct cpu_desc_t cpu_desc;
extern struct socket_desc_t socket_desc[MAX_NUMSOCKETS]; extern struct socket_desc_t socket_desc[MAX_NUMSOCKETS];
extern int memmap_nr; extern int memmap_nr;
...@@ -89,12 +82,11 @@ static inline unsigned long get_cpu_freq(void) ...@@ -89,12 +82,11 @@ static inline unsigned long get_cpu_freq(void)
return cpu_desc.frequency; return cpu_desc.frequency;
} }
static inline bool icache_is_vivt_no_ictag(void) static inline void update_cpu_freq(unsigned long freq)
{ {
/* freq = freq * 1000000;
* Icache of C3B is vivt with ICtag. C4 will be vipt. if (cpu_desc.frequency != freq)
*/ cpu_desc.frequency = freq;
return (cpu_desc.arch_var == 0x3 && cpu_desc.arch_rev == 0x1);
} }
#define EMUL_FLAG (0x1UL << 63) #define EMUL_FLAG (0x1UL << 63)
......
...@@ -41,10 +41,8 @@ enum sw64_irq_type { ...@@ -41,10 +41,8 @@ enum sw64_irq_type {
extern struct irqaction timer_irqaction; extern struct irqaction timer_irqaction;
extern void init_rtc_irq(irq_handler_t handler); extern void init_rtc_irq(irq_handler_t handler);
extern void handle_irq(int irq); extern void handle_irq(int irq);
extern void handle_ipi(struct pt_regs *); extern void handle_ipi(struct pt_regs *regs);
extern void __init sw64_init_irq(void); extern void __init sw64_init_irq(void);
extern irqreturn_t timer_interrupt(int irq, void *dev); extern irqreturn_t timer_interrupt(int irq, void *dev);
extern void handle_chip_irq(unsigned long type, unsigned long vector,
unsigned long irq_arg, struct pt_regs *regs);
#endif #endif
...@@ -13,38 +13,14 @@ ...@@ -13,38 +13,14 @@
#include <asm/io.h> #include <asm/io.h>
/* /*
* Force a context reload. This is needed when we change the page * Load a mm context. This is needed when we change the page
* table pointer or when we update the ASN of the current process. * table pointer(CSR:PTBR) or when we update the ASID.
*
*/ */
static inline unsigned long
__reload_thread(struct pcb_struct *pcb)
{
register unsigned long a0 __asm__("$16");
register unsigned long v0 __asm__("$0");
a0 = virt_to_phys(pcb);
__asm__ __volatile__(
"sys_call %2 #__reload_thread"
: "=r"(v0), "=r"(a0)
: "i"(HMC_swpctx), "r"(a0)
: "$1", "$22", "$23", "$24", "$25");
return v0;
}
#define load_asn_ptbr load_mm #define load_asn_ptbr load_mm
/* /*
* The maximum ASN's the processor supports. * The maximum ASN's the processor supports. ASN is called ASID too.
*
* If a processor implements address space numbers (ASNs), and the old
* PTE has the Address Space Match (ASM) bit clear (ASNs in use) and
* the Valid bit set, then entries can also effectively be made coherent
* by assigning a new, unused ASN to the currently running process and
* not reusing the previous ASN before calling the appropriate HMcode
* routine to invalidate the translation buffer (TB).
*
*/ */
#ifdef CONFIG_SUBARCH_C3B #ifdef CONFIG_SUBARCH_C3B
...@@ -60,12 +36,7 @@ __reload_thread(struct pcb_struct *pcb) ...@@ -60,12 +36,7 @@ __reload_thread(struct pcb_struct *pcb)
*/ */
#include <asm/hw_init.h> #include <asm/hw_init.h>
#ifdef CONFIG_SMP
#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn)
#else
extern unsigned long last_asn;
#define cpu_last_asn(cpuid) last_asn
#endif /* CONFIG_SMP */
#define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) #define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN)
#define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) #define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1)
...@@ -77,7 +48,7 @@ extern unsigned long last_asn; ...@@ -77,7 +48,7 @@ extern unsigned long last_asn;
* need to do "p->mm->context = 0". * need to do "p->mm->context = 0".
* *
* If we need more ASN's than the processor has, we invalidate the old * If we need more ASN's than the processor has, we invalidate the old
* user TLB's (tbiap()) and start a new ASN version. That will automatically * user TLB's (tbivp()) and start a new ASN version. That will automatically
* force a new asn for any other processes the next time they want to * force a new asn for any other processes the next time they want to
* run. * run.
*/ */
...@@ -89,7 +60,7 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) ...@@ -89,7 +60,7 @@ __get_new_mm_context(struct mm_struct *mm, long cpu)
unsigned long next = asn + 1; unsigned long next = asn + 1;
if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK) { if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK) {
tbiap(); tbivp();
next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION;
} }
cpu_last_asn(cpu) = next; cpu_last_asn(cpu) = next;
...@@ -97,18 +68,13 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) ...@@ -97,18 +68,13 @@ __get_new_mm_context(struct mm_struct *mm, long cpu)
} }
static inline void static inline void
switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, switch_mm_irqs_off(struct mm_struct *prev_mm, struct mm_struct *next_mm,
struct task_struct *next) struct task_struct *next)
{ {
/* Check if our ASN is of an older version, and thus invalid. */ /* Check if our ASN is of an older version, and thus invalid. */
unsigned long asn; unsigned long asn, mmc, ptbr;
unsigned long mmc;
long cpu = smp_processor_id(); long cpu = smp_processor_id();
#ifdef CONFIG_SMP
cpu_data[cpu].asn_lock = 1;
barrier();
#endif
asn = cpu_last_asn(cpu); asn = cpu_last_asn(cpu);
mmc = next_mm->context.asid[cpu]; mmc = next_mm->context.asid[cpu];
if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) {
...@@ -116,50 +82,31 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, ...@@ -116,50 +82,31 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm,
mmc = __get_new_mm_context(next_mm, cpu); mmc = __get_new_mm_context(next_mm, cpu);
next_mm->context.asid[cpu] = mmc; next_mm->context.asid[cpu] = mmc;
} }
#ifdef CONFIG_SMP
else
cpu_data[cpu].need_new_asn = 1;
#endif
/* /*
* Always update the PCB ASN. Another thread may have allocated * Update CSR:UPN and CSR:PTBR. Another thread may have allocated
* a new mm->context (via flush_tlb_mm) without the ASN serial * a new mm->context[asid] (via flush_tlb_mm) without the ASN serial
* number wrapping. We have no way to detect when this is needed. * number wrapping. We have no way to detect when this is needed.
*/ */
task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; asn = mmc & HARDWARE_ASN_MASK;
/* ptbr = virt_to_pfn(next_mm->pgd);
* Always update the PCB PTBR. If next is kernel thread, it must load_asn_ptbr(asn, ptbr);
* update PTBR. If next is user process, it's ok to update PTBR.
*/
task_thread_info(next)->pcb.ptbr = virt_to_pfn(next_mm->pgd);
load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr);
} }
extern void __load_new_mm_context(struct mm_struct *); #define switch_mm_irqs_off switch_mm_irqs_off
#ifdef CONFIG_SMP
#define check_mmu_context() \
do { \
int cpu = smp_processor_id(); \
cpu_data[cpu].asn_lock = 0; \
barrier(); \
if (cpu_data[cpu].need_new_asn) { \
struct mm_struct *mm = current->active_mm; \
cpu_data[cpu].need_new_asn = 0; \
if (!mm->context.asid[cpu]) \
__load_new_mm_context(mm); \
} \
} while (0)
#else
#define check_mmu_context() do { } while (0)
#endif
static inline void activate_mm(struct mm_struct *prev_mm, static inline void
struct mm_struct *next_mm) switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm,
struct task_struct *tsk)
{ {
__load_new_mm_context(next_mm); unsigned long flags;
local_irq_save(flags);
switch_mm_irqs_off(prev_mm, next_mm, tsk);
local_irq_restore(flags);
} }
#define activate_mm(prev, next) switch_mm(prev, next, current)
#define deactivate_mm(tsk, mm) do { } while (0) #define deactivate_mm(tsk, mm) do { } while (0)
static inline int init_new_context(struct task_struct *tsk, static inline int init_new_context(struct task_struct *tsk,
...@@ -169,8 +116,6 @@ static inline int init_new_context(struct task_struct *tsk, ...@@ -169,8 +116,6 @@ static inline int init_new_context(struct task_struct *tsk,
for_each_possible_cpu(i) for_each_possible_cpu(i)
mm->context.asid[i] = 0; mm->context.asid[i] = 0;
if (tsk != current)
task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd);
return 0; return 0;
} }
...@@ -182,7 +127,6 @@ static inline void destroy_context(struct mm_struct *mm) ...@@ -182,7 +127,6 @@ static inline void destroy_context(struct mm_struct *mm)
static inline void enter_lazy_tlb(struct mm_struct *mm, static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *tsk) struct task_struct *tsk)
{ {
task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd);
} }
static inline int arch_dup_mmap(struct mm_struct *oldmm, static inline int arch_dup_mmap(struct mm_struct *oldmm,
......
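(Editorial illustration, not part of the patch.) The ASN handling in the mmu_context.h hunk above keeps a per-CPU counter whose low bits are the hardware ASN and whose high bits are a version number; when the hardware bits are exhausted, tbivp() drops all user translations for the current VPN and the version is bumped, so every stale mm is forced to allocate a fresh ASN on its next switch. A minimal standalone sketch of that wrap-around arithmetic, assuming a hypothetical WIDTH_HARDWARE_ASN of 10 bits (the real width comes from the subarch config and is not shown in this diff):

#include <stdio.h>

/* Hypothetical ASN width; the kernel takes this from the subarch config. */
#define WIDTH_HARDWARE_ASN	10
#define ASN_FIRST_VERSION	(1UL << WIDTH_HARDWARE_ASN)
#define HARDWARE_ASN_MASK	((1UL << WIDTH_HARDWARE_ASN) - 1)

/* Mirrors __get_new_mm_context(): take the next ASN, and on wrap start a
 * new version (this is the point where the kernel calls tbivp()). */
static unsigned long get_new_asn(unsigned long *last_asn)
{
	unsigned long asn = *last_asn;
	unsigned long next = asn + 1;

	if ((asn & HARDWARE_ASN_MASK) >= HARDWARE_ASN_MASK)
		next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION;
	*last_asn = next;
	return next;
}

int main(void)
{
	unsigned long last_asn = ASN_FIRST_VERSION + HARDWARE_ASN_MASK - 1;

	printf("%#lx\n", get_new_asn(&last_asn));	/* 0x7ff: last ASN of version 1 */
	printf("%#lx\n", get_new_asn(&last_asn));	/* 0x800: wrapped, version 2, ASN 0 */
	return 0;
}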
...@@ -40,12 +40,7 @@ struct pt_regs { ...@@ -40,12 +40,7 @@ struct pt_regs {
unsigned long r26; unsigned long r26;
unsigned long r27; unsigned long r27;
unsigned long r28; unsigned long r28;
unsigned long hae; /* These are saved by HMcode: */
/* JRP - These are the values provided to a0-a2 by HMcode */
unsigned long trap_a0;
unsigned long trap_a1;
unsigned long trap_a2;
/* These are saved by HMcode: */
unsigned long ps; unsigned long ps;
unsigned long pc; unsigned long pc;
unsigned long gp; unsigned long gp;
...@@ -54,7 +49,6 @@ struct pt_regs { ...@@ -54,7 +49,6 @@ struct pt_regs {
unsigned long r18; unsigned long r18;
}; };
#define arch_has_single_step() (1)
#define user_mode(regs) (((regs)->ps & 8) != 0) #define user_mode(regs) (((regs)->ps & 8) != 0)
#define instruction_pointer(regs) ((regs)->pc) #define instruction_pointer(regs) ((regs)->pc)
#define profile_pc(regs) instruction_pointer(regs) #define profile_pc(regs) instruction_pointer(regs)
......
...@@ -14,9 +14,11 @@ typedef struct { ...@@ -14,9 +14,11 @@ typedef struct {
unsigned long sig[_NSIG_WORDS]; unsigned long sig[_NSIG_WORDS];
} sigset_t; } sigset_t;
#ifdef CONFIG_OLD_SIGACTION struct odd_sigaction {
#define __ARCH_HAS_SA_RESTORER __sighandler_t sa_handler;
#endif old_sigset_t sa_mask;
int sa_flags;
};
#include <asm/sigcontext.h> #include <asm/sigcontext.h>
#endif #endif
...@@ -39,7 +39,7 @@ struct processor_state { ...@@ -39,7 +39,7 @@ struct processor_state {
struct callee_saved_fpregs fpregs; struct callee_saved_fpregs fpregs;
unsigned long fpcr; unsigned long fpcr;
#ifdef CONFIG_HIBERNATION #ifdef CONFIG_HIBERNATION
struct pcb_struct pcb; unsigned long sp;
struct vcpucb vcb; struct vcpucb vcb;
#endif #endif
}; };
......
...@@ -6,27 +6,39 @@ ...@@ -6,27 +6,39 @@
extern void __fpstate_save(struct task_struct *save_to); extern void __fpstate_save(struct task_struct *save_to);
extern void __fpstate_restore(struct task_struct *restore_from); extern void __fpstate_restore(struct task_struct *restore_from);
extern struct task_struct *__switch_to(unsigned long pcb, extern struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *prev, struct task_struct *next); struct task_struct *next);
extern void restore_da_match_after_sched(void); extern void restore_da_match_after_sched(void);
static inline void fpstate_save(struct task_struct *task) static inline void aux_save(struct task_struct *task)
{ {
if (likely(!(task->flags & PF_KTHREAD))) struct pcb_struct *pcb;
if (likely(!(task->flags & PF_KTHREAD))) {
pcb = &task_thread_info(task)->pcb;
pcb->usp = rdusp();
pcb->tp = rtid();
__fpstate_save(task); __fpstate_save(task);
}
} }
static inline void fpstate_restore(struct task_struct *task) static inline void aux_restore(struct task_struct *task)
{ {
if (likely(!(task->flags & PF_KTHREAD))) struct pcb_struct *pcb;
if (likely(!(task->flags & PF_KTHREAD))) {
pcb = &task_thread_info(task)->pcb;
wrusp(pcb->usp);
wrtp(pcb->tp);
__fpstate_restore(task); __fpstate_restore(task);
}
} }
static inline void __switch_to_aux(struct task_struct *prev, static inline void __switch_to_aux(struct task_struct *prev,
struct task_struct *next) struct task_struct *next)
{ {
fpstate_save(prev); aux_save(prev);
fpstate_restore(next); aux_restore(next);
} }
...@@ -34,10 +46,8 @@ static inline void __switch_to_aux(struct task_struct *prev, ...@@ -34,10 +46,8 @@ static inline void __switch_to_aux(struct task_struct *prev,
do { \ do { \
struct task_struct *__prev = (prev); \ struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \ struct task_struct *__next = (next); \
__u64 __nextpcb = virt_to_phys(&task_thread_info(__next)->pcb); \
__switch_to_aux(__prev, __next); \ __switch_to_aux(__prev, __next); \
(last) = __switch_to(__nextpcb, __prev, __next); \ (last) = __switch_to(__prev, __next); \
check_mmu_context(); \
} while (0) } while (0)
......
...@@ -15,13 +15,8 @@ typedef struct { ...@@ -15,13 +15,8 @@ typedef struct {
struct pcb_struct { struct pcb_struct {
unsigned long ksp;
unsigned long usp; unsigned long usp;
unsigned long ptbr; unsigned long tp;
unsigned int pcc;
unsigned int asn;
unsigned long unique;
unsigned long flags;
unsigned long da_match, da_mask; unsigned long da_match, da_mask;
unsigned long dv_match, dv_mask; unsigned long dv_match, dv_mask;
unsigned long dc_ctl; unsigned long dc_ctl;
...@@ -39,14 +34,19 @@ struct thread_info { ...@@ -39,14 +34,19 @@ struct thread_info {
int preempt_count; /* 0 => preemptible, <0 => BUG */ int preempt_count; /* 0 => preemptible, <0 => BUG */
unsigned int status; /* thread-synchronous flags */ unsigned int status; /* thread-synchronous flags */
int bpt_nsaved;
unsigned long bpt_addr[2]; /* breakpoint handling */
unsigned int bpt_insn[2];
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
unsigned long dyn_ftrace_addr; unsigned long dyn_ftrace_addr;
#endif #endif
}; };
static __always_inline u64 rtid(void)
{
u64 val;
asm volatile("rtid %0" : "=r" (val) : :);
return val;
}
/* /*
* Macros/functions for gaining access to the thread information structure. * Macros/functions for gaining access to the thread information structure.
*/ */
......
...@@ -8,13 +8,26 @@ ...@@ -8,13 +8,26 @@
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/hw_init.h> #include <asm/hw_init.h>
#include <asm/hmcall.h> #include <asm/hmcall.h>
#include <asm/mmu_context.h>
extern void __load_new_mm_context(struct mm_struct *);
static inline void flush_tlb_current(struct mm_struct *mm) static inline void flush_tlb_current(struct mm_struct *mm)
{ {
__load_new_mm_context(mm); unsigned long mmc, asn, ptbr, flags;
local_irq_save(flags);
mmc = __get_new_mm_context(mm, smp_processor_id());
mm->context.asid[smp_processor_id()] = mmc;
/*
* Force a new ASN for a task. Note that there is no way to
* write UPN only now, so call load_asn_ptbr here.
*/
asn = mmc & HARDWARE_ASN_MASK;
ptbr = virt_to_pfn(mm->pgd);
load_asn_ptbr(asn, ptbr);
local_irq_restore(flags);
} }
/* /*
...@@ -27,12 +40,10 @@ static inline void flush_tlb_current_page(struct mm_struct *mm, ...@@ -27,12 +40,10 @@ static inline void flush_tlb_current_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
{ {
if (vma->vm_flags & VM_EXEC) { if (vma->vm_flags & VM_EXEC)
tbi(3, addr); tbis(addr);
if (icache_is_vivt_no_ictag()) else
imb(); tbisd(addr);
} else
tbi(2, addr);
} }
...@@ -65,7 +76,7 @@ static inline void flush_tlb_other(struct mm_struct *mm) ...@@ -65,7 +76,7 @@ static inline void flush_tlb_other(struct mm_struct *mm)
*/ */
static inline void flush_tlb_all(void) static inline void flush_tlb_all(void)
{ {
tbia(); tbiv();
} }
/* Flush a specified user mapping. */ /* Flush a specified user mapping. */
......
...@@ -292,6 +292,8 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long len) ...@@ -292,6 +292,8 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long len)
{ {
return __copy_user((__force void *)to, from, len); return __copy_user((__force void *)to, from, len);
} }
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
extern long __clear_user(void __user *to, long len); extern long __clear_user(void __user *to, long len);
......
...@@ -32,7 +32,7 @@ struct vcpucb { ...@@ -32,7 +32,7 @@ struct vcpucb {
unsigned long vcpu_irq_disabled; unsigned long vcpu_irq_disabled;
unsigned long vcpu_irq; unsigned long vcpu_irq;
unsigned long ptbr; unsigned long ptbr;
unsigned long int_stat0; unsigned long tid;
unsigned long int_stat1; unsigned long int_stat1;
unsigned long int_stat2; unsigned long int_stat2;
unsigned long int_stat3; unsigned long int_stat3;
......
...@@ -7,8 +7,10 @@ ...@@ -7,8 +7,10 @@
#define HMC_bpt 0x80 #define HMC_bpt 0x80
#define HMC_callsys 0x83 #define HMC_callsys 0x83
#define HMC_imb 0x86 #define HMC_imb 0x86
#define HMC_rdunique 0x9E #define HMC_rdtp 0x9E
#define HMC_wrunique 0x9F #define HMC_wrtp 0x9F
#define HMC_rdunique HMC_rdtp
#define HMC_wrunique HMC_wrtp
#define HMC_gentrap 0xAA #define HMC_gentrap 0xAA
#define HMC_wrperfmon 0xB0 #define HMC_wrperfmon 0xB0
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
/* /*
* KVM SW specific structures and definitions. * KVM SW specific structures and definitions.
*/ */
#define SWVM_IRQS 64 #define SWVM_IRQS 256
enum SW64_KVM_IRQ { enum SW64_KVM_IRQ {
SW64_KVM_IRQ_IPI = 27, SW64_KVM_IRQ_IPI = 27,
SW64_KVM_IRQ_TIMER = 9, SW64_KVM_IRQ_TIMER = 9,
......
...@@ -36,7 +36,8 @@ struct user_fpsimd_state { ...@@ -36,7 +36,8 @@ struct user_fpsimd_state {
#define FPREG_END 62 #define FPREG_END 62
#define FPCR 63 #define FPCR 63
#define PC 64 #define PC 64
#define UNIQUE 65 #define TP 65
#define UNIQUE TP
#define VECREG_BASE 67 #define VECREG_BASE 67
#define VECREG_END 161 #define VECREG_END 161
#define F31_V1 98 #define F31_V1 98
......
...@@ -2,15 +2,13 @@ ...@@ -2,15 +2,13 @@
#ifndef _UAPI_ASM_SW64_SIGCONTEXT_H #ifndef _UAPI_ASM_SW64_SIGCONTEXT_H
#define _UAPI_ASM_SW64_SIGCONTEXT_H #define _UAPI_ASM_SW64_SIGCONTEXT_H
/*
* Signal context structure
*
* The context is saved before a signal handler is invoked, and it is
* restored by sys_sigreturn / sys_rt_sigreturn.
*/
struct sigcontext { struct sigcontext {
/*
* What should we have here? I'd probably better use the same
* stack layout as DEC Unix, just in case we ever want to try
* running their binaries..
*
* This is the basic layout, but I don't know if we'll ever
* actually fill in all the values..
*/
long sc_onstack; long sc_onstack;
long sc_mask; long sc_mask;
long sc_pc; long sc_pc;
...@@ -19,6 +17,7 @@ struct sigcontext { ...@@ -19,6 +17,7 @@ struct sigcontext {
long sc_ownedfp; long sc_ownedfp;
long sc_fpregs[128]; /* SIMD-FP */ long sc_fpregs[128]; /* SIMD-FP */
unsigned long sc_fpcr; unsigned long sc_fpcr;
/* TODO: Following are unused, to be removed and synced with libc */
unsigned long sc_fp_control; unsigned long sc_fp_control;
unsigned long sc_reserved1, sc_reserved2; unsigned long sc_reserved1, sc_reserved2;
unsigned long sc_ssize; unsigned long sc_ssize;
......
...@@ -17,7 +17,7 @@ obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \ ...@@ -17,7 +17,7 @@ obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \
irq_sw64.o signal.o setup.o ptrace.o time.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \
systbls.o dup_print.o tc.o timer.o \ systbls.o dup_print.o tc.o timer.o \
insn.o early_init.o topology.o cacheinfo.o \ insn.o early_init.o topology.o cacheinfo.o \
vdso.o vdso/ vdso.o vdso/ hmcall.o
obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_STACKTRACE) += stacktrace.o
...@@ -31,9 +31,13 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o ...@@ -31,9 +31,13 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o
obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_PCI) += pci_common.o
obj-$(CONFIG_RELOCATABLE) += relocate.o obj-$(CONFIG_RELOCATABLE) += relocate.o
obj-$(CONFIG_DEBUG_FS) += segvdbg.o bindvcpu.o obj-$(CONFIG_DEBUG_FS) += segvdbg.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifeq ($(CONFIG_DEBUG_FS)$(CONFIG_NUMA),yy)
obj-y += bindvcpu.o
endif
ifndef CONFIG_PCI ifndef CONFIG_PCI
obj-y += pci-noop.o obj-y += pci-noop.o
endif endif
......
...@@ -33,9 +33,8 @@ void foo(void) ...@@ -33,9 +33,8 @@ void foo(void)
OFFSET(PSTATE_FPREGS, processor_state, fpregs); OFFSET(PSTATE_FPREGS, processor_state, fpregs);
OFFSET(PSTATE_FPCR, processor_state, fpcr); OFFSET(PSTATE_FPCR, processor_state, fpcr);
#ifdef CONFIG_HIBERNATION #ifdef CONFIG_HIBERNATION
OFFSET(PSTATE_PCB, processor_state, pcb); OFFSET(PSTATE_SP, processor_state, sp);
#endif #endif
OFFSET(PCB_KSP, pcb_struct, ksp);
OFFSET(PBE_ADDR, pbe, address); OFFSET(PBE_ADDR, pbe, address);
OFFSET(PBE_ORIG_ADDR, pbe, orig_address); OFFSET(PBE_ORIG_ADDR, pbe, orig_address);
OFFSET(PBE_NEXT, pbe, next); OFFSET(PBE_NEXT, pbe, next);
...@@ -89,9 +88,6 @@ void foo(void) ...@@ -89,9 +88,6 @@ void foo(void)
DEFINE(PT_REGS_R26, offsetof(struct pt_regs, r26)); DEFINE(PT_REGS_R26, offsetof(struct pt_regs, r26));
DEFINE(PT_REGS_R27, offsetof(struct pt_regs, r27)); DEFINE(PT_REGS_R27, offsetof(struct pt_regs, r27));
DEFINE(PT_REGS_R28, offsetof(struct pt_regs, r28)); DEFINE(PT_REGS_R28, offsetof(struct pt_regs, r28));
DEFINE(PT_REGS_TRAP_A0, offsetof(struct pt_regs, trap_a0));
DEFINE(PT_REGS_TRAP_A1, offsetof(struct pt_regs, trap_a1));
DEFINE(PT_REGS_TRAP_A2, offsetof(struct pt_regs, trap_a2));
DEFINE(PT_REGS_PS, offsetof(struct pt_regs, ps)); DEFINE(PT_REGS_PS, offsetof(struct pt_regs, ps));
DEFINE(PT_REGS_PC, offsetof(struct pt_regs, pc)); DEFINE(PT_REGS_PC, offsetof(struct pt_regs, pc));
DEFINE(PT_REGS_GP, offsetof(struct pt_regs, gp)); DEFINE(PT_REGS_GP, offsetof(struct pt_regs, gp));
...@@ -222,4 +218,5 @@ void foo(void) ...@@ -222,4 +218,5 @@ void foo(void)
OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]); OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]);
OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]); OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]);
BLANK(); BLANK();
DEFINE(ASM_THREAD_SIZE, THREAD_SIZE);
} }
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/debug.h> #include <asm/debug.h>
extern bool bind_vcpu_enabled; __read_mostly bool bind_vcpu_enabled;
EXPORT_SYMBOL(bind_vcpu_enabled);
static int __init bind_vcpu_init(void) static int __init bind_vcpu_init(void)
{ {
......
...@@ -109,14 +109,21 @@ struct clk *sw64_clk_get(struct device *dev, const char *id) ...@@ -109,14 +109,21 @@ struct clk *sw64_clk_get(struct device *dev, const char *id)
} }
EXPORT_SYMBOL(sw64_clk_get); EXPORT_SYMBOL(sw64_clk_get);
unsigned long sw64_clk_get_rate(struct clk *clk) unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy)
{ {
if (!clk) int i;
return 0; u64 val;
return (unsigned long)clk->rate; val = sw64_io_read(0, CLK_CTL);
val = val >> CORE_PLL2_CFG_SHIFT;
for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) {
if (cpu_freq[val] == cpu_freq[i])
return cpu_freq[i];
}
return 0;
} }
EXPORT_SYMBOL(sw64_clk_get_rate); EXPORT_SYMBOL(__sw64_cpufreq_get);
void sw64_store_policy(struct cpufreq_policy *policy) void sw64_store_policy(struct cpufreq_policy *policy)
{ {
...@@ -124,15 +131,17 @@ void sw64_store_policy(struct cpufreq_policy *policy) ...@@ -124,15 +131,17 @@ void sw64_store_policy(struct cpufreq_policy *policy)
} }
EXPORT_SYMBOL_GPL(sw64_store_policy); EXPORT_SYMBOL_GPL(sw64_store_policy);
int sw64_set_rate(int index, unsigned long rate) void sw64_set_rate(unsigned long rate)
{ {
unsigned int i, val; unsigned int i, val;
int index = -1;
rate /= 1000000; rate /= 1000000;
for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) { for (i = 0; i < sizeof(cpu_freq)/sizeof(int); i++) {
if (rate == cpu_freq[i]) { if (rate == cpu_freq[i]) {
index = i; index = i;
update_cpu_freq(cpu_freq[i]);
break; break;
} }
} }
...@@ -178,7 +187,5 @@ int sw64_set_rate(int index, unsigned long rate) ...@@ -178,7 +187,5 @@ int sw64_set_rate(int index, unsigned long rate)
/* LV1 select PLL0/PLL1 */ /* LV1 select PLL0/PLL1 */
sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); sw64_io_write(0, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT);
sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); sw64_io_write(1, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT);
return index;
} }
EXPORT_SYMBOL_GPL(sw64_set_rate); EXPORT_SYMBOL_GPL(sw64_set_rate);
...@@ -23,6 +23,7 @@ static void __init sw64_setup_platform_ops(void) ...@@ -23,6 +23,7 @@ static void __init sw64_setup_platform_ops(void)
asmlinkage __visible void __init sw64_start_kernel(void) asmlinkage __visible void __init sw64_start_kernel(void)
{ {
fixup_hmcall();
sw64_setup_chip_ops(); sw64_setup_chip_ops();
sw64_setup_platform_ops(); sw64_setup_platform_ops();
sw64_platform->ops_fixup(); sw64_platform->ops_fixup();
......
...@@ -14,11 +14,10 @@ ...@@ -14,11 +14,10 @@
/* /*
* This defines the normal kernel pt-regs layout. * This defines the normal kernel pt-regs layout.
* *
* regs 9-15 preserved by C code * regs 9-15 preserved by C code, saving to pt_regs will make
* them easier to be accessed in an unified way.
* regs 16-18 saved by HMcode * regs 16-18 saved by HMcode
* regs 29-30 saved and set up by HMcode * regs 29-30 saved and set up by HMcode
* JRP - Save regs 16-18 in a special area of the stack, so that
* the hmcode-provided values are available to the signal handler.
*/ */
.macro SAVE_COMMON_REGS .macro SAVE_COMMON_REGS
...@@ -42,9 +41,6 @@ ...@@ -42,9 +41,6 @@
stl $25, PT_REGS_R25($sp) stl $25, PT_REGS_R25($sp)
stl $26, PT_REGS_R26($sp) stl $26, PT_REGS_R26($sp)
stl $27, PT_REGS_R27($sp) stl $27, PT_REGS_R27($sp)
stl $16, PT_REGS_TRAP_A0($sp)
stl $17, PT_REGS_TRAP_A1($sp)
stl $18, PT_REGS_TRAP_A2($sp)
.endm .endm
.macro RESTORE_COMMON_REGS .macro RESTORE_COMMON_REGS
...@@ -384,11 +380,10 @@ $syscall_trace_failed: ...@@ -384,11 +380,10 @@ $syscall_trace_failed:
* Integer register context switch * Integer register context switch
* The callee-saved registers must be saved and restored. * The callee-saved registers must be saved and restored.
* *
* a0: physical address of next task's pcb, used by hmcode * a0: previous task_struct (must be preserved across the switch)
* a1: previous task_struct (must be preserved across the switch) * a1: next task_struct
* a2: next task_struct
* *
* The value of a1 must be preserved by this function, as that's how * The value of a0 must be preserved by this function, as that's how
* arguments are passed to schedule_tail. * arguments are passed to schedule_tail.
*/ */
.align 4 .align 4
...@@ -397,33 +392,28 @@ $syscall_trace_failed: ...@@ -397,33 +392,28 @@ $syscall_trace_failed:
__switch_to: __switch_to:
.prologue 0 .prologue 0
/* Save context into prev->thread */ /* Save context into prev->thread */
stl $26, TASK_THREAD_RA($17) stl $26, TASK_THREAD_RA($16)
stl $30, TASK_THREAD_SP($17) stl $30, TASK_THREAD_SP($16)
stl $9, TASK_THREAD_S0($17) stl $9, TASK_THREAD_S0($16)
stl $10, TASK_THREAD_S1($17) stl $10, TASK_THREAD_S1($16)
stl $11, TASK_THREAD_S2($17) stl $11, TASK_THREAD_S2($16)
stl $12, TASK_THREAD_S3($17) stl $12, TASK_THREAD_S3($16)
stl $13, TASK_THREAD_S4($17) stl $13, TASK_THREAD_S4($16)
stl $14, TASK_THREAD_S5($17) stl $14, TASK_THREAD_S5($16)
stl $15, TASK_THREAD_S6($17) stl $15, TASK_THREAD_S6($16)
/* Restore context from next->thread */ /* Restore context from next->thread */
ldl $26, TASK_THREAD_RA($18) ldl $26, TASK_THREAD_RA($17)
ldl $9, TASK_THREAD_S0($18) ldl $30, TASK_THREAD_SP($17)
ldl $10, TASK_THREAD_S1($18) ldl $9, TASK_THREAD_S0($17)
ldl $11, TASK_THREAD_S2($18) ldl $10, TASK_THREAD_S1($17)
ldl $12, TASK_THREAD_S3($18) ldl $11, TASK_THREAD_S2($17)
ldl $13, TASK_THREAD_S4($18) ldl $12, TASK_THREAD_S3($17)
ldl $14, TASK_THREAD_S5($18) ldl $13, TASK_THREAD_S4($17)
ldl $15, TASK_THREAD_S6($18) ldl $14, TASK_THREAD_S5($17)
sys_call HMC_swpctx ldl $15, TASK_THREAD_S6($17)
/*
* SP has been saved and restored by HMC_swpctx,
* and restore it again here for future expansion.
*/
ldl $30, TASK_THREAD_SP($18)
ldi $8, 0x3fff ldi $8, 0x3fff
bic $sp, $8, $8 bic $sp, $8, $8
mov $17, $0 mov $16, $0
ret ret
.end __switch_to .end __switch_to
...@@ -436,8 +426,7 @@ __switch_to: ...@@ -436,8 +426,7 @@ __switch_to:
.ent ret_from_fork .ent ret_from_fork
ret_from_fork: ret_from_fork:
ldi $26, ret_from_sys_call ldi $26, ret_from_sys_call
mov $17, $16 call $31, schedule_tail
jmp $31, schedule_tail
.end ret_from_fork .end ret_from_fork
/* /*
...@@ -447,7 +436,6 @@ ret_from_fork: ...@@ -447,7 +436,6 @@ ret_from_fork:
.globl ret_from_kernel_thread .globl ret_from_kernel_thread
.ent ret_from_kernel_thread .ent ret_from_kernel_thread
ret_from_kernel_thread: ret_from_kernel_thread:
mov $17, $16
call $26, schedule_tail call $26, schedule_tail
mov $9, $27 mov $9, $27
mov $10, $16 mov $10, $16
......
...@@ -24,7 +24,7 @@ __start: ...@@ -24,7 +24,7 @@ __start:
/* We need to get current_task_info loaded up... */ /* We need to get current_task_info loaded up... */
ldi $8, init_thread_union ldi $8, init_thread_union
/* ... and find our stack ... */ /* ... and find our stack ... */
ldi $30, 0x4000 - PT_REGS_SIZE($8) ldi $30, ASM_THREAD_SIZE($8)
/* ... and then we can clear bss data. */ /* ... and then we can clear bss data. */
ldi $2, __bss_start ldi $2, __bss_start
ldi $3, __bss_stop ldi $3, __bss_stop
...@@ -51,7 +51,7 @@ __start: ...@@ -51,7 +51,7 @@ __start:
ldl $29, 0($30) ldl $29, 0($30)
addl $29, $0, $29 addl $29, $0, $29
/* Repoint the sp into the new kernel image */ /* Repoint the sp into the new kernel image */
ldi $30, 0x4000 - PT_REGS_SIZE($8) ldi $30, ASM_THREAD_SIZE($8)
#endif #endif
/* ... and then we can start the kernel. */ /* ... and then we can start the kernel. */
call $26, sw64_start_kernel call $26, sw64_start_kernel
...@@ -71,24 +71,20 @@ __smp_callin: ...@@ -71,24 +71,20 @@ __smp_callin:
br $27, 2f # we copy this from above "br $27 1f" br $27, 2f # we copy this from above "br $27 1f"
2: ldgp $29, 0($27) # First order of business, load the GP. 2: ldgp $29, 0($27) # First order of business, load the GP.
subl $31, 2, $16 bis $31, $31, $16 # invalidate all TLB with current VPN
sys_call HMC_tbi sys_call HMC_tbi
sys_call HMC_whami # Get hard cid sys_call HMC_whami # Get hard cid
sll $0, 2, $0
ldi $1, __rcid_to_cpu ldi $1, __rcid_to_cpu
addl $1, $0, $1 s4addl $0, $1, $1
ldw $0, 0($1) # Get logical cpu number ldw $0, 0($1) # Get logical cpu number
sll $0, 3, $0 ldi $2, tidle_ksp
ldi $1, tidle_pcb s8addl $0, $2, $2
addl $1, $0, $1 ldl $30, 0($2) # Get ksp of idle thread
ldl $16, 0($1) # Get PCBB of idle thread
sys_call HMC_swpctx ldi $8, -ASM_THREAD_SIZE($30) # Find "current"
ldi $8, 0x3fff # Find "current".
bic $30, $8, $8
call $26, smp_callin call $26, smp_callin
sys_call HMC_halt sys_call HMC_halt
......
...@@ -14,7 +14,7 @@ void save_processor_state(void) ...@@ -14,7 +14,7 @@ void save_processor_state(void)
vcb->ksp = rdksp(); vcb->ksp = rdksp();
vcb->usp = rdusp(); vcb->usp = rdusp();
vcb->pcbb = rdpcbb(); vcb->tid = rtid();
vcb->ptbr = rdptbr(); vcb->ptbr = rdptbr();
} }
...@@ -24,11 +24,10 @@ void restore_processor_state(void) ...@@ -24,11 +24,10 @@ void restore_processor_state(void)
wrksp(vcb->ksp); wrksp(vcb->ksp);
wrusp(vcb->usp); wrusp(vcb->usp);
wrpcbb(vcb->pcbb); wrtp(vcb->tid);
wrptbr(vcb->ptbr); wrptbr(vcb->ptbr);
sflush(); sflush();
tbia(); tbiv();
imb();
} }
int swsusp_arch_resume(void) int swsusp_arch_resume(void)
......
...@@ -30,8 +30,7 @@ ENTRY(swsusp_arch_suspend) ...@@ -30,8 +30,7 @@ ENTRY(swsusp_arch_suspend)
rfpcr $f0 rfpcr $f0
fstd $f0, PSTATE_FPCR($16) fstd $f0, PSTATE_FPCR($16)
ldi $1, PSTATE_PCB($16) stl sp, PSTATE_SP($16)
stl sp, PCB_KSP($1)
call swsusp_save call swsusp_save
ldi $16, hibernate_state ldi $16, hibernate_state
ldi $1, PSTATE_REGS($16) ldi $1, PSTATE_REGS($16)
...@@ -112,8 +111,7 @@ $hibernate_setfpec_over: ...@@ -112,8 +111,7 @@ $hibernate_setfpec_over:
vldd $f8, CALLEE_F8($1) vldd $f8, CALLEE_F8($1)
vldd $f9, CALLEE_F9($1) vldd $f9, CALLEE_F9($1)
ldi $1, PSTATE_PCB($16) ldl sp, PSTATE_SP($16)
ldl sp, PCB_KSP($1)
ldi $8, 0x3fff ldi $8, 0x3fff
bic sp, $8, $8 bic sp, $8, $8
......
// SPDX-License-Identifier: GPL-2.0
/*
* arch/sw_64/kernel/hmcall.c
*
* Copyright (C) 2022 WXIAT
* Author: He Sheng
*/
#include <asm/hmcall.h>
#include <asm/page.h>
#define A0(func) (((HMC_##func & 0xFF) >> 6) & 0x1)
#define A1(func) ((((HMC_##func & 0xFF)>>6) & 0x2) >> 1)
#define A2(func) ((HMC_##func & 0x3F) << 7)
#define T(func) ((A0(func) ^ A1(func)) & 0x1)
#define B0(func) ((T(func) | A0(func)) << 13)
#define B1(func) (((~T(func) & 1) | A1(func)) << 14)
#define PRI_BASE 0x10000UL
#define HMCALL_ENTRY(func) (PRI_BASE | B1(func) | B0(func) | A2(func))
static inline void fixup_rdtp(void)
{
unsigned int *entry = __va(HMCALL_ENTRY(rdtp));
entry[0] = 0x181ffec7; /* pri_rcsr $0, CSR__TID */
entry[1] = 0x1ee00000; /* pri_ret $23 */
}
static inline void fixup_wrtp(void)
{
unsigned int *entry = __va(HMCALL_ENTRY(wrtp));
entry[0] = 0x1a1fffc7; /* pri_wcsr $16, CSR__TID */
entry[1] = 0x1ee00000; /* pri_ret $23 */
}
void __init fixup_hmcall(void)
{
#if defined(CONFIG_SUBARCH_C3A) || defined(CONFIG_SUBARCH_C3B)
fixup_rdtp();
fixup_wrtp();
#endif
}
#undef A0
#undef A1
#undef A2
#undef T
#undef B0
#undef B1
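(Editorial illustration, not part of the patch.) The HMCALL_ENTRY() arithmetic above can be sanity-checked against the user-level HMC numbers from uapi/asm/hmcall.h shown earlier in this diff: for HMC_rdtp = 0x9E and HMC_wrtp = 0x9F it yields offsets 0x16f00 and 0x16f80, the locations that fixup_rdtp() and fixup_wrtp() overwrite with the two-instruction CSR:TID sequences. A small standalone check:

#include <stdio.h>

#define HMC_rdtp	0x9E
#define HMC_wrtp	0x9F

/* Same bit-slicing as the macros in hmcall.c above. */
#define A0(func)	(((HMC_##func & 0xFF) >> 6) & 0x1)
#define A1(func)	((((HMC_##func & 0xFF) >> 6) & 0x2) >> 1)
#define A2(func)	((HMC_##func & 0x3F) << 7)
#define T(func)		((A0(func) ^ A1(func)) & 0x1)
#define B0(func)	((T(func) | A0(func)) << 13)
#define B1(func)	(((~T(func) & 1) | A1(func)) << 14)
#define PRI_BASE	0x10000UL
#define HMCALL_ENTRY(func)	(PRI_BASE | B1(func) | B0(func) | A2(func))

int main(void)
{
	printf("rdtp entry: %#lx\n", HMCALL_ENTRY(rdtp));	/* 0x16f00 */
	printf("wrtp entry: %#lx\n", HMCALL_ENTRY(wrtp));	/* 0x16f80 */
	return 0;
}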
...@@ -9,15 +9,6 @@ ...@@ -9,15 +9,6 @@
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/irq_impl.h> #include <asm/irq_impl.h>
asmlinkage void
do_entInt(unsigned long type, unsigned long vector,
unsigned long irq_arg, struct pt_regs *regs)
{
local_irq_disable();
handle_chip_irq(type, vector, irq_arg, regs);
}
EXPORT_SYMBOL(do_entInt);
void __init void __init
init_IRQ(void) init_IRQ(void)
{ {
......
...@@ -95,7 +95,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { ...@@ -95,7 +95,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
{ "pc", 8, offsetof(struct pt_regs, pc)}, { "pc", 8, offsetof(struct pt_regs, pc)},
{ "", 8, -1 }, { "", 8, -1 },
{ "unique", 8, -1}, { "tp", 8, -1},
}; };
char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
......
...@@ -204,9 +204,6 @@ void machine_kexec(struct kimage *image) ...@@ -204,9 +204,6 @@ void machine_kexec(struct kimage *image)
pr_info("Will call new kernel at %08lx\n", image->start); pr_info("Will call new kernel at %08lx\n", image->start);
pr_info("Bye ...\n"); pr_info("Bye ...\n");
//flush_cache_all();
//sflush();
//tbia();
smp_wmb(); smp_wmb();
((noretfun_t) reboot_code_buffer)(); ((noretfun_t) reboot_code_buffer)();
} }
...@@ -614,7 +614,8 @@ void __init sw64_init_arch(void) ...@@ -614,7 +614,8 @@ void __init sw64_init_arch(void)
cpu_num = sw64_chip->get_cpu_num(); cpu_num = sw64_chip->get_cpu_num();
for (node = 0; node < cpu_num; node++) { for (node = 0; node < cpu_num; node++) {
set_devint_wken(node); if (is_in_host())
set_devint_wken(node);
rc_enable = sw64_chip_init->pci_init.get_rc_enable(node); rc_enable = sw64_chip_init->pci_init.get_rc_enable(node);
if (rc_enable == 0) { if (rc_enable == 0) {
printk("PCIe is disabled on node %ld\n", node); printk("PCIe is disabled on node %ld\n", node);
......
...@@ -28,6 +28,6 @@ u64 perf_reg_abi(struct task_struct *task) ...@@ -28,6 +28,6 @@ u64 perf_reg_abi(struct task_struct *task)
void perf_get_regs_user(struct perf_regs *regs_user, void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs) struct pt_regs *regs)
{ {
regs_user->regs = NULL; regs_user->regs = task_pt_regs(current);
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->abi = perf_reg_abi(current);
} }
...@@ -52,7 +52,7 @@ void arch_cpu_idle(void) ...@@ -52,7 +52,7 @@ void arch_cpu_idle(void)
static void common_shutdown_1(void *generic_ptr) static void common_shutdown_1(void *generic_ptr)
{ {
struct halt_info *how = (struct halt_info *)generic_ptr; struct halt_info *how = (struct halt_info *)generic_ptr;
int cpuid = smp_processor_id(); int cpuid __maybe_unused = smp_processor_id();
/* No point in taking interrupts anymore. */ /* No point in taking interrupts anymore. */
local_irq_disable(); local_irq_disable();
...@@ -102,17 +102,6 @@ void machine_power_off(void) ...@@ -102,17 +102,6 @@ void machine_power_off(void)
} }
/* Used by sysrq-p, among others. I don't believe r9-r15 are ever
* saved in the context it's used.
*/
void
show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
dik_show_regs(regs);
}
/* /*
* Re-start a thread when doing execve() * Re-start a thread when doing execve()
*/ */
...@@ -136,7 +125,7 @@ flush_thread(void) ...@@ -136,7 +125,7 @@ flush_thread(void)
wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0));
/* Clean slate for TLS. */ /* Clean slate for TLS. */
current_thread_info()->pcb.unique = 0; current_thread_info()->pcb.tp = 0;
} }
void void
...@@ -146,7 +135,11 @@ release_thread(struct task_struct *dead_task) ...@@ -146,7 +135,11 @@ release_thread(struct task_struct *dead_task)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{ {
fpstate_save(src); /*
* aux_save() has to read the current TLS pointer from CSR:TID as it
* may be out-of-sync with the saved value.
*/
aux_save(src);
*dst = *src; *dst = *src;
return 0; return 0;
} }
...@@ -167,8 +160,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, ...@@ -167,8 +160,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *childregs = task_pt_regs(p);
struct pt_regs *regs = current_pt_regs(); struct pt_regs *regs = current_pt_regs();
childti->pcb.ksp = (unsigned long) childregs;
childti->pcb.flags = 7; /* set FEN, clear everything else */
p->thread.sp = (unsigned long) childregs; p->thread.sp = (unsigned long) childregs;
if (unlikely(p->flags & PF_KTHREAD)) { if (unlikely(p->flags & PF_KTHREAD)) {
...@@ -180,6 +171,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, ...@@ -180,6 +171,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
childti->pcb.usp = 0; childti->pcb.usp = 0;
return 0; return 0;
} }
/* /*
* Note: if CLONE_SETTLS is not set, then we must inherit the * Note: if CLONE_SETTLS is not set, then we must inherit the
* value from the parent, which will have been set by the block * value from the parent, which will have been set by the block
...@@ -188,10 +180,11 @@ copy_thread(unsigned long clone_flags, unsigned long usp, ...@@ -188,10 +180,11 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
* application calling fork. * application calling fork.
*/ */
if (clone_flags & CLONE_SETTLS) if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = tls; childti->pcb.tp = regs->r20;
else else
regs->r20 = 0; regs->r20 = 0;
childti->pcb.usp = usp ?: rdusp(); if (usp)
childti->pcb.usp = usp;
*childregs = *regs; *childregs = *regs;
childregs->r0 = 0; childregs->r0 = 0;
childregs->r19 = 0; childregs->r19 = 0;
...@@ -214,7 +207,7 @@ void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs) ...@@ -214,7 +207,7 @@ void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs)
dest[i] = *(__u64 *)((void *)regs + regoffsets[i]); dest[i] = *(__u64 *)((void *)regs + regoffsets[i]);
dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp;
dest[31] = regs->pc; dest[31] = regs->pc;
dest[32] = ti->pcb.unique; dest[32] = ti->pcb.tp;
} }
EXPORT_SYMBOL(sw64_elf_core_copy_regs); EXPORT_SYMBOL(sw64_elf_core_copy_regs);
......
...@@ -7,13 +7,9 @@ ...@@ -7,13 +7,9 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/sw64io.h> #include <asm/sw64io.h>
/* ptrace.c */
extern int ptrace_set_bpt(struct task_struct *child);
extern int ptrace_cancel_bpt(struct task_struct *child);
/* traps.c */ /* traps.c */
extern void dik_show_regs(struct pt_regs *regs); extern void show_regs(struct pt_regs *regs);
extern void die_if_kernel(char *str, struct pt_regs *regs, long err); extern void die(char *str, struct pt_regs *regs, long err);
/* timer.c */ /* timer.c */
extern void setup_timer(void); extern void setup_timer(void);
......
...@@ -72,7 +72,7 @@ short regoffsets[32] = { ...@@ -72,7 +72,7 @@ short regoffsets[32] = {
static int pcboff[] = { static int pcboff[] = {
[USP] = PCB_OFF(usp), [USP] = PCB_OFF(usp),
[UNIQUE] = PCB_OFF(unique), [TP] = PCB_OFF(tp),
[DA_MATCH] = PCB_OFF(da_match), [DA_MATCH] = PCB_OFF(da_match),
[DA_MASK] = PCB_OFF(da_mask), [DA_MASK] = PCB_OFF(da_mask),
[DV_MATCH] = PCB_OFF(dv_match), [DV_MATCH] = PCB_OFF(dv_match),
...@@ -154,119 +154,12 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) ...@@ -154,119 +154,12 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
return 0; return 0;
} }
static inline int
read_int(struct task_struct *task, unsigned long addr, int *data)
{
int copied = access_process_vm(task, addr, data, sizeof(int), FOLL_FORCE);
return (copied == sizeof(int)) ? 0 : -EIO;
}
static inline int
write_int(struct task_struct *task, unsigned long addr, int data)
{
int copied = access_process_vm(task, addr, &data, sizeof(int),
FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(int)) ? 0 : -EIO;
}
/*
* Set breakpoint.
*/
int
ptrace_set_bpt(struct task_struct *child)
{
int displ, i, res, reg_b, nsaved = 0;
unsigned int insn, op_code;
unsigned long pc;
pc = get_reg(child, REG_PC);
res = read_int(child, pc, (int *)&insn);
if (res < 0)
return res;
op_code = insn >> 26;
/* br bsr beq bne blt ble bgt bge blbc blbs fbeq fbne fblt fble fbgt fbge */
if ((1UL << op_code) & 0x3fff000000000030UL) {
/*
* It's a branch: instead of trying to figure out
* whether the branch will be taken or not, we'll put
* a breakpoint at either location. This is simpler,
* more reliable, and probably not a whole lot slower
* than the alternative approach of emulating the
* branch (emulation can be tricky for fp branches).
*/
displ = ((s32)(insn << 11)) >> 9;
task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
if (displ) /* guard against unoptimized code */
task_thread_info(child)->bpt_addr[nsaved++]
= pc + 4 + displ;
/*call ret jmp*/
} else if (op_code >= 0x1 && op_code <= 0x3) {
reg_b = (insn >> 16) & 0x1f;
task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b);
} else {
task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
}
/* install breakpoints: */
for (i = 0; i < nsaved; ++i) {
res = read_int(child, task_thread_info(child)->bpt_addr[i],
(int *)&insn);
if (res < 0)
return res;
task_thread_info(child)->bpt_insn[i] = insn;
res = write_int(child, task_thread_info(child)->bpt_addr[i],
BREAKINST);
if (res < 0)
return res;
}
task_thread_info(child)->bpt_nsaved = nsaved;
return 0;
}
/* /*
* Ensure no single-step breakpoint is pending. Returns non-zero * Called by ptrace_detach
* value if child was being single-stepped.
*/
int
ptrace_cancel_bpt(struct task_struct *child)
{
int i, nsaved = task_thread_info(child)->bpt_nsaved;
task_thread_info(child)->bpt_nsaved = 0;
if (nsaved > 2) {
printk("%s: bogus nsaved: %d!\n", __func__, nsaved);
nsaved = 2;
}
for (i = 0; i < nsaved; ++i) {
write_int(child, task_thread_info(child)->bpt_addr[i],
task_thread_info(child)->bpt_insn[i]);
}
return (nsaved != 0);
}
void user_enable_single_step(struct task_struct *child)
{
/* Mark single stepping. */
task_thread_info(child)->bpt_nsaved = -1;
}
void user_disable_single_step(struct task_struct *child)
{
ptrace_cancel_bpt(child);
}
/*
* Called by kernel/ptrace.c when detaching..
*
* Make sure the single step bit is not set.
*/ */
void ptrace_disable(struct task_struct *child) void ptrace_disable(struct task_struct *child)
{ {
user_disable_single_step(child); /**/
} }
static int gpr_get(struct task_struct *target, static int gpr_get(struct task_struct *target,
...@@ -487,7 +380,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r ...@@ -487,7 +380,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r
case MMCSR__DA_MATCH: case MMCSR__DA_MATCH:
case MMCSR__DV_MATCH: case MMCSR__DV_MATCH:
case MMCSR__DAV_MATCH: case MMCSR__DAV_MATCH:
dik_show_regs(regs); show_regs(regs);
if (!(current->ptrace & PT_PTRACED)) { if (!(current->ptrace & PT_PTRACED)) {
printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm); printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm);
...@@ -611,10 +504,6 @@ static const struct pt_regs_offset regoffset_table[] = { ...@@ -611,10 +504,6 @@ static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME(r26), REG_OFFSET_NAME(r26),
REG_OFFSET_NAME(r27), REG_OFFSET_NAME(r27),
REG_OFFSET_NAME(r28), REG_OFFSET_NAME(r28),
REG_OFFSET_NAME(hae),
REG_OFFSET_NAME(trap_a0),
REG_OFFSET_NAME(trap_a1),
REG_OFFSET_NAME(trap_a2),
REG_OFFSET_NAME(ps), REG_OFFSET_NAME(ps),
REG_OFFSET_NAME(pc), REG_OFFSET_NAME(pc),
REG_OFFSET_NAME(gp), REG_OFFSET_NAME(gp),
......
...@@ -28,9 +28,10 @@ ...@@ -28,9 +28,10 @@
#include <linux/genalloc.h> #include <linux/genalloc.h>
#include <linux/acpi.h> #include <linux/acpi.h>
#include <asm/sw64_init.h>
#include <asm/efi.h> #include <asm/efi.h>
#include <asm/kvm_cma.h> #include <asm/kvm_cma.h>
#include <asm/mmu_context.h>
#include <asm/sw64_init.h>
#include "proto.h" #include "proto.h"
#include "pci_impl.h" #include "pci_impl.h"
...@@ -137,6 +138,14 @@ struct screen_info screen_info = { ...@@ -137,6 +138,14 @@ struct screen_info screen_info = {
}; };
EXPORT_SYMBOL(screen_info); EXPORT_SYMBOL(screen_info);
/*
* Move global data into per-processor storage.
*/
void store_cpu_data(int cpu)
{
cpu_data[cpu].last_asn = ASN_FIRST_VERSION;
}
#ifdef CONFIG_KEXEC #ifdef CONFIG_KEXEC
void *kexec_control_page; void *kexec_control_page;
...@@ -859,13 +868,12 @@ setup_arch(char **cmdline_p) ...@@ -859,13 +868,12 @@ setup_arch(char **cmdline_p)
/* Default root filesystem to sda2. */ /* Default root filesystem to sda2. */
ROOT_DEV = Root_SDA2; ROOT_DEV = Root_SDA2;
/*
* Identify the flock of penguins.
*/
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
setup_smp(); setup_smp();
#else
store_cpu_data(0);
#endif #endif
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
cpu_set_node(); cpu_set_node();
#endif #endif
......
...@@ -38,6 +38,36 @@ SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask) ...@@ -38,6 +38,36 @@ SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask)
return res; return res;
} }
SYSCALL_DEFINE3(odd_sigaction, int, sig,
const struct odd_sigaction __user *, act,
struct odd_sigaction __user *, oact)
{
struct k_sigaction new_ka, old_ka;
old_sigset_t mask;
int ret;
if (act) {
if (!access_ok(act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
__get_user(mask, &act->sa_mask))
return -EFAULT;
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
return -EFAULT;
}
return ret;
}
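The accessors above imply the legacy three-field layout. A minimal sketch of that layout follows; the field order is an assumption, and the authoritative definition lives in the arch signal headers:

/* Sketch only: field order is assumed from the accessors above,
 * not copied from the real arch header. */
struct odd_sigaction {
	__sighandler_t	sa_handler;
	old_sigset_t	sa_mask;
	int		sa_flags;
};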
/* /*
* Do a signal return; undo the signal stack. * Do a signal return; undo the signal stack.
*/ */
...@@ -133,11 +163,6 @@ do_sigreturn(struct sigcontext __user *sc) ...@@ -133,11 +163,6 @@ do_sigreturn(struct sigcontext __user *sc)
if (restore_sigcontext(sc, regs)) if (restore_sigcontext(sc, regs))
goto give_sigsegv; goto give_sigsegv;
/* Send SIGTRAP if we're single-stepping: */
if (ptrace_cancel_bpt(current)) {
force_sig_fault(SIGTRAP, TRAP_BRKPT,
(void __user *)regs->pc, 0);
}
return; return;
give_sigsegv: give_sigsegv:
...@@ -164,11 +189,6 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) ...@@ -164,11 +189,6 @@ do_rt_sigreturn(struct rt_sigframe __user *frame)
if (restore_altstack(&frame->uc.uc_stack)) if (restore_altstack(&frame->uc.uc_stack))
goto give_sigsegv; goto give_sigsegv;
/* Send SIGTRAP if we're single-stepping: */
if (ptrace_cancel_bpt(current)) {
force_sig_fault(SIGTRAP, TRAP_BRKPT,
(void __user *)regs->pc, 0);
}
return; return;
give_sigsegv: give_sigsegv:
...@@ -235,10 +255,6 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, ...@@ -235,10 +255,6 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
offsetof(struct user_fpsimd_state, fpcr)); offsetof(struct user_fpsimd_state, fpcr));
err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr); err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr);
err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0);
err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1);
err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2);
return err; return err;
} }
...@@ -351,19 +367,15 @@ syscall_restart(unsigned long r0, unsigned long r19, ...@@ -351,19 +367,15 @@ syscall_restart(unsigned long r0, unsigned long r19,
static void static void
do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19)
{ {
unsigned long single_stepping = ptrace_cancel_bpt(current);
struct ksignal ksig; struct ksignal ksig;
/* This lets the debugger run, ... */ /* This lets the debugger run, ... */
if (get_signal(&ksig)) { if (get_signal(&ksig)) {
/* ... so re-check the single stepping. */
single_stepping |= ptrace_cancel_bpt(current);
/* Whee! Actually deliver the signal. */ /* Whee! Actually deliver the signal. */
if (r0) if (r0)
syscall_restart(r0, r19, regs, &ksig.ka); syscall_restart(r0, r19, regs, &ksig.ka);
handle_signal(&ksig, regs); handle_signal(&ksig, regs);
} else { } else {
single_stepping |= ptrace_cancel_bpt(current);
if (r0) { if (r0) {
switch (regs->r0) { switch (regs->r0) {
case ERESTARTNOHAND: case ERESTARTNOHAND:
...@@ -383,8 +395,6 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) ...@@ -383,8 +395,6 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19)
} }
restore_saved_sigmask(); restore_saved_sigmask();
} }
if (single_stepping)
ptrace_set_bpt(current); /* re-set breakpoint */
} }
void void
......
...@@ -34,7 +34,7 @@ EXPORT_SYMBOL(__cpu_to_rcid); ...@@ -34,7 +34,7 @@ EXPORT_SYMBOL(__cpu_to_rcid);
int __rcid_to_cpu[NR_CPUS]; /* Map physical to logical */ int __rcid_to_cpu[NR_CPUS]; /* Map physical to logical */
EXPORT_SYMBOL(__rcid_to_cpu); EXPORT_SYMBOL(__rcid_to_cpu);
unsigned long tidle_pcb[NR_CPUS]; void *tidle_ksp[NR_CPUS];
/* State of each CPU */ /* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 }; DEFINE_PER_CPU(int, cpu_state) = { 0 };
...@@ -59,29 +59,6 @@ EXPORT_SYMBOL(smp_num_cpus); ...@@ -59,29 +59,6 @@ EXPORT_SYMBOL(smp_num_cpus);
#define send_sleep_interrupt(cpu) send_ipi((cpu), II_SLEEP) #define send_sleep_interrupt(cpu) send_ipi((cpu), II_SLEEP)
#define send_wakeup_interrupt(cpu) send_ipi((cpu), II_WAKE) #define send_wakeup_interrupt(cpu) send_ipi((cpu), II_WAKE)
/*
* Called by both boot and secondaries to move global data into
* per-processor storage.
*/
static inline void __init
smp_store_cpu_info(int cpuid)
{
cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy;
cpu_data[cpuid].last_asn = ASN_FIRST_VERSION;
cpu_data[cpuid].need_new_asn = 0;
cpu_data[cpuid].asn_lock = 0;
}
/*
* Ideally sets up per-cpu profiling hooks. Doesn't do much now...
*/
static inline void __init
smp_setup_percpu_timer(int cpuid)
{
setup_timer();
cpu_data[cpuid].prof_counter = 1;
cpu_data[cpuid].prof_multiplier = 1;
}
static void __init wait_boot_cpu_to_stop(int cpuid) static void __init wait_boot_cpu_to_stop(int cpuid)
{ {
...@@ -128,11 +105,13 @@ void smp_callin(void) ...@@ -128,11 +105,13 @@ void smp_callin(void)
wrent(entInt, 0); wrent(entInt, 0);
/* Get our local ticker going. */ /* Get our local ticker going. */
smp_setup_percpu_timer(cpuid); setup_timer();
/* All kernel threads share the same mm context. */ /* All kernel threads share the same mm context. */
mmgrab(&init_mm); mmgrab(&init_mm);
current->active_mm = &init_mm; current->active_mm = &init_mm;
/* update csr:ptbr */
wrptbr(virt_to_phys(init_mm.pgd));
/* inform the notifiers about the new cpu */ /* inform the notifiers about the new cpu */
notify_cpu_starting(cpuid); notify_cpu_starting(cpuid);
...@@ -176,23 +155,11 @@ static inline void set_secondary_ready(int cpuid) ...@@ -176,23 +155,11 @@ static inline void set_secondary_ready(int cpuid)
*/ */
static int secondary_cpu_start(int cpuid, struct task_struct *idle) static int secondary_cpu_start(int cpuid, struct task_struct *idle)
{ {
struct pcb_struct *ipcb;
unsigned long timeout; unsigned long timeout;
ipcb = &task_thread_info(idle)->pcb;
/* /*
* Initialize the idle's PCB to something just good enough for * Precalculate the target ksp.
* us to get started. Immediately after starting, we'll swpctx
* to the target idle task's pcb. Reuse the stack in the mean
* time. Precalculate the target PCBB.
*/ */
ipcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; tidle_ksp[cpuid] = idle->stack + THREAD_SIZE;
ipcb->usp = 0;
ipcb->pcc = 0;
ipcb->asn = 0;
tidle_pcb[cpuid] = ipcb->unique = virt_to_phys(ipcb);
ipcb->dv_match = ipcb->dv_mask = 0;
DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state); DBGS("Starting secondary cpu %d: state 0x%lx\n", cpuid, idle->state);
...@@ -298,7 +265,7 @@ void __init setup_smp(void) ...@@ -298,7 +265,7 @@ void __init setup_smp(void)
__cpu_to_rcid[num] = i; __cpu_to_rcid[num] = i;
__rcid_to_cpu[i] = num; __rcid_to_cpu[i] = num;
set_cpu_possible(num, true); set_cpu_possible(num, true);
smp_store_cpu_info(num); store_cpu_data(num);
if (!cpumask_test_cpu(i, &cpu_offline)) if (!cpumask_test_cpu(i, &cpu_offline))
set_cpu_present(num, true); set_cpu_present(num, true);
num++; num++;
...@@ -407,18 +374,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) ...@@ -407,18 +374,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
void __init native_smp_cpus_done(unsigned int max_cpus) void __init native_smp_cpus_done(unsigned int max_cpus)
{ {
int cpu;
unsigned long bogosum = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (cpu_online(cpu))
bogosum += cpu_data[cpu].loops_per_jiffy;
smp_booted = 1; smp_booted = 1;
pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
num_online_cpus(),
(bogosum + 2500) / (500000/HZ),
((bogosum + 2500) / (5000/HZ)) % 100);
} }
int setup_profiling_timer(unsigned int multiplier) int setup_profiling_timer(unsigned int multiplier)
...@@ -519,22 +476,9 @@ void native_send_call_func_single_ipi(int cpu) ...@@ -519,22 +476,9 @@ void native_send_call_func_single_ipi(int cpu)
send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
} }
static void
ipi_imb(void *ignored)
{
imb();
}
void smp_imb(void)
{
/* Must wait other processors to flush their icache before continue. */
on_each_cpu(ipi_imb, NULL, 1);
}
EXPORT_SYMBOL(smp_imb);
static void ipi_flush_tlb_all(void *ignored) static void ipi_flush_tlb_all(void *ignored)
{ {
tbia(); tbiv();
} }
void flush_tlb_all(void) void flush_tlb_all(void)
...@@ -545,8 +489,6 @@ void flush_tlb_all(void) ...@@ -545,8 +489,6 @@ void flush_tlb_all(void)
on_each_cpu(ipi_flush_tlb_all, NULL, 1); on_each_cpu(ipi_flush_tlb_all, NULL, 1);
} }
#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
static void ipi_flush_tlb_mm(void *x) static void ipi_flush_tlb_mm(void *x)
{ {
struct mm_struct *mm = (struct mm_struct *) x; struct mm_struct *mm = (struct mm_struct *) x;
...@@ -651,50 +593,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l ...@@ -651,50 +593,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l
} }
EXPORT_SYMBOL(flush_tlb_range); EXPORT_SYMBOL(flush_tlb_range);
static void ipi_flush_icache_page(void *x)
{
struct mm_struct *mm = (struct mm_struct *) x;
if (mm == current->mm)
__load_new_mm_context(mm);
else
flush_tlb_other(mm);
}
void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len)
{
struct mm_struct *mm = vma->vm_mm;
if ((vma->vm_flags & VM_EXEC) == 0)
return;
if (!icache_is_vivt_no_ictag())
return;
preempt_disable();
if (mm == current->mm) {
__load_new_mm_context(mm);
if (atomic_read(&mm->mm_users) == 1) {
int cpu, this_cpu = smp_processor_id();
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (!cpu_online(cpu) || cpu == this_cpu)
continue;
if (mm->context.asid[cpu])
mm->context.asid[cpu] = 0;
}
preempt_enable();
return;
}
} else
flush_tlb_other(mm);
smp_call_function(ipi_flush_icache_page, mm, 1);
preempt_enable();
}
int native_cpu_disable(void) int native_cpu_disable(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
......
...@@ -33,6 +33,7 @@ void sw64_suspend_enter(void) ...@@ -33,6 +33,7 @@ void sw64_suspend_enter(void)
*/ */
disable_local_timer(); disable_local_timer();
current_thread_info()->pcb.tp = rtid();
#ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE #ifdef CONFIG_SW64_SUSPEND_DEEPSLEEP_BOOTCORE
sw64_suspend_deep_sleep(&suspend_state); sw64_suspend_deep_sleep(&suspend_state);
...@@ -40,6 +41,7 @@ void sw64_suspend_enter(void) ...@@ -40,6 +41,7 @@ void sw64_suspend_enter(void)
mtinten(); mtinten();
asm("halt"); asm("halt");
#endif #endif
wrtp(current_thread_info()->pcb.tp);
disable_local_timer(); disable_local_timer();
} }
......
...@@ -163,7 +163,7 @@ ...@@ -163,7 +163,7 @@
#153 is unused #153 is unused
#154 is unused #154 is unused
#155 is unused #155 is unused
156 common sigaction sys_sigaction 156 common sigaction sys_odd_sigaction
#157 is unused #157 is unused
#158 is unused #158 is unused
#159 is unused #159 is unused
......
...@@ -4,6 +4,9 @@ ...@@ -4,6 +4,9 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/clk-provider.h> #include <linux/clk-provider.h>
#ifndef CONFIG_SMP
#include <linux/clocksource.h>
#endif
#include <asm/debug.h> #include <asm/debug.h>
...@@ -93,10 +96,6 @@ void setup_clocksource(void) ...@@ -93,10 +96,6 @@ void setup_clocksource(void)
} }
#endif /* !CONFIG_SMP */ #endif /* !CONFIG_SMP */
void __init common_init_rtc(void)
{
setup_timer();
}
void __init void __init
time_init(void) time_init(void)
...@@ -111,15 +110,9 @@ time_init(void) ...@@ -111,15 +110,9 @@ time_init(void)
setup_clocksource(); setup_clocksource();
of_clk_init(NULL); of_clk_init(NULL);
/* Startup the timer source. */ /* Startup the timer source. */
common_init_rtc(); setup_timer();
} /* Calibrate the delay loop directly */
lpj_fine = cycle_freq / HZ;
void calibrate_delay(void)
{
loops_per_jiffy = get_cpu_freq() / HZ;
pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n",
loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy);
} }
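As a rough illustration of what this pre-seeding buys (the clock frequency below is an assumed example value, not one taken from this patch):

/* Illustration only: cycle_freq here is an assumed example value. */
unsigned long cycle_freq = 2000000000UL;	/* assume a 2 GHz cycle counter */
unsigned long lpj = cycle_freq / HZ;		/* with HZ = 500: 4,000,000 loops per jiffy */
/* The generic calibrate_delay() can consume lpj_fine directly instead of
 * timing a busy loop at boot, which the removed arch calibrate_delay() did. */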
static void __init calibrate_sched_clock(void) static void __init calibrate_sched_clock(void)
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/sched/debug.h> #include <linux/sched/debug.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/gentrap.h> #include <asm/gentrap.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
...@@ -29,8 +31,18 @@ ...@@ -29,8 +31,18 @@
#include "proto.h" #include "proto.h"
void dik_show_regs(struct pt_regs *regs) enum SW64_IF_TYPES {
IF_BREAKPOINT = 0,
IF_RESERVED,
IF_GENTRAP,
IF_FEN,
IF_OPDEC,
};
void show_regs(struct pt_regs *regs)
{ {
show_regs_print_info(KERN_DEFAULT);
printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n",
regs->pc, regs->r26, regs->ps, print_tainted()); regs->pc, regs->r26, regs->ps, print_tainted());
printk("pc is at %pSR\n", (void *)regs->pc); printk("pc is at %pSR\n", (void *)regs->pc);
...@@ -60,8 +72,7 @@ void dik_show_regs(struct pt_regs *regs) ...@@ -60,8 +72,7 @@ void dik_show_regs(struct pt_regs *regs)
printk("gp = %016lx sp = %p\n", regs->gp, regs+1); printk("gp = %016lx sp = %p\n", regs->gp, regs+1);
} }
static void static void show_code(unsigned int *pc)
dik_show_code(unsigned int *pc)
{ {
long i; long i;
unsigned int insn; unsigned int insn;
...@@ -75,33 +86,43 @@ dik_show_code(unsigned int *pc) ...@@ -75,33 +86,43 @@ dik_show_code(unsigned int *pc)
printk("\n"); printk("\n");
} }
void die_if_kernel(char *str, struct pt_regs *regs, long err) static DEFINE_SPINLOCK(die_lock);
void die(char *str, struct pt_regs *regs, long err)
{ {
if (regs->ps & 8) static int die_counter;
return; unsigned long flags;
#ifdef CONFIG_SMP int ret;
printk("CPU %d ", hard_smp_processor_id());
#endif oops_enter();
printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
dik_show_regs(regs); spin_lock_irqsave(&die_lock, flags);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); console_verbose();
bust_spinlocks(1);
pr_emerg("%s [#%d]\n", str, ++die_counter);
ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV);
print_modules();
show_regs(regs);
show_code((unsigned int *)regs->pc);
show_stack(current, NULL, KERN_EMERG); show_stack(current, NULL, KERN_EMERG);
dik_show_code((unsigned int *)regs->pc);
if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { bust_spinlocks(0);
printk("die_if_kernel recursion detected.\n"); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
local_irq_enable(); spin_unlock_irqrestore(&die_lock, flags);
while (1) oops_exit();
asm("nop");
}
if (kexec_should_crash(current)) if (kexec_should_crash(current))
crash_kexec(regs); crash_kexec(regs);
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops) if (panic_on_oops)
panic("Fatal exception"); panic("Fatal exception");
do_exit(SIGSEGV); if (ret != NOTIFY_STOP)
do_exit(SIGSEGV);
} }
#ifndef CONFIG_MATHEMU #ifndef CONFIG_MATHEMU
...@@ -135,11 +156,17 @@ do_entArith(unsigned long summary, unsigned long write_mask, ...@@ -135,11 +156,17 @@ do_entArith(unsigned long summary, unsigned long write_mask,
if (si_code == 0) if (si_code == 0)
return; return;
} }
die_if_kernel("Arithmetic fault", regs, 0);
if (!user_mode(regs))
die("Arithmetic fault", regs, 0);
force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0); force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0);
} }
/*
 * BPT/GENTRAP/OPDEC make regs->pc = exc_pc + 4. The debugger must
 * compensate for this when handling the trap.

*/
asmlinkage void asmlinkage void
do_entIF(unsigned long inst_type, struct pt_regs *regs) do_entIF(unsigned long inst_type, struct pt_regs *regs)
{ {
...@@ -149,35 +176,23 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) ...@@ -149,35 +176,23 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
type = inst_type & 0xffffffff; type = inst_type & 0xffffffff;
inst = inst_type >> 32; inst = inst_type >> 32;
if ((regs->ps & ~IPL_MAX) == 0 && type != 4) { if (!user_mode(regs) && type != IF_OPDEC) {
if (type == 1) { if (type == IF_BREAKPOINT) {
const unsigned int *data
= (const unsigned int *) regs->pc;
printk("Kernel bug at %s:%d\n",
(const char *)(data[1] | (long)data[2] << 32),
data[0]);
} else if (type == 0) {
/* support kgdb */ /* support kgdb */
notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP); notify_die(0, "kgdb trap", regs, 0, 0, SIGTRAP);
return; return;
} }
die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), die((type == IF_RESERVED ? "Kernel Bug" : "Instruction fault"),
regs, type); regs, type);
} }
switch (type) { switch (type) {
case 0: /* breakpoint */ case IF_BREAKPOINT: /* gdb do pc-4 for sigtrap */
if (ptrace_cancel_bpt(current))
regs->pc -= 4; /* make pc point to former bpt */
force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0); force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0);
return; return;
case 1: /* bugcheck */ case IF_GENTRAP:
force_sig_fault(SIGTRAP, TRAP_UNK, (void __user *)regs->pc, 0); regs->pc -= 4;
return;
case 2: /* gentrap */
switch ((long)regs->r16) { switch ((long)regs->r16) {
case GEN_INTOVF: case GEN_INTOVF:
signo = SIGFPE; signo = SIGFPE;
...@@ -230,6 +245,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) ...@@ -230,6 +245,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
case GEN_SUBRNG6: case GEN_SUBRNG6:
case GEN_SUBRNG7: case GEN_SUBRNG7:
default: default:
regs->pc += 4;
signo = SIGTRAP; signo = SIGTRAP;
code = TRAP_UNK; code = TRAP_UNK;
break; break;
...@@ -238,7 +254,11 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) ...@@ -238,7 +254,11 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
force_sig_fault(signo, code, (void __user *)regs->pc, regs->r16); force_sig_fault(signo, code, (void __user *)regs->pc, regs->r16);
return; return;
case 4: /* opDEC */ case IF_FEN:
fpu_enable();
return;
case IF_OPDEC:
switch (inst) { switch (inst) {
case BREAK_KPROBE: case BREAK_KPROBE:
if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) if (notify_die(DIE_BREAK, "kprobe", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
...@@ -253,27 +273,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) ...@@ -253,27 +273,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) if (notify_die(DIE_UPROBE_XOL, "uprobe_xol", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
return; return;
} }
if ((regs->ps & ~IPL_MAX) == 0)
die_if_kernel("Instruction fault", regs, type);
break;
case 3: /* FEN fault */
/*
* Irritating users can call HMC_clrfen to disable the
* FPU for the process. The kernel will then trap to
* save and restore the FP registers.
* Given that GCC by default generates code that uses the if (user_mode(regs))
* FP registers, HMC_clrfen is not useful except for DoS regs->pc -= 4;
* attacks. So turn the bleeding FPU back on and be done else
* with it. die("Instruction fault", regs, type);
*/ break;
current_thread_info()->pcb.flags |= 1;
__reload_thread(&current_thread_info()->pcb);
return;
case 5: /* illoc */
default: /* unexpected instruction-fault type */ default: /* unexpected instruction-fault type */
regs->pc -= 4;
break; break;
} }
...@@ -490,21 +498,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, ...@@ -490,21 +498,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
* Since the registers are in a weird format, dump them ourselves. * Since the registers are in a weird format, dump them ourselves.
*/ */
printk("%s(%d): unhandled unaligned exception\n", die("Unhandled unaligned exception", regs, error);
current->comm, task_pid_nr(current));
dik_show_regs(regs);
dik_show_code((unsigned int *)pc);
show_stack(current, NULL, KERN_EMERG);
if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) {
printk("die_if_kernel recursion detected.\n");
local_irq_enable();
while (1)
asm("nop");
}
do_exit(SIGSEGV);
} }
/* /*
......
...@@ -33,7 +33,7 @@ SECTIONS ...@@ -33,7 +33,7 @@ SECTIONS
} :text } :text
_etext = .; /* End of text section */ _etext = .; /* End of text section */
RO_DATA(4096) RO_DATA(PAGE_SIZE)
/* Will be freed after init */ /* Will be freed after init */
__init_begin = ALIGN(PAGE_SIZE); __init_begin = ALIGN(PAGE_SIZE);
......
...@@ -44,7 +44,7 @@ config KVM_SW64_HOST ...@@ -44,7 +44,7 @@ config KVM_SW64_HOST
config KVM_MEMHOTPLUG config KVM_MEMHOTPLUG
bool "Memory hotplug support for guest" bool "Memory hotplug support for guest"
depends on KVM depends on KVM && MEMORY_HOTPLUG
help help
Provides memory hotplug support for SW64 guest. Provides memory hotplug support for SW64 guest.
......
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
bool set_msi_flag; bool set_msi_flag;
unsigned long sw64_kvm_last_vpn[NR_CPUS]; unsigned long sw64_kvm_last_vpn[NR_CPUS];
__read_mostly bool bind_vcpu_enabled; #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA)
extern bool bind_vcpu_enabled;
#endif
#define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid]
#ifdef CONFIG_SUBARCH_C3B #ifdef CONFIG_SUBARCH_C3B
...@@ -306,6 +308,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -306,6 +308,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE) if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE)
return 0; return 0;
if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr)))
return 0;
if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr)))
return 0;
#ifndef CONFIG_KVM_MEMHOTPLUG #ifndef CONFIG_KVM_MEMHOTPLUG
if (mem->guest_phys_addr) { if (mem->guest_phys_addr) {
pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__); pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__);
...@@ -313,12 +321,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -313,12 +321,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
} }
#endif #endif
if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr)))
return 0;
if (test_bit(IO_MARK_BIT + 1, &(mem->guest_phys_addr)))
return 0;
if (!sw64_kvm_pool) if (!sw64_kvm_pool)
return -ENOMEM; return -ENOMEM;
...@@ -409,6 +411,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) ...@@ -409,6 +411,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{ {
unsigned long addr = vcpu->kvm->arch.host_phys_addr; unsigned long addr = vcpu->kvm->arch.host_phys_addr;
hrtimer_cancel(&vcpu->arch.hrt);
vcpu->arch.vcb.whami = vcpu->vcpu_id; vcpu->arch.vcb.whami = vcpu->vcpu_id;
vcpu->arch.vcb.vcpu_irq_disabled = 1; vcpu->arch.vcb.vcpu_irq_disabled = 1;
vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */
...@@ -539,6 +542,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -539,6 +542,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu->arch.vcb.vpcr vcpu->arch.vcb.vpcr
= get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NUMA)
if (unlikely(bind_vcpu_enabled)) { if (unlikely(bind_vcpu_enabled)) {
int nid; int nid;
unsigned long end; unsigned long end;
...@@ -548,11 +552,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -548,11 +552,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (pfn_to_nid(PHYS_PFN(end)) == nid) if (pfn_to_nid(PHYS_PFN(end)) == nid)
set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]); set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]);
} }
#else #endif
#else /* !CONFIG_KVM_MEMHOTPLUG */
unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd); unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd);
vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0); vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0);
#endif #endif /* CONFIG_KVM_MEMHOTPLUG */
vcpu->arch.vcb.upcr = 0x7; vcpu->arch.vcb.upcr = 0x7;
} }
......
...@@ -61,10 +61,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, ...@@ -61,10 +61,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src,
unsigned long checksum = ~0U; unsigned long checksum = ~0U;
int err = 0; int err = 0;
if (likely(!uaccess_kernel())) err = __copy_from_user(dst, src, len+8);
err = __copy_from_user(dst, src, len + 8);
else
memcpy(dst, src, len + 8);
while (len > 0) { while (len > 0) {
word = *dst; word = *dst;
...@@ -93,10 +90,7 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src, ...@@ -93,10 +90,7 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src,
unsigned long checksum = ~0U; unsigned long checksum = ~0U;
int err = 0; int err = 0;
if (likely(!uaccess_kernel())) err = __copy_from_user(dst, src, len+8);
err = __copy_from_user(dst, src, len + 8);
else
memcpy(dst, src, len + 8);
dst = (unsigned long *)((unsigned long)dst & (~7UL)); dst = (unsigned long *)((unsigned long)dst & (~7UL));
word = *dst; word = *dst;
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* template for memcpy and copy_user with SIMD
*
* $4: 8-byte misalignment of src when dest is 8-byte aligned
* $5: 32-byte misalignment of src when dest is 32-byte aligned
* $7: SIMD status
* 0: not in simd loop
* 1: in simd loop
* 2: in simd_u loop
* $16: latest dest, clobbered
* $17: latest src, clobbered
* $18: bytes left to copy
*
*/
#define NC_STORE_THRESHOLD 2048
#define SAVE_SIMD_REGS \
ldi $sp, -0x60($sp); \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vstd $f1, 0($23); \
vstd $f2, 0x20($23); \
ldi $7, 1
#define RESTORE_SIMD_REGS \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vldd $f1, 0($23); \
vldd $f2, 0x20($23); \
ldi $sp, 0x60($sp); \
bis $31, $31, $7
#define SAVE_SIMD_U_REGS \
ldi $sp, -0xc0($sp); \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vstd $f1, 0($23); \
vstd $f2, 0x20($23); \
vstd $f4, 0x40($23); \
vstd $f5, 0x60($23); \
vstd $f3, 0x80($23); \
ldi $7, 2
#define RESTORE_SIMD_U_REGS \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vldd $f1, 0($23); \
vldd $f2, 0x20($23); \
vldd $f4, 0x40($23); \
vldd $f5, 0x60($23); \
vldd $f3, 0x80($23); \
ldi $sp, 0xc0($sp); \
bis $31, $31, $7
ble $18, $out
and $16, 7, $1
beq $1, $dest_aligned_8
$byte_loop_head:
FIXUP_LDST( ldbu $2, 0($17) )
FIXUP_LDST( stb $2, 0($16) )
subl $18, 1, $18
addl $17, 1, $17
addl $16, 1, $16
ble $18, $out
and $16, 7, $1
bne $1, $byte_loop_head
$dest_aligned_8:
and $17, 7, $4
cmplt $18, 16, $1
bne $1, $quad_loop_end
and $16, 31, $1
beq $1, $dest_aligned_32
cmplt $18, 64, $1
bne $1, $simd_end
bne $4, $quad_u_loop_head
$quad_loop_head:
FIXUP_LDST( ldl $2, 0($17) )
FIXUP_LDST( stl $2, 0($16) )
addl $16, 8, $16
addl $17, 8, $17
subl $18, 8, $18
and $16, 31, $1
beq $1, $dest_aligned_32
br $31, $quad_loop_head
$dest_aligned_32:
cmplt $18, 64, $1
bne $1, $simd_end
and $17, 31, $5
bne $5, $prep_simd_u_loop
$prep_simd_loop:
SAVE_SIMD_REGS
ldi $1, NC_STORE_THRESHOLD($31)
cmple $18, $1, $1
bne $1, $simd_loop
.align 4
$simd_loop_nc:
FIXUP_LDST( vldd $f1, 0($17) )
FIXUP_LDST( vldd $f2, 32($17) )
FIXUP_LDST( vstd_nc $f1, 0($16) )
FIXUP_LDST( vstd_nc $f2, 32($16) )
subl $18, 64, $18
addl $17, 64, $17
addl $16, 64, $16
cmplt $18, 64, $1
beq $1, $simd_loop_nc
memb # required for _nc store instructions
br $31, $simd_loop_end
.align 4
$simd_loop:
FIXUP_LDST( vldd $f1, 0($17) )
FIXUP_LDST( vldd $f2, 32($17) )
FIXUP_LDST( vstd $f1, 0($16) )
FIXUP_LDST( vstd $f2, 32($16) )
subl $18, 64, $18
addl $17, 64, $17
addl $16, 64, $16
cmplt $18, 64, $1
beq $1, $simd_loop
$simd_loop_end:
cmplt $18, 32, $1
bne $1, $no_more_simd
FIXUP_LDST( vldd $f1, 0($17) )
FIXUP_LDST( vstd $f1, 0($16) )
subl $18, 32, $18
addl $17, 32, $17
addl $16, 32, $16
$no_more_simd:
RESTORE_SIMD_REGS
$simd_end:
ble $18, $out
cmplt $18, 16, $1
bne $1, $quad_loop_end
bne $4, $prep_quad_u_loop_tail
.align 4
$quad_loop_tail:
FIXUP_LDST( ldl $2, 0($17) )
FIXUP_LDST( ldl $3, 8($17) )
FIXUP_LDST( stl $2, 0($16) )
FIXUP_LDST( stl $3, 8($16) )
subl $18, 16, $18
addl $17, 16, $17
addl $16, 16, $16
cmplt $18, 16, $1
beq $1, $quad_loop_tail
$quad_loop_end:
ble $18, $out
cmplt $18, 8, $1
bne $1, $byte_loop_tail
bne $4, $move_one_quad_u
$move_one_quad:
FIXUP_LDST( ldl $2, 0($17) )
FIXUP_LDST( stl $2, 0($16) )
subl $18, 8, $18
addl $17, 8, $17
addl $16, 8, $16
ble $18, $out
.align 3
$byte_loop_tail:
FIXUP_LDST( ldbu $2, 0($17) )
FIXUP_LDST( stb $2, 0($16) )
subl $18, 1, $18
addl $17, 1, $17
addl $16, 1, $16
bgt $18, $byte_loop_tail
br $31, $out
/* misaligned src and dst */
$quad_u_loop_head:
FIXUP_LDST( ldl_u $2, 0($17) )
FIXUP_LDST( ldl_u $3, 7($17) )
extll $2, $4, $2
exthl $3, $4, $3
bis $2, $3, $2
FIXUP_LDST( stl $2, 0($16) )
addl $16, 8, $16
addl $17, 8, $17
subl $18, 8, $18
and $16, 31, $1
beq $1, $dest_aligned_32
br $31, $quad_u_loop_head
$prep_simd_u_loop:
SAVE_SIMD_U_REGS
andnot $17, 31, $3
ldi $2, 256($31)
sll $5, 3, $1
subl $2, $1, $2
sll $1, 29, $1
sll $2, 29, $2
ifmovd $1, $f1
ifmovd $2, $f2
FIXUP_LDST( vldd $f4, 0($3) )
ldi $1, NC_STORE_THRESHOLD($31)
cmple $18, $1, $1
bne $1, $simd_u_loop
.align 4
$simd_u_loop_nc:
FIXUP_LDST( vldd $f5, 32($3) )
srlow $f4, $f1, $f4
sllow $f5, $f2, $f3
vlogfc $f3, $f4, $f31, $f3
FIXUP_LDST( vstd_nc $f3, 0($16) )
FIXUP_LDST( vldd $f4, 64($3) )
srlow $f5, $f1, $f5
sllow $f4, $f2, $f3
vlogfc $f5, $f3, $f31, $f5
FIXUP_LDST( vstd_nc $f5, 32($16) )
subl $18, 64, $18
addl $3, 64, $3
addl $16, 64, $16
cmplt $18, 64, $1
beq $1, $simd_u_loop_nc
memb # required for _nc store instructions
br $31, $simd_u_loop_end
.align 4
$simd_u_loop:
FIXUP_LDST( vldd $f5, 32($3) )
srlow $f4, $f1, $f4
sllow $f5, $f2, $f3
vlogfc $f4, $f3, $f31, $f3
FIXUP_LDST( vstd $f3, 0($16) )
FIXUP_LDST( vldd $f4, 64($3) )
srlow $f5, $f1, $f5
sllow $f4, $f2, $f3
vlogfc $f5, $f3, $f31, $f3
FIXUP_LDST( vstd $f3, 32($16) )
subl $18, 64, $18
addl $3, 64, $3
addl $16, 64, $16
cmplt $18, 64, $1
beq $1, $simd_u_loop
$simd_u_loop_end:
cmplt $18, 32, $1
bne $1, $no_more_simd_u
FIXUP_LDST( vldd $f5, 32($3) )
srlow $f4, $f1, $f4
sllow $f5, $f2, $f3
vlogfc $f4, $f3, $f31, $f3
FIXUP_LDST( vstd $f3, 0($16) )
subl $18, 32, $18
addl $3, 32, $3
addl $16, 32, $16
$no_more_simd_u:
RESTORE_SIMD_U_REGS
bis $3, $5, $17
br $31, $simd_end
$prep_quad_u_loop_tail:
FIXUP_LDST( ldl_u $2, 0($17) )
.align 4
$quad_u_loop_tail:
FIXUP_LDST( ldl_u $3, 8($17) )
extll $2, $4, $22
exthl $3, $4, $23
bis $22, $23, $22
FIXUP_LDST( stl $22, 0($16) )
FIXUP_LDST( ldl_u $2, 16($17) )
extll $3, $4, $24
exthl $2, $4, $25
bis $24, $25, $24
FIXUP_LDST( stl $24, 8($16) )
subl $18, 16, $18
addl $17, 16, $17
addl $16, 16, $16
cmplt $18, 16, $1
beq $1, $quad_u_loop_tail
br $31, $quad_loop_end
$move_one_quad_u:
FIXUP_LDST( ldl_u $2, 0($17) )
FIXUP_LDST( ldl_u $3, 8($17) )
extll $2, $4, $22
exthl $3, $4, $23
bis $22, $23, $22
FIXUP_LDST( stl $22, 0($16) )
subl $18, 8, $18
addl $17, 8, $17
addl $16, 8, $16
ble $18, $out
br $31, $byte_loop_tail
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
/*
* Copy to/from user space, handling exceptions as we go.. This
* isn't exactly pretty.
*
* This is essentially the same as "memcpy()", but with a few twists.
* Notably, we have to make sure that $18 is always up-to-date and
* contains the right "bytes left to copy" value (and that it is updated
* only _after_ a successful copy). There is also some rather minor
* exception setup stuff..
*
* Inputs:
* length in $18
* destination address in $16
* source address in $17
* return address in $26
*
* Outputs:
* bytes left to copy in $0
*
* Clobbers:
* $1,$2,$3,$4,$5,$16,$17
*
*/
/* Author: Copy_user simd version 1.1 (20190904) by Gao Xiuwu.
*/
#include <asm/export.h> #include <asm/export.h>
/* Allow an exception for an insn; exit if we get one. */ /* Allow an exception for an insn; exit if we get one. */
#define EXI(x, y...) \ #define FIXUP_LDST(x, y) \
99: x, ##y; \ 99: x, y; \
.section __ex_table, "a"; \
.long 99b - .; \
ldi $31, $exitin-99b($31); \
.previous
#define EXO(x,y...) \
99: x, ##y; \
.section __ex_table, "a"; \ .section __ex_table, "a"; \
.long 99b - .; \ .long 99b - .; \
ldi $31, $exitout-99b($31); \ ldi $31, $out-99b($31); \
.previous .previous
.set noat /*
.align 4 * $7: SIMD status
* 0: not in simd loop
* 1: in simd loop
* 2: in simd_u loop
* $18: bytes left to copy
*
*/
.globl __copy_user .globl __copy_user
.ent __copy_user .ent __copy_user
__copy_user: __copy_user:
.prologue 0 .prologue 0
subl $18, 32, $1 bis $31, $31, $7
beq $18, $zerolength #include "deep-copy_template.S"
$out:
and $16, 7, $3
ble $1, $onebyteloop
beq $3, $destaligned
subl $3, 8, $3
/*
* The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U)
* This loop aligns the destination a byte at a time
* We know we have at least one trip through this loop
*/
$aligndest:
EXI(ldbu $1, 0($17))
addl $16, 1, $16
addl $3, 1, $3
/*
* the -1 is to compensate for the inc($16) done in a previous quadpack
* which allows us zero dependencies within either quadpack in the loop
*/
EXO(stb $1, -1($16))
addl $17, 1, $17
subl $18, 1, $18
bne $3, $aligndest
/*
* If we fell through into here, we have a minimum of 33 - 7 bytes
* If we arrived via branch, we have a minimum of 32 bytes
*/
$destaligned:
and $17, 7, $1
bic $18, 7, $4
#EXI(ldl_u $3, 0($17))
beq $1, $quadaligned
#ifndef MISQUAD_SCALAR
$misquad:
and $16, 31, $1
beq $1, $dest32Baligned
$align_32B:
EXI(ldbu $1, 0($17))
addl $17, 1, $17
EXO(stb $1, 0($16))
subl $18, 1, $18
addl $16, 1, $16
and $16, 31, $1
beq $18, $exitout
bne $1, $align_32B
$dest32Baligned:
ldi $2, 256($31)
andnot $17, 31, $3
EXI(vldd $f10, 0($3))
and $17, 31, $5
sll $5, 3, $5
subw $2, $5, $4
ifmovs $5, $f15
ifmovs $4, $f14
cmple $18, 63, $1
bne $1, $misalign_tail_simd
$misalign_body_simd:
EXI(vldd $f11, 32($3))
fillcs 128*5($3)
srlow $f10, $f15, $f12
sllow $f11, $f14, $f13
#fillde 128*5($16)
vlogfc $f12, $f13, $f31, $f12
EXI(vldd $f10, 64($3))
srlow $f11, $f15, $f22
sllow $f10, $f14, $f23
vlogfc $f22, $f23, $f31, $f22
EXO(vstd $f12, 0($16))
EXO(vstd $f22, 32($16))
addl $16, 64, $16
addl $3, 64, $3
subl $18, 64, $18
cmple $18, 63, $1
beq $1, $misalign_body_simd
br $misalign_tail_simd
$misalign_tail_simd:
cmple $18, 31, $1
bne $1, $before_misalign_tail_quads
EXI(vldd $f11, 32($3))
srlow $f10, $f15, $f12
sllow $f11, $f14, $f13
vlogfc $f12, $f13, $f31, $f12
EXO(vstd $f12, 0($16))
subl $18, 32, $18
addl $16, 32, $16
addl $3, 32, $3
vfmov $f11, $f10
$before_misalign_tail_quads:
srlow $f10, $f15, $f12
s8subl $18, $4, $1
ble $1, $tail_quads
EXI(vldd $f11, 32($3))
sllow $f11, $f14, $f13
vlogfc $f12, $f13, $f31, $f12
$tail_quads:
subl $18, 8, $1
blt $1, $less_than_8
$move_a_quad:
fimovd $f12, $1
srlow $f12, 64, $f12
EXO(stl $1, 0($16))
subl $18, 8, $18
addl $16, 8, $16
subl $18, 8, $1
bge $1, $move_a_quad
$less_than_8:
.align 4
beq $18, $exitout
fimovd $f12, $1
$tail_bytes:
EXO(stb $1, 0($16))
subl $18, 1, $18
srl $1, 8, $1
addl $16, 1, $16
bgt $18, $tail_bytes
br $exitout
#else
/*
* In the worst case, we've just executed an ldl_u here from 0($17)
* and we'll repeat it once if we take the branch
*/
/* Misaligned quadword loop - not unrolled. Leave it that way. */
$misquad:
EXI(ldl_u $2, 8($17))
subl $4, 8, $4
extll $3, $17, $3
exthl $2, $17, $1
bis $3, $1, $1
EXO(stl $1, 0($16))
addl $17, 8, $17
subl $18, 8, $18
addl $16, 8, $16
bis $2, $2, $3
bne $4, $misquad
beq $18, $zerolength
/* We know we have at least one trip through the byte loop */
EXI(ldbu $2, 0($17))
addl $16, 1, $16
br $31, $dirtyentry
#endif
/* Do the trailing byte loop load, then hop into the store part of the loop */
/*
* A minimum of (33 - 7) bytes to do a quad at a time.
* Based upon the usage context, it's worth the effort to unroll this loop
* $18 - number of bytes to be moved
* $4 - number of bytes to move as quadwords
* $16 is current destination address
* $17 is current source address
*/
$quadaligned:
and $16, 31, $1
beq $1, $quadaligned_dest32Baligned
$quadaligned_align_32B:
EXI(ldl $1, 0($17))
addl $17, 8, $17
EXO(stl $1, 0($16))
subl $18, 8, $18
subl $4, 8, $4
addl $16, 8, $16
and $16, 31, $1
beq $4, $onebyteloop
bne $1, $quadaligned_align_32B
$quadaligned_dest32Baligned:
and $17, 31, $2
bne $2, $dest32Baligned
$quad32Bailgned:
subl $4, 64, $2
blt $2, $onequad
/*
* There is a significant assumption here that the source and destination
* addresses differ by more than 32 bytes. In this particular case, a
* sparsity of registers further bounds this to be a minimum of 8 bytes.
* But if this isn't met, then the output result will be incorrect.
* Furthermore, due to a lack of available registers, we really can't
* unroll this to be an 8x loop (which would enable us to use the wh64
* instruction memory hint instruction).
*/
$simd_quadalign_unroll2:
fillcs 128 * 5($17)
EXI(vldd $f22, 0($17))
EXI(vldd $f23, 32($17))
EXO(vstd $f22, 0($16))
EXO(vstd $f23, 32($16))
#fillde 128 * 5($16)
subl $4, 64, $4
subl $18, 64, $18
addl $17, 64, $17
addl $16, 64, $16
subl $4, 64, $3
bge $3, $simd_quadalign_unroll2
bne $4, $onequad
br $31, $noquads
$onequad:
EXI(ldl $1, 0($17))
subl $4, 8, $4
addl $17, 8, $17
EXO(stl $1, 0($16))
subl $18, 8, $18
addl $16, 8, $16
bne $4, $onequad
$noquads:
beq $18, $zerolength
/*
* For small copies (or the tail of a larger copy), do a very simple byte loop.
* There's no point in doing a lot of complex alignment calculations to try to
* to quadword stuff for a small amount of data.
* $18 - remaining number of bytes left to copy
* $16 - current dest addr
* $17 - current source addr
*/
$onebyteloop:
EXI(ldbu $2, 0($17))
addl $16, 1, $16
$dirtyentry:
/*
* the -1 is to compensate for the inc($16) done in a previous quadpack
* which allows us zero dependencies within either quadpack in the loop
*/
EXO(stb $2, -1($16))
addl $17, 1, $17
subl $18, 1, $18
bgt $18, $onebyteloop
$zerolength:
$exitout:
bis $31, $18, $0 bis $31, $18, $0
ret $31, ($26), 1 beq $7, $return
subl $7, 1, $7
beq $7, $restore_simd
$exitin: $restore_simd_u:
RESTORE_SIMD_U_REGS
br $31, $return
/* A stupid byte-by-byte zeroing of the rest of the output $restore_simd:
* buffer. This cures security holes by never leaving RESTORE_SIMD_REGS
* random kernel data around to be copied elsewhere.
*/
mov $18, $1
$101:
EXO(stb $31, 0($16))
subl $1, 1, $1
addl $16, 1, $16
bgt $1, $101
bis $31, $18, $0
ret $31, ($26), 1
$return:
ret
.end __copy_user .end __copy_user
EXPORT_SYMBOL(__copy_user) EXPORT_SYMBOL(__copy_user)
...@@ -2,307 +2,18 @@ ...@@ -2,307 +2,18 @@
#include <asm/export.h> #include <asm/export.h>
#define NC_STORE_THRESHOLD 2048 #define FIXUP_LDST(x, y) \
x, y
#define SAVE_SIMD_REGS \
ldi $sp, -0x60($sp); \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vstd $f1, 0($23); \
vstd $f2, 0x20($23)
#define RESTORE_SIMD_REGS \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vldd $f1, 0($23); \
vldd $f2, 0x20($23); \
ldi $sp, 0x60($sp)
#define SAVE_SIMD_U_REGS \
ldi $sp, -0x120($sp); \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vstd $f1, 0($23); \
vstd $f2, 0x20($23); \
vstd $f4, 0x40($23); \
vstd $f5, 0x60($23); \
vstd $f10, 0x80($23); \
vstd $f11, 0xa0($23); \
vstd $f20, 0xc0($23); \
vstd $f21, 0xe0($23)
#define RESTORE_SIMD_U_REGS \
addl $sp, 0x1f, $23; \
bic $23, 0x1f, $23; \
vldd $f1, 0($23); \
vldd $f2, 0x20($23); \
vldd $f4, 0x40($23); \
vldd $f5, 0x60($23); \
vldd $f10, 0x80($23); \
vldd $f11, 0xa0($23); \
vldd $f20, 0xc0($23); \
vldd $f21, 0xe0($23); \
ldi $sp, 0x120($sp)
.set noat
.align 4
.globl memcpy .globl memcpy
.ent memcpy .ent memcpy
memcpy: memcpy:
.frame $30, 0, $26, 0 .frame $30, 0, $26, 0
.prologue 0 .prologue 0
mov $16, $0 mov $16, $0
ble $18, $out #include "deep-copy_template.S"
and $16, 7, $1
beq $1, $dest_aligned_8
.align 4
$byte_loop_head:
ldbu $2, 0($17)
subl $18, 1, $18
addl $17, 1, $17
stb $2, 0($16)
addl $16, 1, $16
ble $18, $out
and $16, 7, $1
bne $1, $byte_loop_head
$dest_aligned_8:
and $17, 7, $4
subl $18, 16, $18
blt $18, $quad_end
subl $18, 64, $18
blt $18, $simd_end
and $16, 31, $1
beq $1, $dest_aligned_32
bne $4, $quad_u_loop_head
.align 5
$quad_loop_head:
ldl $2, 0($17)
subl $18, 8, $18
addl $17, 8, $17
stl $2, 0($16)
addl $16, 8, $16
and $16, 31, $1
blt $18, $simd_end
beq $16, $dest_aligned_32
br $31, $quad_loop_head
$dest_aligned_32:
and $17, 31, $5
bne $5, $prep_simd_u_loop
$prep_simd_loop:
SAVE_SIMD_REGS
ldi $1, NC_STORE_THRESHOLD($31)
cmple $18, $1, $1
bne $1, $simd_loop
.align 5
$simd_loop_nc:
fillcs 128 * 5($17)
vldd $f1, 0($17)
vldd $f2, 32($17)
subl $18, 64, $18
addl $17, 64, $17
vstd_nc $f1, 0($16)
vstd_nc $f2, 32($16)
addl $16, 64, $16
bge $18, $simd_loop_nc
memb # required for _nc store instructions
br $31, $simd_loop_end
.align 5
$simd_loop:
fillcs 128 * 5($17)
vldd $f1, 0($17)
vldd $f2, 32($17)
subl $18, 64, $18
addl $17, 64, $17
vstd $f1, 0($16)
vstd $f2, 32($16)
addl $16, 64, $16
bge $18, $simd_loop
$simd_loop_end:
addl $18, 64, $1
cmplt $1, 32, $1
bne $1, $no_more_simd
vldd $f1, 0($17)
subl $18, 32, $18
addl $17, 32, $17
vstd $f1, 0($16)
addl $16, 32, $16
$no_more_simd:
RESTORE_SIMD_REGS
$simd_end:
addl $18, 64, $18
blt $18, $quad_end
bne $4, $prep_quad_u_loop_tail
.align 4
$quad_loop_tail:
ldl $2, 0($17)
ldl $3, 8($17)
subl $18, 16, $18
addl $17, 16, $17
stl $2, 0($16)
stl $3, 8($16)
addl $16, 16, $16
bge $18, $quad_loop_tail
$quad_end:
addl $18, 16, $18
ble $18, $out
cmplt $18, 8, $1
bne $1, $byte_loop_tail
bne $4, $move_one_quad_u
$move_one_quad:
ldl $2, 0($17)
subl $18, 8, $18
addl $17, 8, $17
stl $2, 0($16)
addl $16, 8, $16
ble $18, $out
.align 4
$byte_loop_tail:
ldbu $2, 0($17)
subl $18, 1, $18
addl $17, 1, $17
stb $2, 0($16)
addl $16, 1, $16
bgt $18, $byte_loop_tail
$out: $out:
ret $31, ($26), 1 ret
.align 5
$quad_u_loop_head:
ldl_u $2, 0($17)
ldl_u $3, 7($17)
subl $18, 8, $18
addl $17, 8, $17
extll $2, $4, $2
exthl $3, $4, $3
bis $2, $3, $2
stl $2, 0($16)
addl $16, 8, $16
blt $18, $simd_end
beq $16, $dest_aligned_32
br $31, $quad_u_loop_head
$prep_simd_u_loop:
SAVE_SIMD_U_REGS
andnot $17, 31, $3
ldi $2, 256($31)
sll $5, 3, $1
subl $2, $1, $2
sll $1, 29, $1
sll $2, 29, $2
ifmovd $1, $f1
ifmovd $2, $f2
vldd $f4, 0($3)
ldi $1, NC_STORE_THRESHOLD($31)
cmple $18, $1, $1
bne $1, $simd_u_loop
.align 5
$simd_u_loop_nc:
vldd $f5, 32($3)
fillcs 128 * 5($3)
srlow $f4, $f1, $f10
sllow $f5, $f2, $f11
vlogfc $f10, $f11, $f31, $f10
vldd $f4, 64($3)
srlow $f5, $f1, $f20
sllow $f4, $f2, $f21
vlogfc $f20, $f21, $f31, $f20
vstd_nc $f10, 0($16)
vstd_nc $f20, 32($16)
subl $18, 64, $18
addl $3, 64, $3
addl $16, 64, $16
bge $18, $simd_u_loop_nc
memb # required for _nc store instructions
br $31, $simd_u_loop_end
.align 5
$simd_u_loop:
vldd $f5, 32($3)
fillcs 128 * 5($3)
srlow $f4, $f1, $f10
sllow $f5, $f2, $f11
vlogfc $f10, $f11, $f31, $f10
vldd $f4, 64($3)
srlow $f5, $f1, $f20
sllow $f4, $f2, $f21
vlogfc $f20, $f21, $f31, $f20
vstd $f10, 0($16)
vstd $f20, 32($16)
subl $18, 64, $18
addl $3, 64, $3
addl $16, 64, $16
bge $18, $simd_u_loop
$simd_u_loop_end:
addl $18, 64, $1
cmplt $1, 32, $1
bne $1, $no_more_simd_u
vldd $f5, 32($3)
srlow $f4, $f1, $f10
sllow $f5, $f2, $f11
vlogfc $f10, $f11, $f31, $f10
vstd $f10, 0($16)
subl $18, 32, $18
addl $3, 32, $3
addl $16, 32, $16
$no_more_simd_u:
RESTORE_SIMD_U_REGS
bis $3, $5, $17
br $31, $simd_end
$prep_quad_u_loop_tail:
ldl_u $2, 0($17)
.align 5
$quad_u_loop_tail:
ldl_u $3, 8($17)
extll $2, $4, $22
exthl $3, $4, $23
bis $22, $23, $22
stl $22, 0($16)
ldl_u $2, 16($17)
extll $3, $4, $24
exthl $2, $4, $25
bis $24, $25, $24
stl $24, 8($16)
subl $18, 16, $18
addl $17, 16, $17
addl $16, 16, $16
bge $18, $quad_u_loop_tail
br $31, $quad_end
$move_one_quad_u:
ldl_u $2, 0($17)
ldl_u $3, 8($17)
subl $18, 8, $18
addl $17, 8, $17
extll $2, $4, $22
exthl $3, $4, $23
bis $22, $23, $22
stl $22, 0($16)
addl $16, 8, $16
ble $18, $out
br $31, $byte_loop_tail
.end memcpy .end memcpy
EXPORT_SYMBOL(memcpy) EXPORT_SYMBOL(memcpy)
__memcpy = memcpy __memcpy = memcpy
......
...@@ -41,15 +41,12 @@ void __iowrite64_copy(void __iomem *to, ...@@ -41,15 +41,12 @@ void __iowrite64_copy(void __iomem *to,
const void *from, const void *from,
size_t count) size_t count)
{ {
#ifdef CONFIG_64BIT
u64 __iomem *dst = to; u64 __iomem *dst = to;
const u64 *src = from; const u64 *src = from;
const u64 *end = src + count; const u64 *end = src + count;
while (src < end) while (src < end) {
__raw_writeq(*src++, dst++); __raw_writeq(*src++, dst++);
mb(); mb();
#else }
__iowrite32_copy(to, from, count * 2);
#endif
} }
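A usage sketch of the fixed helper; identifiers such as dev_regs, desc and DESC_PHYS are illustrative only, and the count argument is in 64-bit words:

/* Usage sketch -- dev_regs/desc/DESC_PHYS are illustrative names, not from this patch. */
u64 desc[4] = { 0 };
void __iomem *dev_regs = ioremap(DESC_PHYS, sizeof(desc));

__iowrite64_copy(dev_regs, desc, ARRAY_SIZE(desc));	/* 4 qwords, mb() after each write */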
...@@ -28,12 +28,6 @@ void __delay(unsigned long loops) ...@@ -28,12 +28,6 @@ void __delay(unsigned long loops)
} }
EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(__delay);
#ifdef CONFIG_SMP
#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy
#else
#define LPJ loops_per_jiffy
#endif
void udelay(unsigned long usecs) void udelay(unsigned long usecs)
{ {
unsigned long loops = usecs * get_cpu_freq() / 1000000; unsigned long loops = usecs * get_cpu_freq() / 1000000;
......
...@@ -31,8 +31,8 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) ...@@ -31,8 +31,8 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr)
} }
#endif #endif
extern void die_if_kernel(char *, struct pt_regs *, long); extern void die(char *, struct pt_regs *, long);
extern void dik_show_regs(struct pt_regs *regs); extern void show_regs(struct pt_regs *regs);
void show_all_vma(void) void show_all_vma(void)
{ {
...@@ -61,31 +61,6 @@ void show_all_vma(void) ...@@ -61,31 +61,6 @@ void show_all_vma(void)
} }
} }
/*
* Force a new ASN for a task.
*/
#ifndef CONFIG_SMP
unsigned long last_asn = ASN_FIRST_VERSION;
#endif
void
__load_new_mm_context(struct mm_struct *next_mm)
{
unsigned long mmc;
struct pcb_struct *pcb;
mmc = __get_new_mm_context(next_mm, smp_processor_id());
next_mm->context.asid[smp_processor_id()] = mmc;
pcb = &current_thread_info()->pcb;
pcb->asn = mmc & HARDWARE_ASN_MASK;
pcb->ptbr = virt_to_pfn(next_mm->pgd);
__reload_thread(pcb);
}
/* /*
* This routine handles page faults. It determines the address, * This routine handles page faults. It determines the address,
* and the problem, and then passes it off to handle_mm_fault(). * and the problem, and then passes it off to handle_mm_fault().
...@@ -301,7 +276,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, ...@@ -301,7 +276,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
*/ */
pr_alert("Unable to handle kernel paging request at virtual address %016lx\n", pr_alert("Unable to handle kernel paging request at virtual address %016lx\n",
address); address);
die_if_kernel("Oops", regs, cause); die("Oops", regs, cause);
do_exit(SIGKILL); do_exit(SIGKILL);
/* /*
...@@ -332,7 +307,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, ...@@ -332,7 +307,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
if (unlikely(segv_debug_enabled)) { if (unlikely(segv_debug_enabled)) {
pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n",
current->pid, cause, mmcsr, address, regs->pc); current->pid, cause, mmcsr, address, regs->pc);
dik_show_regs(regs); show_regs(regs);
show_all_vma(); show_all_vma();
} }
......
...@@ -34,6 +34,7 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); ...@@ -34,6 +34,7 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE)));
static phys_addr_t mem_start; static phys_addr_t mem_start;
static phys_addr_t mem_size_limit; static phys_addr_t mem_size_limit;
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
unsigned long memory_block_size_bytes(void) unsigned long memory_block_size_bytes(void)
{ {
if (is_in_guest()) if (is_in_guest())
...@@ -41,6 +42,7 @@ unsigned long memory_block_size_bytes(void) ...@@ -41,6 +42,7 @@ unsigned long memory_block_size_bytes(void)
else else
return MIN_MEMORY_BLOCK_SIZE; return MIN_MEMORY_BLOCK_SIZE;
} }
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
static int __init setup_mem_size(char *p) static int __init setup_mem_size(char *p)
{ {
...@@ -75,34 +77,14 @@ pgd_alloc(struct mm_struct *mm) ...@@ -75,34 +77,14 @@ pgd_alloc(struct mm_struct *mm)
return ret; return ret;
} }
static inline unsigned long
load_PCB(struct pcb_struct *pcb)
{
register unsigned long sp __asm__("$30");
pcb->ksp = sp;
return __reload_thread(pcb);
}
/* Set up initial PCB, VPTB, and other such nicities. */ /* Set up initial PCB, VPTB, and other such nicities. */
static inline void static inline void
switch_to_system_map(void) switch_to_system_map(void)
{ {
unsigned long newptbr;
unsigned long original_pcb_ptr;
/*
* Initialize the kernel's page tables. Linux puts the vptb in
* the last slot of the L1 page table.
*/
memset(swapper_pg_dir, 0, PAGE_SIZE); memset(swapper_pg_dir, 0, PAGE_SIZE);
newptbr = virt_to_pfn(swapper_pg_dir); wrptbr(virt_to_phys(swapper_pg_dir));
tbiv();
/* Also set up the real kernel PCB while we're at it. */
init_thread_info.pcb.ptbr = newptbr;
init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */
original_pcb_ptr = load_PCB(&init_thread_info.pcb);
tbia();
} }
void __init callback_init(void) void __init callback_init(void)
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/mmdebug.h> #include <linux/mmdebug.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mm.h>
#include <asm/page.h> #include <asm/page.h>
unsigned long __phys_addr(unsigned long x) unsigned long __phys_addr(unsigned long x)
......
...@@ -21,80 +21,82 @@ ...@@ -21,80 +21,82 @@
#ifndef _SW64_BPF_JIT_H #ifndef _SW64_BPF_JIT_H
#define _SW64_BPF_JIT_H #define _SW64_BPF_JIT_H
/* SW64 instruction field shift */
#define SW64_BPF_OPCODE_OFFSET 26 #define SW64_BPF_OPCODE_OFFSET 26
#define SW64_BPF_RA_OFFSET 21 #define SW64_BPF_RA_OFFSET 21
#define SW64_BPF_RB_OFFSET 16 #define SW64_BPF_RB_OFFSET 16
#define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13 #define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13
#define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5 #define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5
#define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0 #define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0
#define SW64_BPF_LS_FUNC_OFFSET 12
#define SW64_BPF_OPCODE_BR_CALL 0x01 /* SW64 instruction opcodes */
#define SW64_BPF_OPCODE_BR_RET 0x02 #define SW64_BPF_OPCODE_CALL 0x01
#define SW64_BPF_OPCODE_BR_JMP 0x03 #define SW64_BPF_OPCODE_RET 0x02
#define SW64_BPF_OPCODE_BR_BR 0x04 #define SW64_BPF_OPCODE_JMP 0x03
#define SW64_BPF_OPCODE_BR_BSR 0x05 #define SW64_BPF_OPCODE_BR 0x04
#define SW64_BPF_OPCODE_BR_BEQ 0x30 #define SW64_BPF_OPCODE_BSR 0x05
#define SW64_BPF_OPCODE_BR_BNE 0x31 #define SW64_BPF_OPCODE_MISC 0x06
#define SW64_BPF_OPCODE_BR_BLT 0x32 #define SW64_BPF_OPCODE_LOCK 0x08
#define SW64_BPF_OPCODE_BR_BLE 0x33
#define SW64_BPF_OPCODE_BR_BGT 0x34
#define SW64_BPF_OPCODE_BR_BGE 0x35
#define SW64_BPF_OPCODE_BR_BLBC 0x36
#define SW64_BPF_OPCODE_BR_BLBS 0x37
#define SW64_BPF_OPCODE_LS_LDBU 0x20
#define SW64_BPF_OPCODE_LS_LDHU 0x21
#define SW64_BPF_OPCODE_LS_LDW 0x22
#define SW64_BPF_OPCODE_LS_LDL 0x23
#define SW64_BPF_OPCODE_LS_STB 0x28
#define SW64_BPF_OPCODE_LS_STH 0x29
#define SW64_BPF_OPCODE_LS_STW 0x2A
#define SW64_BPF_OPCODE_LS_STL 0x2B
#define SW64_BPF_OPCODE_LS_LDI 0x3E
#define SW64_BPF_OPCODE_LS_LDIH 0x3F
#define SW64_BPF_OPCODE_ALU_REG 0x10 #define SW64_BPF_OPCODE_ALU_REG 0x10
#define SW64_BPF_OPCODE_ALU_IMM 0x12 #define SW64_BPF_OPCODE_ALU_IMM 0x12
#define SW64_BPF_OPCODE_LDBU 0x20
#define SW64_BPF_OPCODE_LDHU 0x21
#define SW64_BPF_OPCODE_LDW 0x22
#define SW64_BPF_OPCODE_LDL 0x23
#define SW64_BPF_OPCODE_STB 0x28
#define SW64_BPF_OPCODE_STH 0x29
#define SW64_BPF_OPCODE_STW 0x2A
#define SW64_BPF_OPCODE_STL 0x2B
#define SW64_BPF_OPCODE_BEQ 0x30
#define SW64_BPF_OPCODE_BNE 0x31
#define SW64_BPF_OPCODE_BLT 0x32
#define SW64_BPF_OPCODE_BLE 0x33
#define SW64_BPF_OPCODE_BGT 0x34
#define SW64_BPF_OPCODE_BGE 0x35
#define SW64_BPF_OPCODE_BLBC 0x36
#define SW64_BPF_OPCODE_BLBS 0x37
#define SW64_BPF_OPCODE_LDI 0x3E
#define SW64_BPF_OPCODE_LDIH 0x3F
/* SW64 MISC instructions function codes */
#define SW64_BPF_FUNC_MISC_RD_F 0x1000
#define SW64_BPF_FUNC_MISC_WR_F 0x1020
/* SW64 LOCK instructions function codes */
#define SW64_BPF_FUNC_LOCK_LLDW 0x0
#define SW64_BPF_FUNC_LOCK_LLDL 0x1
#define SW64_BPF_FUNC_LOCK_LSTW 0x8
#define SW64_BPF_FUNC_LOCK_LSTL 0x9
/* SW64 ALU instructions function codes */
#define SW64_BPF_FUNC_ALU_ADDW 0x00 #define SW64_BPF_FUNC_ALU_ADDW 0x00
#define SW64_BPF_FUNC_ALU_SUBW 0x01 #define SW64_BPF_FUNC_ALU_SUBW 0x01
#define SW64_BPF_FUNC_ALU_ADDL 0x08 #define SW64_BPF_FUNC_ALU_ADDL 0x08
#define SW64_BPF_FUNC_ALU_SUBL 0x09 #define SW64_BPF_FUNC_ALU_SUBL 0x09
#define SW64_BPF_FUNC_ALU_MULW 0x10 #define SW64_BPF_FUNC_ALU_MULW 0x10
#define SW64_BPF_FUNC_ALU_MULL 0x18 #define SW64_BPF_FUNC_ALU_MULL 0x18
#define SW64_BPF_FUNC_ALU_CMPEQ 0x28
#define SW64_BPF_FUNC_ALU_CMPLT 0x29
#define SW64_BPF_FUNC_ALU_CMPLE 0x2A
#define SW64_BPF_FUNC_ALU_CMPULT 0x2B
#define SW64_BPF_FUNC_ALU_CMPULE 0x2C
#define SW64_BPF_FUNC_ALU_AND 0x38
#define SW64_BPF_FUNC_ALU_BIC 0x39
#define SW64_BPF_FUNC_ALU_BIS 0x3A
#define SW64_BPF_FUNC_ALU_ORNOT 0x3B
#define SW64_BPF_FUNC_ALU_XOR 0x3C
#define SW64_BPF_FUNC_ALU_EQV 0x3D
#define SW64_BPF_FUNC_ALU_SLL 0x48
#define SW64_BPF_FUNC_ALU_SRL 0x49
#define SW64_BPF_FUNC_ALU_SRA 0x4A
#define SW64_BPF_FUNC_ALU_ZAP 0x68 #define SW64_BPF_FUNC_ALU_ZAP 0x68
#define SW64_BPF_FUNC_ALU_ZAPNOT 0x69 #define SW64_BPF_FUNC_ALU_ZAPNOT 0x69
#define SW64_BPF_FUNC_ALU_SEXTB 0x6A #define SW64_BPF_FUNC_ALU_SEXTB 0x6A
#define SW64_BPF_FUNC_ALU_SEXTH 0x6B #define SW64_BPF_FUNC_ALU_SEXTH 0x6B
#define SW64_BPF_OPCODE_BS_REG 0x10
#define SW64_BPF_OPCODE_BS_IMM 0x12
#define SW64_BPF_FUNC_BS_SLL 0x48
#define SW64_BPF_FUNC_BS_SRL 0x49
#define SW64_BPF_FUNC_BS_SRA 0x4A
#define SW64_BPF_OPCODE_LOGIC_REG 0x10
#define SW64_BPF_OPCODE_LOGIC_IMM 0x12
#define SW64_BPF_FUNC_LOGIC_AND 0x38
#define SW64_BPF_FUNC_LOGIC_BIC 0x39
#define SW64_BPF_FUNC_LOGIC_BIS 0x3A
#define SW64_BPF_FUNC_LOGIC_ORNOT 0x3B
#define SW64_BPF_FUNC_LOGIC_XOR 0x3C
#define SW64_BPF_FUNC_LOGIC_EQV 0x3D
#define SW64_BPF_OPCODE_CMP_REG 0x10
#define SW64_BPF_OPCODE_CMP_IMM 0x12
#define SW64_BPF_FUNC_CMP_EQ 0x28
#define SW64_BPF_FUNC_CMP_LT 0x29
#define SW64_BPF_FUNC_CMP_LE 0x2A
#define SW64_BPF_FUNC_CMP_ULT 0x2B
#define SW64_BPF_FUNC_CMP_ULE 0x2C
/* special instruction used in jit_fill_hole() */ /* special instruction used in jit_fill_hole() */
#define SW64_BPF_ILLEGAL_INSN ((1 << 25) | 0x80) #define SW64_BPF_ILLEGAL_INSN (0x1ff00000) /* pri_ret/b $31 */
enum sw64_bpf_registers { enum sw64_bpf_registers {
SW64_BPF_REG_V0 = 0, /* keep return value */ SW64_BPF_REG_V0 = 0, /* keep return value */
...@@ -135,25 +137,45 @@ enum sw64_bpf_registers { ...@@ -135,25 +137,45 @@ enum sw64_bpf_registers {
/* SW64 load and store instructions */ /* SW64 load and store instructions */
#define SW64_BPF_LDBU(dst, rb, offset16) \ #define SW64_BPF_LDBU(dst, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDBU, dst, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDBU, dst, rb, offset16)
#define SW64_BPF_LDHU(dst, rb, offset16) \ #define SW64_BPF_LDHU(dst, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDHU, dst, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDHU, dst, rb, offset16)
#define SW64_BPF_LDW(dst, rb, offset16) \ #define SW64_BPF_LDW(dst, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDW, dst, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDW, dst, rb, offset16)
#define SW64_BPF_LDL(dst, rb, offset16) \ #define SW64_BPF_LDL(dst, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDL, dst, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDL, dst, rb, offset16)
#define SW64_BPF_STB(src, rb, offset16) \ #define SW64_BPF_STB(src, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STB, src, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STB, src, rb, offset16)
#define SW64_BPF_STH(src, rb, offset16) \ #define SW64_BPF_STH(src, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STH, src, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STH, src, rb, offset16)
#define SW64_BPF_STW(src, rb, offset16) \ #define SW64_BPF_STW(src, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STW, src, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STW, src, rb, offset16)
#define SW64_BPF_STL(src, rb, offset16) \ #define SW64_BPF_STL(src, rb, offset16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_STL, src, rb, offset16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STL, src, rb, offset16)
#define SW64_BPF_LDI(dst, rb, imm16) \ #define SW64_BPF_LDI(dst, rb, imm16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDI, dst, rb, imm16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDI, dst, rb, imm16)
#define SW64_BPF_LDIH(dst, rb, imm16) \ #define SW64_BPF_LDIH(dst, rb, imm16) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LS_LDIH, dst, rb, imm16) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDIH, dst, rb, imm16)
/* SW64 lock instructions */
#define SW64_BPF_LLDW(ra, rb, offset16) \
sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDW)
#define SW64_BPF_LLDL(ra, rb, offset16) \
sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDL)
#define SW64_BPF_LSTW(ra, rb, offset16) \
sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTW)
#define SW64_BPF_LSTL(ra, rb, offset16) \
sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTL)
#define SW64_BPF_RD_F(ra) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \
ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_RD_F)
#define SW64_BPF_WR_F(ra) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \
ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_WR_F)
/* SW64 ALU instructions REG format */ /* SW64 ALU instructions REG format */
#define SW64_BPF_ADDW_REG(ra, rb, dst) \ #define SW64_BPF_ADDW_REG(ra, rb, dst) \
...@@ -182,10 +204,10 @@ enum sw64_bpf_registers { ...@@ -182,10 +204,10 @@ enum sw64_bpf_registers {
ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT) ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT)
#define SW64_BPF_SEXTB_REG(rb, dst) \ #define SW64_BPF_SEXTB_REG(rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
0, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTB)
#define SW64_BPF_SEXTH_REG(rb, dst) \ #define SW64_BPF_SEXTH_REG(rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
0, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTH)
/* SW64 ALU instructions IMM format */ /* SW64 ALU instructions IMM format */
#define SW64_BPF_ADDW_IMM(ra, imm8, dst) \ #define SW64_BPF_ADDW_IMM(ra, imm8, dst) \
...@@ -214,130 +236,133 @@ enum sw64_bpf_registers { ...@@ -214,130 +236,133 @@ enum sw64_bpf_registers {
ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT) ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT)
#define SW64_BPF_SEXTB_IMM(imm8, dst) \ #define SW64_BPF_SEXTB_IMM(imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
0, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB)
#define SW64_BPF_SEXTH_IMM(imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTH)
/* SW64 bit shift instructions REG format */ /* SW64 bit shift instructions REG format */
#define SW64_BPF_SLL_REG(src, rb, dst) \ #define SW64_BPF_SLL_REG(src, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
src, rb, dst, SW64_BPF_FUNC_BS_SLL) src, rb, dst, SW64_BPF_FUNC_ALU_SLL)
#define SW64_BPF_SRL_REG(src, rb, dst) \ #define SW64_BPF_SRL_REG(src, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
src, rb, dst, SW64_BPF_FUNC_BS_SRL) src, rb, dst, SW64_BPF_FUNC_ALU_SRL)
#define SW64_BPF_SRA_REG(src, rb, dst) \ #define SW64_BPF_SRA_REG(src, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_BS_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
src, rb, dst, SW64_BPF_FUNC_BS_SRA) src, rb, dst, SW64_BPF_FUNC_ALU_SRA)
/* SW64 bit shift instructions IMM format */ /* SW64 bit shift instructions IMM format */
#define SW64_BPF_SLL_IMM(src, imm8, dst) \ #define SW64_BPF_SLL_IMM(src, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
src, imm8, dst, SW64_BPF_FUNC_BS_SLL) src, imm8, dst, SW64_BPF_FUNC_ALU_SLL)
#define SW64_BPF_SRL_IMM(src, imm8, dst) \ #define SW64_BPF_SRL_IMM(src, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
src, imm8, dst, SW64_BPF_FUNC_BS_SRL) src, imm8, dst, SW64_BPF_FUNC_ALU_SRL)
#define SW64_BPF_SRA_IMM(src, imm8, dst) \ #define SW64_BPF_SRA_IMM(src, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_BS_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
src, imm8, dst, SW64_BPF_FUNC_BS_SRA) src, imm8, dst, SW64_BPF_FUNC_ALU_SRA)
/* SW64 control instructions */ /* SW64 control instructions */
#define SW64_BPF_CALL(ra, rb) \ #define SW64_BPF_CALL(ra, rb) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_CALL, ra, rb, 0) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_CALL, ra, rb, 0)
#define SW64_BPF_RET(rb) \ #define SW64_BPF_RET(rb) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_RET, SW64_BPF_REG_ZR, rb, 0) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_RET, SW64_BPF_REG_ZR, rb, 0)
#define SW64_BPF_JMP(ra, rb) \ #define SW64_BPF_JMP(ra, rb) \
sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_BR_JMP, ra, rb, 0) sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_JMP, ra, rb, 0)
#define SW64_BPF_BR(ra, offset) \ #define SW64_BPF_BR(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BR, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR, ra, offset)
#define SW64_BPF_BSR(ra, offset) \ #define SW64_BPF_BSR(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BSR, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BSR, ra, offset)
#define SW64_BPF_BEQ(ra, offset) \ #define SW64_BPF_BEQ(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BEQ, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BEQ, ra, offset)
#define SW64_BPF_BNE(ra, offset) \ #define SW64_BPF_BNE(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BNE, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BNE, ra, offset)
#define SW64_BPF_BLT(ra, offset) \ #define SW64_BPF_BLT(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLT, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLT, ra, offset)
#define SW64_BPF_BLE(ra, offset) \ #define SW64_BPF_BLE(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLE, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLE, ra, offset)
#define SW64_BPF_BGT(ra, offset) \ #define SW64_BPF_BGT(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGT, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGT, ra, offset)
#define SW64_BPF_BGE(ra, offset) \ #define SW64_BPF_BGE(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BGE, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGE, ra, offset)
#define SW64_BPF_BLBC(ra, offset) \ #define SW64_BPF_BLBC(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBC, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBC, ra, offset)
#define SW64_BPF_BLBS(ra, offset) \ #define SW64_BPF_BLBS(ra, offset) \
sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR_BLBS, ra, offset) sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBS, ra, offset)
/* SW64 bit logic instructions REG format */ /* SW64 bit logic instructions REG format */
#define SW64_BPF_AND_REG(ra, rb, dst) \ #define SW64_BPF_AND_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_AND) ra, rb, dst, SW64_BPF_FUNC_ALU_AND)
#define SW64_BPF_ANDNOT_REG(ra, rb, dst) \ #define SW64_BPF_ANDNOT_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIC) ra, rb, dst, SW64_BPF_FUNC_ALU_BIC)
#define SW64_BPF_OR_REG(ra, rb, dst) \ #define SW64_BPF_BIS_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_BIS) ra, rb, dst, SW64_BPF_FUNC_ALU_BIS)
#define SW64_BPF_ORNOT_REG(ra, rb, dst) \ #define SW64_BPF_ORNOT_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_ORNOT) ra, rb, dst, SW64_BPF_FUNC_ALU_ORNOT)
#define SW64_BPF_XOR_REG(ra, rb, dst) \ #define SW64_BPF_XOR_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_XOR) ra, rb, dst, SW64_BPF_FUNC_ALU_XOR)
#define SW64_BPF_EQV_REG(ra, rb, dst) \ #define SW64_BPF_EQV_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_LOGIC_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_LOGIC_EQV) ra, rb, dst, SW64_BPF_FUNC_ALU_EQV)
/* SW64 bit logic instructions IMM format */ /* SW64 bit logic instructions IMM format */
#define SW64_BPF_AND_IMM(ra, imm8, dst) \ #define SW64_BPF_AND_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_AND) ra, imm8, dst, SW64_BPF_FUNC_ALU_AND)
#define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \ #define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIC) ra, imm8, dst, SW64_BPF_FUNC_ALU_BIC)
#define SW64_BPF_OR_IMM(ra, imm8, dst) \ #define SW64_BPF_BIS_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_BIS) ra, imm8, dst, SW64_BPF_FUNC_ALU_BIS)
#define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \ #define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_ORNOT) ra, imm8, dst, SW64_BPF_FUNC_ALU_ORNOT)
#define SW64_BPF_XOR_IMM(ra, imm8, dst) \ #define SW64_BPF_XOR_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_XOR) ra, imm8, dst, SW64_BPF_FUNC_ALU_XOR)
#define SW64_BPF_EQV_IMM(ra, imm8, dst) \ #define SW64_BPF_EQV_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_LOGIC_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_LOGIC_EQV) ra, imm8, dst, SW64_BPF_FUNC_ALU_EQV)
/* SW64 compare instructions REG format */ /* SW64 compare instructions REG format */
#define SW64_BPF_CMPEQ_REG(ra, rb, dst) \ #define SW64_BPF_CMPEQ_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_CMP_EQ) ra, rb, dst, SW64_BPF_FUNC_ALU_CMPEQ)
#define SW64_BPF_CMPLT_REG(ra, rb, dst) \ #define SW64_BPF_CMPLT_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_CMP_LT) ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLT)
#define SW64_BPF_CMPLE_REG(ra, rb, dst) \ #define SW64_BPF_CMPLE_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_CMP_LE) ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLE)
#define SW64_BPF_CMPULT_REG(ra, rb, dst) \ #define SW64_BPF_CMPULT_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_CMP_ULT) ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULT)
#define SW64_BPF_CMPULE_REG(ra, rb, dst) \ #define SW64_BPF_CMPULE_REG(ra, rb, dst) \
sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_CMP_REG, \ sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
ra, rb, dst, SW64_BPF_FUNC_CMP_ULE) ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULE)
/* SW64 compare instructions imm format */ /* SW64 compare instructions imm format */
#define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \ #define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_CMP_EQ) ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPEQ)
#define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \ #define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_CMP_LT) ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLT)
#define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \ #define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_CMP_LE) ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLE)
#define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \ #define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_CMP_ULT) ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULT)
#define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \ #define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_CMP_IMM, \ sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
ra, imm8, dst, SW64_BPF_FUNC_CMP_ULE) ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE)
#endif /* _SW64_BPF_JIT_H */ #endif /* _SW64_BPF_JIT_H */
...@@ -29,46 +29,37 @@ ...@@ -29,46 +29,37 @@
#include "bpf_jit.h" #include "bpf_jit.h"
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TCALL_CNT (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
/*
* TO-DO List:
* DIV
* MOD
*/
static const int bpf2sw64[] = { static const int bpf2sw64[] = {
/* return value from in-kernel function, and exit value from eBPF */ /* return value from in-kernel function, and exit value from eBPF */
[BPF_REG_0] = SW64_BPF_REG_V0, [BPF_REG_0] = SW64_BPF_REG_V0,
/* arguments from eBPF program to in-kernel function */ /* arguments from eBPF program to in-kernel function */
[BPF_REG_1] = SW64_BPF_REG_A1, [BPF_REG_1] = SW64_BPF_REG_A0,
[BPF_REG_2] = SW64_BPF_REG_A2, [BPF_REG_2] = SW64_BPF_REG_A1,
[BPF_REG_3] = SW64_BPF_REG_A3, [BPF_REG_3] = SW64_BPF_REG_A2,
[BPF_REG_4] = SW64_BPF_REG_A4, [BPF_REG_4] = SW64_BPF_REG_A3,
[BPF_REG_5] = SW64_BPF_REG_A5, [BPF_REG_5] = SW64_BPF_REG_A4,
/* callee saved registers that in-kernel function will preserve */ /* callee saved registers that in-kernel function will preserve */
[BPF_REG_6] = SW64_BPF_REG_S1, [BPF_REG_6] = SW64_BPF_REG_S0,
[BPF_REG_7] = SW64_BPF_REG_S2, [BPF_REG_7] = SW64_BPF_REG_S1,
[BPF_REG_8] = SW64_BPF_REG_S3, [BPF_REG_8] = SW64_BPF_REG_S2,
[BPF_REG_9] = SW64_BPF_REG_S4, [BPF_REG_9] = SW64_BPF_REG_S3,
/* read-only frame pointer to access stack */ /* read-only frame pointer to access stack */
[BPF_REG_FP] = SW64_BPF_REG_S0, [BPF_REG_FP] = SW64_BPF_REG_FP,
/* temporary registers for internal BPF JIT */
[TMP_REG_1] = SW64_BPF_REG_T1,
[TMP_REG_2] = SW64_BPF_REG_T2,
/* tail_call_cnt */ /* tail_call_cnt */
[TCALL_CNT] = SW64_BPF_REG_S5, [TCALL_CNT] = SW64_BPF_REG_S4,
/* temporary register for blinding constants */ /* temporary register for blinding constants */
[BPF_REG_AX] = SW64_BPF_REG_T12, [BPF_REG_AX] = SW64_BPF_REG_T11,
}; };
struct jit_ctx { struct jit_ctx {
const struct bpf_prog *prog; const struct bpf_prog *prog;
int idx; // JITed instruction index int idx; // JITed instruction index
int current_tmp_reg;
int epilogue_offset; int epilogue_offset;
int *insn_offset; // [bpf_insn_idx] = jited_insn_idx int *insn_offset; // [bpf_insn_idx] = jited_insn_idx
int exentry_idx;
u32 *image; // JITed instruction u32 *image; // JITed instruction
u32 stack_size; u32 stack_size;
}; };
...@@ -83,7 +74,7 @@ static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra, ...@@ -83,7 +74,7 @@ static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra,
{ {
opcode = opcode << SW64_BPF_OPCODE_OFFSET; opcode = opcode << SW64_BPF_OPCODE_OFFSET;
ra = ra << SW64_BPF_RA_OFFSET; ra = ra << SW64_BPF_RA_OFFSET;
return opcode | ra | disp; return opcode | ra | (disp & 0x1fffff);
} }
static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra,
...@@ -92,7 +83,17 @@ static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, ...@@ -92,7 +83,17 @@ static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra,
opcode = opcode << SW64_BPF_OPCODE_OFFSET; opcode = opcode << SW64_BPF_OPCODE_OFFSET;
ra = ra << SW64_BPF_RA_OFFSET; ra = ra << SW64_BPF_RA_OFFSET;
rb = rb << SW64_BPF_RB_OFFSET; rb = rb << SW64_BPF_RB_OFFSET;
return opcode | ra | rb | disp; return opcode | ra | rb | (disp & 0xffff);
}
static inline u32 sw64_bpf_gen_format_ls_func(int opcode, enum sw64_bpf_registers ra,
enum sw64_bpf_registers rb, u16 disp, int function)
{
opcode = opcode << SW64_BPF_OPCODE_OFFSET;
ra = ra << SW64_BPF_RA_OFFSET;
rb = rb << SW64_BPF_RB_OFFSET;
function = function << SW64_BPF_LS_FUNC_OFFSET;
return opcode | ra | rb | function | (disp & 0xfff);
} }
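A quick standalone sketch of how these memory-format encoders pack a 32-bit instruction word, assuming only the field offsets defined in bpf_jit.h above (opcode at bit 26, ra at bit 21, rb at bit 16, displacement in the low bits). The opcode value, register numbers and displacement below are made-up inputs for illustration, not part of the patch.

	#include <stdio.h>
	#include <stdint.h>

	/* Same packing as sw64_bpf_gen_format_ls(), kept standalone for illustration. */
	static uint32_t encode_ls(uint32_t opcode, uint32_t ra, uint32_t rb, int16_t disp)
	{
		return (opcode << 26) | (ra << 21) | (rb << 16) | ((uint32_t)disp & 0xffff);
	}

	int main(void)
	{
		/* e.g. LDL $1, 8($2): opcode 0x23, ra = 1, rb = 2, disp = 8 */
		printf("0x%08x\n", encode_ls(0x23, 1, 2, 8));
		return 0;
	}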
static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra, static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra,
...@@ -107,12 +108,12 @@ static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_r ...@@ -107,12 +108,12 @@ static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_r
} }
static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra, static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra,
enum sw64_bpf_registers rc, u8 imm, int function) u32 imm, enum sw64_bpf_registers rc, int function)
{ {
opcode = opcode << SW64_BPF_OPCODE_OFFSET; opcode = opcode << SW64_BPF_OPCODE_OFFSET;
ra = ra << SW64_BPF_RA_OFFSET; ra = ra << SW64_BPF_RA_OFFSET;
imm = (imm & 0xff) << SW64_BPF_SIMPLE_ALU_IMM_OFFSET;
rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET;
imm = imm << SW64_BPF_SIMPLE_ALU_IMM_OFFSET;
function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET;
return opcode | ra | imm | function | rc; return opcode | ra | imm | function | rc;
} }
...@@ -125,57 +126,85 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx) ...@@ -125,57 +126,85 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++; ctx->idx++;
} }
static inline void emit_sw64_ldu64(const int dst, const u64 imm64, struct jit_ctx *ctx) static inline int get_tmp_reg(struct jit_ctx *ctx)
{ {
u16 imm_tmp; ctx->current_tmp_reg++;
int reg_tmp = SW64_BPF_REG_T8; /* Do not use 22-25. Should be more than enough. */
if (unlikely(ctx->current_tmp_reg == 8)) {
imm_tmp = (imm64 >> 60) & 0xf; pr_err("eBPF JIT %s[%d]: not enough temporary registers!\n",
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); current->comm, current->pid);
emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); return -1;
}
imm_tmp = (imm64 >> 45) & 0x7fff; return ctx->current_tmp_reg;
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); }
emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
imm_tmp = (imm64 >> 30) & 0x7fff;
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
imm_tmp = (imm64 >> 15) & 0x7fff;
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
imm_tmp = imm64 & 0x7fff; static inline void put_tmp_reg(struct jit_ctx *ctx)
emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); {
ctx->current_tmp_reg--;
if (ctx->current_tmp_reg == 21)
ctx->current_tmp_reg = 7;
} }
static inline void emit_sw64_ldu32(const int dst, const u32 imm32, struct jit_ctx *ctx) static void emit_sw64_ldu32(const int dst, const u32 imm, struct jit_ctx *ctx)
{ {
u16 imm_tmp; u16 imm_tmp;
int reg_tmp = SW64_BPF_REG_T8; u8 reg_tmp = get_tmp_reg(ctx);
if (!imm) {
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx);
put_tmp_reg(ctx);
return;
}
if (imm <= S16_MAX) {
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
put_tmp_reg(ctx);
return;
}
imm_tmp = (imm32 >> 30) & 3; if (imm >= U32_MAX - S16_MAX) {
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
put_tmp_reg(ctx);
return;
}
imm_tmp = (imm >> 30) & 3;
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); if (imm_tmp)
emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx);
imm_tmp = (imm32 >> 15) & 0x7fff; imm_tmp = (imm >> 15) & 0x7fff;
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); if (imm_tmp) {
emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
}
imm_tmp = imm & 0x7fff;
if (imm_tmp)
emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx);
imm_tmp = imm32 & 0x7fff; put_tmp_reg(ctx);
emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx);
} }
static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ctx *ctx) static void emit_sw64_lds32(const int dst, const s32 imm, struct jit_ctx *ctx)
{ {
s16 hi = imm32 >> 16; s16 hi = imm >> 16;
s16 lo = imm32 & 0xffff; s16 lo = imm & 0xffff;
int reg_tmp = SW64_BPF_REG_T8; u8 reg_tmp = get_tmp_reg(ctx);
if (!imm) {
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx);
put_tmp_reg(ctx);
return;
}
if (imm >= S16_MIN && imm <= S16_MAX) {
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
put_tmp_reg(ctx);
return;
}
emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx); emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx);
if (lo & 0x8000) { // sign bit is 1 if (lo & 0x8000) { // sign bit is 1
...@@ -183,214 +212,422 @@ static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ct ...@@ -183,214 +212,422 @@ static inline void emit_sw64_lds32(const int dst, const s32 imm32, struct jit_ct
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx); emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
emit(SW64_BPF_LDI(dst, dst, lo), ctx); if (lo)
emit(SW64_BPF_LDI(dst, dst, lo), ctx);
} else { // sign bit is 0 } else { // sign bit is 0
emit(SW64_BPF_LDI(dst, dst, lo), ctx); if (lo)
emit(SW64_BPF_LDI(dst, dst, lo), ctx);
} }
put_tmp_reg(ctx);
} }
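The lds32 path splits a signed 32-bit immediate into an LDIH-style high half plus a sign-extended low half; the interesting case is a low half with its sign bit set, which is why the helper adds a compensation before the final LDI. The usual arithmetic identity behind such hi/lo pairs is checked standalone below, under the assumption that the 16-bit low part is sign-extended; this illustrates the general technique, not a transcript of the emitted sequence.

	#include <stdio.h>
	#include <stdint.h>

	/* Rebuild a 32-bit value from a 16-bit high part plus a sign-extended low part. */
	static uint32_t rebuild32(uint32_t imm)
	{
		uint32_t lo = imm & 0xffff;
		uint32_t hi = (imm >> 16) + (lo >> 15);	/* carry one when lo sign-extends negative */
		uint32_t lo_sext = (uint32_t)(int32_t)(int16_t)lo;

		return (hi << 16) + lo_sext;
	}

	int main(void)
	{
		uint32_t tests[] = { 0, 1, 0xffffffffu, 0x7fff8000u, 0x12348765u };
		unsigned int i;

		for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
			printf("%d\n", rebuild32(tests[i]) == tests[i]);	/* all print 1 */
		return 0;
	}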
/* dst = ra / rb */ static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx)
static void emit_sw64_div(const int ra, const int rb, const int dst, struct jit_ctx *ctx)
{ {
pr_err("DIV is not supported for now.\n"); u16 imm_tmp;
u8 reg_tmp = get_tmp_reg(ctx);
if (!imm) {
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx);
put_tmp_reg(ctx);
return;
}
if (imm <= U32_MAX) {
put_tmp_reg(ctx);
return emit_sw64_ldu32(dst, (u32)imm, ctx);
}
if (imm >= (U64_MAX - S16_MAX) || imm <= S16_MAX) {
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
put_tmp_reg(ctx);
return;
}
imm_tmp = (imm >> 60) & 0xf;
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx);
if (imm_tmp)
emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx);
imm_tmp = (imm >> 45) & 0x7fff;
if (imm_tmp) {
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
}
imm_tmp = (imm >> 30) & 0x7fff;
if (imm_tmp) {
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
}
imm_tmp = (imm >> 15) & 0x7fff;
if (imm_tmp) {
emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
}
imm_tmp = imm & 0x7fff;
if (imm_tmp)
emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx);
put_tmp_reg(ctx);
} }
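emit_sw64_ldu64() materializes a 64-bit constant from a 4-bit top chunk and four 15-bit chunks at shifts 60/45/30/15/0; keeping every chunk non-negative means the LDI displacements never sign-extend. A standalone check of that decomposition (illustration only):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t rebuild64(uint64_t imm)
	{
		uint64_t v;

		v  = ((imm >> 60) & 0xf) << 60;
		v += ((imm >> 45) & 0x7fff) << 45;
		v += ((imm >> 30) & 0x7fff) << 30;
		v += ((imm >> 15) & 0x7fff) << 15;
		v += imm & 0x7fff;
		return v;
	}

	int main(void)
	{
		uint64_t tests[] = { 0, 1, 0x7fffULL, 0x123456789abcdef0ULL, ~0ULL };
		unsigned int i;

		for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
			printf("%d\n", rebuild64(tests[i]) == tests[i]);	/* all print 1 */
		return 0;
	}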
/* dst = ra % rb */ /* Do not change!!! See arch/sw_64/lib/divide.S for more detail */
static void emit_sw64_mod(const int ra, const int rb, const int dst, struct jit_ctx *ctx) #define REG(x) "$"str(x)
#define str(x) #x
#define DIVIDEND 24
#define DIVISOR 25
#define RESULT 27
/* Make these functions noinline because we need their address at runtime */
noinline void sw64_bpf_jit_helper_div32(void)
{ {
pr_err("MOD is not supported for now.\n"); register u32 __dividend asm(REG(DIVIDEND));
register u32 __divisor asm(REG(DIVISOR));
u32 res = __dividend / __divisor;
asm volatile(
""
:: "r"(res));
}
noinline void sw64_bpf_jit_helper_mod32(void)
{
register u32 __dividend asm(REG(DIVIDEND));
register u32 __divisor asm(REG(DIVISOR));
u32 res = __dividend % __divisor;
asm volatile(
""
:: "r"(res));
}
noinline void sw64_bpf_jit_helper_div64(void)
{
register u64 __dividend asm(REG(DIVIDEND));
register u64 __divisor asm(REG(DIVISOR));
u64 res = __dividend / __divisor;
asm volatile(
""
:: "r"(res));
}
noinline void sw64_bpf_jit_helper_mod64(void)
{
register u64 __dividend asm(REG(DIVIDEND));
register u64 __divisor asm(REG(DIVISOR));
u64 res = __dividend % __divisor;
asm volatile(
""
:: "r"(res));
}
static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx, u8 code)
{
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, dst, DIVIDEND), ctx);
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, DIVISOR), ctx);
switch (BPF_CLASS(code)) {
case BPF_ALU:
switch (BPF_OP(code)) {
case BPF_DIV:
emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div32, ctx);
break;
case BPF_MOD:
emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod32, ctx);
break;
}
emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx);
emit(SW64_BPF_ZAP_IMM(RESULT, 0xf0, dst), ctx);
break;
case BPF_ALU64:
switch (BPF_OP(code)) {
case BPF_DIV:
emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_div64, ctx);
break;
case BPF_MOD:
emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)sw64_bpf_jit_helper_mod64, ctx);
break;
}
emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx);
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, RESULT, dst), ctx);
break;
}
}
#undef REG
#undef str
#undef DIVIDEND
#undef DIVISOR
#undef RESULT
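The div/mod helpers are reached through a small private calling convention rather than the normal C ABI: the JIT copies the operands into the DIVIDEND/DIVISOR registers ($24/$25), loads the helper's address as a 64-bit immediate, calls it, and copies RESULT ($27) back into the destination; the register choice has to match arch/sw_64/lib/divide.S. A plain-C stand-in for that flow, with globals standing in for the fixed registers (illustration only, not kernel API):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t dividend, divisor, result;	/* stand-ins for $24, $25, $27 */

	static void helper_div64(void)			/* stand-in for sw64_bpf_jit_helper_div64() */
	{
		result = dividend / divisor;
	}

	int main(void)
	{
		dividend = 100;		/* BIS $31, dst, $24 */
		divisor  = 7;		/* BIS $31, src, $25 */
		helper_div64();		/* emit_sw64_ldu64(PV, helper addr); CALL RA, PV */
		printf("%llu\n", (unsigned long long)result);	/* dst <- $27, prints 14 */
		return 0;
	}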
/* STX XADD: lock *(u32 *)(dst + off) += src */
static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ctx)
{
int atomic_start;
int atomic_end;
u8 tmp1 = get_tmp_reg(ctx);
u8 tmp2 = get_tmp_reg(ctx);
u8 tmp3 = get_tmp_reg(ctx);
if (off < -0x800 || off > 0x7ff) {
emit(SW64_BPF_LDI(tmp1, dst, off), ctx);
dst = tmp1;
off = 0;
}
atomic_start = ctx->idx;
emit(SW64_BPF_LLDW(tmp2, dst, off), ctx);
emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx);
emit(SW64_BPF_WR_F(tmp3), ctx);
emit(SW64_BPF_ADDW_REG(tmp2, src, tmp2), ctx);
if (ctx->idx & 1)
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
emit(SW64_BPF_LSTW(tmp2, dst, off), ctx);
emit(SW64_BPF_RD_F(tmp3), ctx);
atomic_end = ctx->idx;
emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
}
/* STX XADD: lock *(u64 *)(dst + off) += src */
static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ctx)
{
int atomic_start;
int atomic_end;
u8 tmp1 = get_tmp_reg(ctx);
u8 tmp2 = get_tmp_reg(ctx);
u8 tmp3 = get_tmp_reg(ctx);
if (off < -0x800 || off > 0x7ff) {
emit(SW64_BPF_LDI(tmp1, dst, off), ctx);
dst = tmp1;
off = 0;
}
atomic_start = ctx->idx;
emit(SW64_BPF_LLDL(tmp2, dst, off), ctx);
emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx);
emit(SW64_BPF_WR_F(tmp3), ctx);
emit(SW64_BPF_ADDL_REG(tmp2, src, tmp2), ctx);
if (ctx->idx & 1)
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
emit(SW64_BPF_LSTL(tmp2, dst, off), ctx);
emit(SW64_BPF_RD_F(tmp3), ctx);
atomic_end = ctx->idx;
emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
} }
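Both xadd helpers emit a load-locked / store-conditional retry loop (lldw/lstw or lldl/lstl bracketed by wr_f/rd_f, branching back to the start when the conditional store fails). In portable C the same effect is what an atomic fetch-add provides; a minimal stand-in for the 64-bit case, using the GCC/Clang builtin purely for illustration:

	#include <stdio.h>
	#include <stdint.h>

	/* What the emitted lldl/wr_f/lstl/rd_f loop achieves: *addr += val, atomically. */
	static void xadd64(uint64_t *addr, uint64_t val)
	{
		__atomic_fetch_add(addr, val, __ATOMIC_RELAXED);
	}

	int main(void)
	{
		uint64_t counter = 40;

		xadd64(&counter, 2);
		printf("%llu\n", (unsigned long long)counter);	/* prints 42 */
		return 0;
	}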
static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx) static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx)
{ {
int tmp = SW64_BPF_REG_T8; u8 tmp = get_tmp_reg(ctx);
emit(SW64_BPF_LDI(tmp, dst, 0), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp, 0x2, tmp), ctx);
emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx);
emit(SW64_BPF_SRL_REG(tmp, 8, tmp), ctx); emit(SW64_BPF_SRL_IMM(tmp, 8, tmp), ctx);
emit(SW64_BPF_SLL_REG(dst, 8, dst), ctx); emit(SW64_BPF_SLL_IMM(dst, 8, dst), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp, dst), ctx); emit(SW64_BPF_BIS_REG(dst, tmp, dst), ctx);
put_tmp_reg(ctx);
} }
static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx) static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx)
{ {
int tmp1 = SW64_BPF_REG_T8; u8 tmp1 = get_tmp_reg(ctx);
int tmp2 = SW64_BPF_REG_T9; u8 tmp2 = get_tmp_reg(ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x8, tmp1), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp2), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x8, tmp1), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x4, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(dst, 0x6, dst), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp1), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x4, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(dst, 0x9, dst), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx);
emit(SW64_BPF_SLL_IMM(dst, 24, dst), ctx);
emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
} }
static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx)
{ {
int tmp1 = SW64_BPF_REG_T8; u8 tmp1 = get_tmp_reg(ctx);
int tmp2 = SW64_BPF_REG_T9; u8 tmp2 = get_tmp_reg(ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x80, tmp1), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 56, tmp2), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x1, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x80, tmp1), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x40, tmp1), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0x81, dst), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 40, tmp1), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 56, tmp1), ctx); emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 56, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x20, tmp1), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp1), ctx);
emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x10, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x2, tmp1), ctx); emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x40, tmp1), ctx); emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0x42, dst), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x08, tmp1), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 40, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x4, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x20, tmp1), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0x24, dst), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 24, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx);
emit(SW64_BPF_LDI(tmp1, dst, 0), ctx);
emit(SW64_BPF_LDI(tmp2, dst, 0), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp1, 0x8, tmp1), ctx);
emit(SW64_BPF_ZAPNOT_IMM(tmp2, 0x10, tmp1), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0x18, dst), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx);
emit(SW64_BPF_SRL_IMM(tmp2, 8, tmp2), ctx); emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp2, dst), ctx); emit(SW64_BPF_ZAPNOT_IMM(dst, 0x04, tmp1), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx);
emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_ZAPNOT_IMM(dst, 0x02, tmp1), ctx);
emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx);
emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx);
emit(SW64_BPF_ZAPNOT_IMM(dst, 0x01, dst), ctx);
emit(SW64_BPF_SLL_IMM(dst, 56, dst), ctx);
emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
} }
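The three byte-swap helpers above all follow the same pattern: ZAPNOT selects one byte at a time, a shift moves it to its mirrored position, and BIS ORs the pieces together. The 32-bit case is equivalent to the usual mask-and-shift swap, sketched standalone below (illustration only):

	#include <stdio.h>
	#include <stdint.h>

	static uint32_t bswap32(uint32_t x)
	{
		return ((x & 0xff000000u) >> 24) |	/* ZAPNOT 0x8, SRL 24 */
		       ((x & 0x00ff0000u) >>  8) |	/* ZAPNOT 0x4, SRL  8 */
		       ((x & 0x0000ff00u) <<  8) |	/* ZAPNOT 0x2, SLL  8 */
		       ((x & 0x000000ffu) << 24);	/* ZAPNOT 0x1, SLL 24 */
	}

	int main(void)
	{
		printf("0x%08x\n", bswap32(0x11223344));	/* prints 0x44332211 */
		return 0;
	}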
static void jit_fill_hole(void *area, unsigned int size) static void jit_fill_hole(void *area, unsigned int size)
{ {
memset(area, SW64_BPF_ILLEGAL_INSN, size); unsigned long c = SW64_BPF_ILLEGAL_INSN;
c |= c << 32;
__constant_c_memset(area, c, size);
}
static int offset_to_epilogue(const struct jit_ctx *ctx);
static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx)
{
int from = ctx->insn_offset[bpf_idx + 1];
int to = ctx->insn_offset[bpf_idx + 1 + off];
if (ctx->image == NULL)
return 0;
return to - from;
} }
static int offset_to_epilogue(const struct jit_ctx *ctx) static int offset_to_epilogue(const struct jit_ctx *ctx)
{ {
if (ctx->image == NULL)
return 0;
return ctx->epilogue_offset - ctx->idx; return ctx->epilogue_offset - ctx->idx;
} }
/* For tail call to jump into */ /* For tail call, jump to set up function call stack */
#define PROLOGUE_OFFSET 8 #define PROLOGUE_OFFSET 11
static void build_prologue(struct jit_ctx *ctx, bool was_classic) static void build_prologue(struct jit_ctx *ctx, bool was_classic)
{ {
const int r6 = bpf2sw64[BPF_REG_6]; const u8 r6 = bpf2sw64[BPF_REG_6];
const int r7 = bpf2sw64[BPF_REG_7]; const u8 r7 = bpf2sw64[BPF_REG_7];
const int r8 = bpf2sw64[BPF_REG_8]; const u8 r8 = bpf2sw64[BPF_REG_8];
const int r9 = bpf2sw64[BPF_REG_9]; const u8 r9 = bpf2sw64[BPF_REG_9];
const int fp = bpf2sw64[BPF_REG_FP]; const u8 fp = bpf2sw64[BPF_REG_FP];
const int tcc = bpf2sw64[TCALL_CNT]; const u8 tcc = bpf2sw64[TCALL_CNT];
const int tmp1 = bpf2sw64[TMP_REG_1];
/* Save callee-saved registers */ /* Save callee-saved registers */
emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx);
emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 0), ctx); emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx);
emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 8), ctx); emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 8), ctx);
emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 16), ctx); emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 16), ctx);
emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 24), ctx); emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 24), ctx);
emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 32), ctx); emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 32), ctx);
emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 40), ctx); emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 40), ctx);
emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 48), ctx);
emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx);
/* Set up BPF prog stack base register */ /* Set up BPF prog stack base register */
emit(SW64_BPF_LDI(fp, SW64_BPF_REG_SP, 0), ctx); emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_SP, fp), ctx);
if (!was_classic) if (!was_classic)
/* Initialize tail_call_cnt */ /* Initialize tail_call_cnt */
emit(SW64_BPF_LDI(tcc, SW64_BPF_REG_ZR, 0), ctx); emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, tcc), ctx);
/* Set up function call stack */ /* Set up function call stack */
ctx->stack_size = ctx->prog->aux->stack_depth; ctx->stack_size = (ctx->prog->aux->stack_depth + 15) & (~15);
emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -ctx->stack_size), ctx);
emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx);
} }
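build_prologue() now rounds the BPF stack depth up to a 16-byte multiple before dropping SP, via (depth + 15) & ~15. A tiny standalone check of that rounding (illustration only):

	#include <stdio.h>

	static unsigned int round_up_16(unsigned int depth)
	{
		return (depth + 15) & ~15u;
	}

	int main(void)
	{
		printf("%u %u %u %u\n", round_up_16(0), round_up_16(1),
		       round_up_16(16), round_up_16(17));	/* 0 16 16 32 */
		return 0;
	}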
static void build_epilogue(struct jit_ctx *ctx) static void build_epilogue(struct jit_ctx *ctx)
{ {
const int r6 = bpf2sw64[BPF_REG_6]; const u8 r6 = bpf2sw64[BPF_REG_6];
const int r7 = bpf2sw64[BPF_REG_7]; const u8 r7 = bpf2sw64[BPF_REG_7];
const int r8 = bpf2sw64[BPF_REG_8]; const u8 r8 = bpf2sw64[BPF_REG_8];
const int r9 = bpf2sw64[BPF_REG_9]; const u8 r9 = bpf2sw64[BPF_REG_9];
const int fp = bpf2sw64[BPF_REG_FP]; const u8 fp = bpf2sw64[BPF_REG_FP];
const int tcc = bpf2sw64[TCALL_CNT]; const u8 tcc = bpf2sw64[TCALL_CNT];
const int tmp1 = bpf2sw64[TMP_REG_1];
/* Destroy function call stack */ /* Destroy function call stack */
emit_sw64_ldu32(tmp1, ctx->stack_size, ctx); emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx);
emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, tmp1, SW64_BPF_REG_SP), ctx);
/* Restore callee-saved registers */ /* Restore callee-saved registers */
emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 0), ctx); emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx);
emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 8), ctx); emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 8), ctx);
emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 16), ctx); emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 16), ctx);
emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 24), ctx); emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 24), ctx);
emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 32), ctx); emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 32), ctx);
emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 40), ctx); emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 40), ctx);
emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 48), ctx); emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 48), ctx);
emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, 56, SW64_BPF_REG_SP), ctx); emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx);
emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 64), ctx);
/* Return */ /* Return */
emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx);
} }
static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx) static int emit_bpf_tail_call(struct jit_ctx *ctx)
{ {
/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ /* bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) */
const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */
const u8 r3 = bpf2sw64[BPF_REG_3]; /* u64 index */ const u8 r3 = bpf2sw64[BPF_REG_3]; /* u32 index */
const u8 tmp = bpf2sw64[TMP_REG_1]; const u8 tmp = get_tmp_reg(ctx);
const u8 prg = bpf2sw64[TMP_REG_2]; const u8 prg = get_tmp_reg(ctx);
const u8 tcc = bpf2sw64[TCALL_CNT]; const u8 tcc = bpf2sw64[TCALL_CNT];
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
u64 offset; u64 offset;
static int out_idx;
#define out_offset (ctx->image ? (out_idx - ctx->idx - 1) : 0)
/* if (index >= array->map.max_entries) /* if (index >= array->map.max_entries)
* goto out; * goto out;
*/ */
offset = offsetof(struct bpf_array, map.max_entries); offset = offsetof(struct bpf_array, map.max_entries);
emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ emit_sw64_ldu64(tmp, offset, ctx);
emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */
emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */
emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* map.max_entries is u32 */ emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx); /* map.max_entries is u32 */
emit(SW64_BPF_SUBL_REG(r3, tmp, tmp), ctx); /* tmp = r3 - tmp = index - map.max_entries */ emit(SW64_BPF_ZAP_IMM(r3, 0xf0, r3), ctx); /* index is u32 */
emit(SW64_BPF_BGE(tmp, jmp_offset), ctx); emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx);
emit(SW64_BPF_BNE(tmp, out_offset), ctx);
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT) /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out; * goto out;
* tail_call_cnt++; * tail_call_cnt++;
*/ */
emit(SW64_BPF_LDI(tmp, SW64_BPF_REG_ZR, MAX_TAIL_CALL_CNT), ctx); emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(SW64_BPF_SUBL_REG(tcc, tmp, tmp), ctx); emit(SW64_BPF_CMPULT_REG(tmp, tcc, tmp), ctx);
emit(SW64_BPF_BGT(tmp, jmp_offset), ctx); emit(SW64_BPF_BNE(tmp, out_offset), ctx);
emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx);
/* prog = array->ptrs[index]; /* prog = array->ptrs[index];
...@@ -398,34 +635,66 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) ...@@ -398,34 +635,66 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
* goto out; * goto out;
*/ */
offset = offsetof(struct bpf_array, ptrs); offset = offsetof(struct bpf_array, ptrs);
emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset of ptrs */ emit_sw64_ldu64(tmp, offset, ctx);
emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs */ emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs[0] */
emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, ptrs is 8 bit aligned */ emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, each entry is a pointer */
emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &prog */ emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &ptrs[index] */
emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = prog */ emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = ptrs[index] = prog */
emit(SW64_BPF_BEQ(prg, jmp_offset), ctx); emit(SW64_BPF_BEQ(prg, out_offset), ctx);
/* goto *(prog->bpf_func + prologue_offset); */ /* goto *(prog->bpf_func + prologue_offset); */
offset = offsetof(struct bpf_prog, bpf_func); offset = offsetof(struct bpf_prog, bpf_func);
emit_sw64_ldu64(tmp, offset, ctx); /* tmp = offset */ emit_sw64_ldu64(tmp, offset, ctx);
emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */
emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ emit(SW64_BPF_LDL(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */
emit(SW64_BPF_ZAPNOT_IMM(tmp, 0xf, tmp), ctx); /* bpf_func is unsigned int */ emit(SW64_BPF_BEQ(tmp, out_offset), ctx);
emit(SW64_BPF_ADDL_REG(tmp, sizeof(u32) * PROLOGUE_OFFSET, tmp), ctx); emit(SW64_BPF_LDI(tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
emit(SW64_BPF_ADDL_REG(SW64_BPF_REG_SP, ctx->stack_size, SW64_BPF_REG_SP), ctx); emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx);
emit(SW64_BPF_BR(tmp, 0), ctx); emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
/* out */ /* out */
if (out_offset == -1) if (ctx->image == NULL)
out_offset = cur_offset; out_idx = ctx->idx;
if (cur_offset != out_offset) { if (ctx->image != NULL && out_idx <= 0)
pr_err("tail_call out_offset = %d, expected %d!\n",
cur_offset, out_offset);
return -1; return -1;
} #undef out_offset
return 0;
}
/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
struct jit_ctx *ctx,
int dst_reg)
{
off_t offset;
unsigned long pc;
struct exception_table_entry *ex;
if (!ctx->image)
/* First pass */
return 0;
if (!ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
return 0;
if (WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->exentry_idx];
pc = (unsigned long)&ctx->image[ctx->idx - 1];
offset = (long)&ex->insn - pc;
ex->insn = offset;
ex->fixup.bits.nextinsn = sizeof(u32);
ex->fixup.bits.valreg = dst_reg;
ex->fixup.bits.errreg = SW64_BPF_REG_ZR;
ctx->exentry_idx++;
return 0; return 0;
#undef cur_offset
#undef jmp_offset
} }
/* JITs an eBPF instruction. /* JITs an eBPF instruction.
...@@ -434,80 +703,110 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) ...@@ -434,80 +703,110 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
* >0 - successfully JITed a 16-byte eBPF instruction. * >0 - successfully JITed a 16-byte eBPF instruction.
* <0 - failed to JIT. * <0 - failed to JIT.
*/ */
static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{ {
const u8 code = insn->code; const u8 code = insn->code;
const u8 dst = bpf2sw64[insn->dst_reg]; u8 dst = bpf2sw64[insn->dst_reg];
const u8 src = bpf2sw64[insn->src_reg]; u8 src = bpf2sw64[insn->src_reg];
const u8 tmp1 = bpf2sw64[TMP_REG_1]; const u8 tmp1 __maybe_unused = get_tmp_reg(ctx);
const u8 tmp2 = bpf2sw64[TMP_REG_2]; const u8 tmp2 __maybe_unused = get_tmp_reg(ctx);
const s16 off = insn->off; const s16 off = insn->off;
const s32 imm = insn->imm; const s32 imm = insn->imm;
int jmp_offset; const int bpf_idx = insn - ctx->prog->insnsi;
s32 jmp_offset;
u64 func; u64 func;
struct bpf_insn insn1; struct bpf_insn insn1;
u64 imm64; u64 imm64;
int ret;
switch (code) { switch (code) {
case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU | BPF_MOV | BPF_X:
emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X:
emit(SW64_BPF_LDI(dst, src, 0), ctx); emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx);
break; break;
case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU | BPF_ADD | BPF_X:
emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx); emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break; break;
case BPF_ALU64 | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X:
emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx); emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU | BPF_SUB | BPF_X:
emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx); emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break; break;
case BPF_ALU64 | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X:
emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx); emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU | BPF_MUL | BPF_X:
emit(SW64_BPF_MULW_REG(dst, src, dst), ctx); emit(SW64_BPF_MULW_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break; break;
case BPF_ALU64 | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X:
emit(SW64_BPF_MULL_REG(dst, src, dst), ctx); emit(SW64_BPF_MULL_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_DIV | BPF_X:
emit_sw64_divmod(dst, src, ctx, code);
break;
case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X:
emit_sw64_div(dst, src, dst, ctx); emit_sw64_divmod(dst, src, ctx, code);
return -EINVAL; break;
case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_MOD | BPF_X:
emit_sw64_divmod(dst, src, ctx, code);
break;
case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X:
emit_sw64_mod(dst, src, dst, ctx); emit_sw64_divmod(dst, src, ctx, code);
return -EINVAL; break;
case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_LSH | BPF_X:
emit(SW64_BPF_SLL_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X:
emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); emit(SW64_BPF_SLL_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X:
emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
case BPF_ALU64 | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X:
emit(SW64_BPF_SRL_REG(dst, src, dst), ctx); emit(SW64_BPF_SRL_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU | BPF_ARSH | BPF_X:
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
emit(SW64_BPF_SRA_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X:
emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); emit(SW64_BPF_SRA_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU | BPF_AND | BPF_X:
emit(SW64_BPF_AND_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X:
emit(SW64_BPF_AND_REG(dst, src, dst), ctx); emit(SW64_BPF_AND_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU | BPF_OR | BPF_X:
emit(SW64_BPF_BIS_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X:
emit(SW64_BPF_OR_REG(dst, src, dst), ctx); emit(SW64_BPF_BIS_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU | BPF_XOR | BPF_X:
emit(SW64_BPF_XOR_REG(dst, src, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X:
emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); emit(SW64_BPF_XOR_REG(dst, src, dst), ctx);
break; break;
case BPF_ALU | BPF_NEG: case BPF_ALU | BPF_NEG:
emit(SW64_BPF_SUBW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_NEG: case BPF_ALU64 | BPF_NEG:
emit(SW64_BPF_SEXTB_IMM(0xff, tmp1), ctx); emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
emit(SW64_BPF_XOR_IMM(dst, tmp1, dst), ctx);
break; break;
case BPF_ALU | BPF_END | BPF_TO_LE: case BPF_ALU | BPF_END | BPF_TO_LE:
switch (imm) { switch (imm) {
...@@ -519,7 +818,12 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -519,7 +818,12 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break; break;
case 64: case 64:
break; break;
default:
pr_err("eBPF JIT %s[%d]: BPF_TO_LE unknown size\n",
current->comm, current->pid);
return -EINVAL;
} }
break;
case BPF_ALU | BPF_END | BPF_TO_BE: case BPF_ALU | BPF_END | BPF_TO_BE:
switch (imm) { switch (imm) {
case 16: case 16:
...@@ -531,73 +835,223 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -531,73 +835,223 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case 64: case 64:
emit_sw64_htobe64(dst, ctx); emit_sw64_htobe64(dst, ctx);
break; break;
default:
pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown size\n",
current->comm, current->pid);
return -EINVAL;
} }
break;
case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU | BPF_MOV | BPF_K:
if (imm >= S16_MIN && imm <= S16_MAX)
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
else
emit_sw64_ldu32(dst, imm, ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K:
emit_sw64_lds32(dst, imm, ctx); if (imm >= S16_MIN && imm <= S16_MAX)
emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
else
emit_sw64_lds32(dst, imm, ctx);
break; break;
case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU | BPF_ADD | BPF_K:
if (imm >= S16_MIN && imm <= S16_MAX) {
emit(SW64_BPF_LDI(dst, dst, imm), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_ADDW_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= S16_MIN && imm <= S16_MAX) {
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_LDI(dst, dst, imm), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU | BPF_SUB | BPF_K:
if (imm >= -S16_MAX && imm <= -S16_MIN) {
emit(SW64_BPF_LDI(dst, dst, -imm), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= -S16_MAX && imm <= -S16_MIN) {
emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_LDI(dst, dst, -imm), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU | BPF_MUL | BPF_K:
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU | BPF_DIV | BPF_K:
emit_sw64_ldu32(tmp1, imm, ctx);
emit_sw64_divmod(dst, tmp1, ctx, code);
break;
case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); emit_sw64_lds32(tmp1, imm, ctx);
emit_sw64_div(dst, src, tmp1, ctx); emit_sw64_divmod(dst, tmp1, ctx, code);
return -EINVAL; break;
case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_K:
emit_sw64_ldu32(tmp1, imm, ctx);
emit_sw64_divmod(dst, tmp1, ctx, code);
break;
case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); emit_sw64_lds32(tmp1, imm, ctx);
emit_sw64_mod(dst, src, tmp1, ctx); emit_sw64_divmod(dst, tmp1, ctx, code);
return -EINVAL; break;
case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_LSH | BPF_K:
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K:
emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx);
}
break;
case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_K:
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU | BPF_AND | BPF_K:
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU | BPF_OR | BPF_K:
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_OR_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU | BPF_XOR | BPF_K:
if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_ldu32(tmp1, imm, ctx);
emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx);
}
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K:
emit_sw64_lds32(tmp1, imm, ctx); if (imm >= 0 && imm <= U8_MAX) {
emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx);
} else {
emit_sw64_lds32(tmp1, imm, ctx);
emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx);
}
break; break;
case BPF_JMP | BPF_JA: case BPF_JMP | BPF_JA:
emit(SW64_BPF_BR(SW64_BPF_REG_RA, off), ctx); jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx);
} else {
pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
current->comm, current->pid, jmp_offset);
return -EINVAL;
}
break; break;
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, src, tmp1), ctx);
src = tmp1;
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx);
dst = tmp2;
case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JLT | BPF_X:
...@@ -645,9 +1099,29 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -645,9 +1099,29 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx); emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx);
break; break;
} }
emit(SW64_BPF_BLBS(tmp1, off), ctx); jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
emit(SW64_BPF_BNE(tmp1, jmp_offset), ctx);
} else {
pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
current->comm, current->pid, jmp_offset);
return -EINVAL;
}
break; break;
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx);
dst = tmp2;
case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JLT | BPF_K:
...@@ -662,47 +1136,57 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -662,47 +1136,57 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_sw64_lds32(tmp1, imm, ctx); emit_sw64_lds32(tmp1, imm, ctx);
switch (BPF_OP(code)) { switch (BPF_OP(code)) {
case BPF_JEQ: case BPF_JEQ:
emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx);
break; break;
case BPF_JGT: case BPF_JGT:
emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp1), ctx); emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp2), ctx);
break; break;
case BPF_JLT: case BPF_JLT:
emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp2), ctx);
break; break;
case BPF_JGE: case BPF_JGE:
emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp1), ctx); emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp2), ctx);
break; break;
case BPF_JLE: case BPF_JLE:
emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp2), ctx);
break; break;
case BPF_JNE: case BPF_JNE:
emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx);
emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); emit(SW64_BPF_XOR_IMM(tmp2, 1, tmp2), ctx);
break; break;
case BPF_JSGT: case BPF_JSGT:
emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp1), ctx); emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp2), ctx);
break; break;
case BPF_JSLT: case BPF_JSLT:
emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp2), ctx);
break; break;
case BPF_JSGE: case BPF_JSGE:
emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp1), ctx); emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp2), ctx);
break; break;
case BPF_JSLE: case BPF_JSLE:
emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp2), ctx);
break; break;
case BPF_JSET: case BPF_JSET:
emit(SW64_BPF_AND_REG(dst, tmp1, tmp1), ctx); emit(SW64_BPF_AND_REG(dst, tmp1, tmp2), ctx);
break; break;
} }
emit(SW64_BPF_BLBS(tmp1, off), ctx); jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
emit(SW64_BPF_BNE(tmp2, jmp_offset), ctx);
} else {
pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
current->comm, current->pid, jmp_offset);
return -EINVAL;
}
break; break;
case BPF_JMP | BPF_CALL: case BPF_JMP | BPF_CALL:
func = (u64)__bpf_call_base + imm; func = (u64)__bpf_call_base + imm;
emit_sw64_ldu64(tmp1, func, ctx); if ((func & 0xffffffffe0000000UL) != 0xffffffff80000000UL)
emit(SW64_BPF_CALL(SW64_BPF_REG_RA, tmp1), ctx); /* calling bpf program, switch to vmalloc addr */
func = (func & 0xffffffff) | 0xfffff00000000000UL;
emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx);
emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx);
break; break;
case BPF_JMP | BPF_TAIL_CALL: case BPF_JMP | BPF_TAIL_CALL:
...@@ -711,38 +1195,60 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -711,38 +1195,60 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break; break;
case BPF_JMP | BPF_EXIT: case BPF_JMP | BPF_EXIT:
if (insn - ctx->prog->insnsi + 1 == ctx->prog->len) // if this is the last instruction, fallthrough to epilogue
if (bpf_idx == ctx->prog->len - 1)
break; break;
jmp_offset = (offset_to_epilogue(ctx) - 1) * 4; jmp_offset = offset_to_epilogue(ctx) - 1;
// emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); // epilogue is always at the end, must jump forward
// break; if (jmp_offset >= -1 && jmp_offset <= 0xfffff) {
emit_sw64_lds32(tmp1, jmp_offset, ctx); if (ctx->image && !jmp_offset)
emit(SW64_BPF_BR(tmp2, 0), ctx); // if this is the last instruction, fallthrough to epilogue
emit(SW64_BPF_ADDL_REG(tmp1, tmp2, tmp1), ctx); emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp1), ctx); else
emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx);
} else {
pr_err("eBPF JIT %s[%d]: BPF_EXIT out of range, %d instructions\n",
current->comm, current->pid, jmp_offset);
return -EINVAL;
}
break; break;
case BPF_LD | BPF_IMM | BPF_DW: case BPF_LD | BPF_IMM | BPF_DW:
insn1 = insn[1]; insn1 = insn[1];
imm64 = (u64)insn1.imm << 32 | (u32)imm; imm64 = ((u64)insn1.imm << 32) | (u32)imm;
emit_sw64_ldu64(dst, imm64, ctx); emit_sw64_ldu64(dst, imm64, ctx);
put_tmp_reg(ctx);
put_tmp_reg(ctx);
return 1; return 1;
/* LDX: dst = *(size *)(src + off) */ /* LDX: dst = *(size *)(src + off) */
case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_W:
emit(SW64_BPF_LDW(dst, src, off), ctx);
break;
case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_H:
emit(SW64_BPF_LDHU(dst, src, off), ctx);
emit(SW64_BPF_SEXTH_REG(dst, dst), ctx);
break;
case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_B:
emit(SW64_BPF_LDBU(dst, src, off), ctx);
emit(SW64_BPF_SEXTB_REG(dst, dst), ctx);
break;
case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_MEM | BPF_DW:
emit(SW64_BPF_LDW(dst, src, off), ctx); case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
switch (BPF_SIZE(code)) {
case BPF_W:
emit(SW64_BPF_LDW(dst, src, off), ctx);
emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
break;
case BPF_H:
emit(SW64_BPF_LDHU(dst, src, off), ctx);
break;
case BPF_B:
emit(SW64_BPF_LDBU(dst, src, off), ctx);
break;
case BPF_DW:
emit(SW64_BPF_LDL(dst, src, off), ctx);
break;
}
ret = add_exception_handler(insn, ctx, dst);
if (ret)
return ret;
break; break;
/* ST: *(size *)(dst + off) = imm */ /* ST: *(size *)(dst + off) = imm */
...@@ -773,33 +1279,32 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ...@@ -773,33 +1279,32 @@ static inline int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit(SW64_BPF_STW(src, dst, off), ctx); emit(SW64_BPF_STW(src, dst, off), ctx);
break; break;
case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_H:
emit(SW64_BPF_STW(src, dst, off), ctx); emit(SW64_BPF_STH(src, dst, off), ctx);
break; break;
case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_B:
emit(SW64_BPF_STW(src, dst, off), ctx); emit(SW64_BPF_STB(src, dst, off), ctx);
break; break;
case BPF_STX | BPF_MEM | BPF_DW: case BPF_STX | BPF_MEM | BPF_DW:
emit(SW64_BPF_STW(src, dst, off), ctx); emit(SW64_BPF_STL(src, dst, off), ctx);
break; break;
/* STX XADD: lock *(u32 *)(dst + off) += src */ /* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W: case BPF_STX | BPF_XADD | BPF_W:
emit(SW64_BPF_LDW(tmp1, dst, off), ctx); emit_sw64_xadd32(src, dst, off, ctx);
emit(SW64_BPF_ADDW_REG(tmp1, src, tmp1), ctx);
emit(SW64_BPF_STW(tmp1, dst, off), ctx);
break; break;
/* STX XADD: lock *(u64 *)(dst + off) += src */ /* STX XADD: lock *(u64 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_DW: case BPF_STX | BPF_XADD | BPF_DW:
emit(SW64_BPF_LDL(tmp1, dst, off), ctx); emit_sw64_xadd64(src, dst, off, ctx);
emit(SW64_BPF_ADDL_REG(tmp1, src, tmp1), ctx);
emit(SW64_BPF_STL(tmp1, dst, off), ctx);
break; break;
default: default:
pr_err("unknown opcode %02x\n", code); pr_err("eBPF JIT %s[%d]: unknown opcode 0x%02x\n",
current->comm, current->pid, code);
return -EINVAL; return -EINVAL;
} }
put_tmp_reg(ctx);
put_tmp_reg(ctx);
return 0; return 0;
} }
...@@ -812,17 +1317,17 @@ static int build_body(struct jit_ctx *ctx) ...@@ -812,17 +1317,17 @@ static int build_body(struct jit_ctx *ctx)
const struct bpf_insn *insn = &prog->insnsi[i]; const struct bpf_insn *insn = &prog->insnsi[i];
int ret; int ret;
if (ctx->image == NULL)
ctx->insn_offset[i] = ctx->idx;
ret = build_insn(insn, ctx); ret = build_insn(insn, ctx);
if (ret > 0) { if (ret < 0)
return ret;
while (ret > 0) {
i++; i++;
if (ctx->image == NULL) if (ctx->image == NULL)
ctx->insn_offset[i] = ctx->idx; ctx->insn_offset[i] = ctx->insn_offset[i - 1];
continue; ret--;
} }
if (ctx->image == NULL)
ctx->insn_offset[i] = ctx->idx;
if (ret)
return ret;
} }
return 0; return 0;
...@@ -837,6 +1342,9 @@ static int validate_code(struct jit_ctx *ctx) ...@@ -837,6 +1342,9 @@ static int validate_code(struct jit_ctx *ctx)
return -1; return -1;
} }
if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
return -1;
return 0; return 0;
} }
...@@ -854,7 +1362,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -854,7 +1362,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false; bool tmp_blinded = false;
bool extra_pass = false; bool extra_pass = false;
struct jit_ctx ctx; struct jit_ctx ctx;
int image_size; int image_size, prog_size, extable_size;
u8 *image_ptr; u8 *image_ptr;
if (!prog->jit_requested) if (!prog->jit_requested)
...@@ -885,13 +1393,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -885,13 +1393,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
image_ptr = jit_data->image; image_ptr = jit_data->image;
header = jit_data->header; header = jit_data->header;
extra_pass = true; extra_pass = true;
image_size = sizeof(u32) * ctx.idx; prog_size = sizeof(u32) * ctx.idx;
goto skip_init_ctx; goto skip_init_ctx;
} }
memset(&ctx, 0, sizeof(ctx)); memset(&ctx, 0, sizeof(ctx));
ctx.prog = prog; ctx.prog = prog;
ctx.insn_offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); ctx.insn_offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
if (ctx.insn_offset == NULL) { if (ctx.insn_offset == NULL) {
prog = orig_prog; prog = orig_prog;
goto out_off; goto out_off;
...@@ -907,11 +1415,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -907,11 +1415,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off; goto out_off;
} }
ctx.epilogue_offset = ctx.idx; ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx;
build_epilogue(&ctx); build_epilogue(&ctx);
extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);
/* Now we know the actual image size. */ /* Now we know the actual image size. */
image_size = sizeof(u32) * ctx.idx; /* And we need extra 8 bytes for lock instructions alignment */
prog_size = sizeof(u32) * ctx.idx + 8;
image_size = prog_size + extable_size;
header = bpf_jit_binary_alloc(image_size, &image_ptr, header = bpf_jit_binary_alloc(image_size, &image_ptr,
sizeof(u32), jit_fill_hole); sizeof(u32), jit_fill_hole);
if (header == NULL) { if (header == NULL) {
...@@ -921,9 +1434,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -921,9 +1434,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */ /* 2. Now, the actual pass. */
ctx.image = (u32 *)image_ptr; /* lock instructions need 8-byte alignment */
ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7));
if (extable_size)
prog->aux->extable = (void *)image_ptr + prog_size;
skip_init_ctx: skip_init_ctx:
ctx.idx = 0; ctx.idx = 0;
ctx.exentry_idx = 0;
build_prologue(&ctx, was_classic); build_prologue(&ctx, was_classic);
...@@ -944,7 +1461,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -944,7 +1461,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* And we're done. */ /* And we're done. */
if (bpf_jit_enable > 1) if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, 2, ctx.image); bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
bpf_flush_icache(header, ctx.image + ctx.idx); bpf_flush_icache(header, ctx.image + ctx.idx);
...@@ -957,7 +1474,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -957,7 +1474,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
} }
prog->bpf_func = (void *)ctx.image; prog->bpf_func = (void *)ctx.image;
prog->jited = 1; prog->jited = 1;
prog->jited_len = image_size; prog->jited_len = prog_size;
if (ctx.current_tmp_reg) {
pr_err("eBPF JIT %s[%d]: unreleased temporary regsters %d\n",
current->comm, current->pid, ctx.current_tmp_reg);
}
if (!prog->is_func || extra_pass) { if (!prog->is_func || extra_pass) {
out_off: out_off:
......
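A note on the zero-extension pattern in the eBPF JIT hunks above: 32-bit BPF_ALU results are kept in 64-bit registers, so the JIT follows each 32-bit operation with SW64_BPF_ZAP_IMM(dst, 0xf0, dst). The sketch below is illustrative only and assumes the usual Alpha-style ZAP semantics (each set bit in the 8-bit mask clears one byte of the operand); the helper name is hypothetical and does not appear in the patch.

#include <stdint.h>

/* Illustrative model of ZAP: clear every byte whose mask bit is set. */
static inline uint64_t sw64_zap_model(uint64_t val, uint8_t mask)
{
	uint64_t out = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (!(mask & (1u << i)))
			out |= val & (0xffull << (8 * i));
	return out;
}

/*
 * With mask 0xf0, bytes 4..7 are cleared, so a 32-bit ALU result is
 * zero-extended, e.g.:
 *   sw64_zap_model(0xdeadbeefcafebabeULL, 0xf0) == 0x00000000cafebabeULL
 */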
...@@ -40,10 +40,8 @@ static int sw64_cpu_freq_notifier(struct notifier_block *nb, ...@@ -40,10 +40,8 @@ static int sw64_cpu_freq_notifier(struct notifier_block *nb,
unsigned long cpu; unsigned long cpu;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
if (val == CPUFREQ_POSTCHANGE) { if (val == CPUFREQ_POSTCHANGE)
sw64_update_clockevents(cpu, freqs->new * 1000); sw64_update_clockevents(cpu, freqs->new * 1000);
current_cpu_data.loops_per_jiffy = loops_per_jiffy;
}
} }
return 0; return 0;
...@@ -59,7 +57,7 @@ static unsigned int sw64_cpufreq_get(unsigned int cpu) ...@@ -59,7 +57,7 @@ static unsigned int sw64_cpufreq_get(unsigned int cpu)
return 0; return 0;
} }
return sw64_clk_get_rate(policy->clk); return __sw64_cpufreq_get(policy) * 1000;
} }
/* /*
...@@ -70,12 +68,12 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy, ...@@ -70,12 +68,12 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy,
{ {
unsigned long freq; unsigned long freq;
freq = (get_cpu_freq() / 1000) * index / 48; freq = 50000 * index;
sw64_store_policy(policy); sw64_store_policy(policy);
/* setting the cpu frequency */ /* setting the cpu frequency */
sw64_set_rate(-1, freq * 1000); sw64_set_rate(freq * 1000);
return 0; return 0;
} }
...@@ -100,7 +98,7 @@ static int sw64_cpufreq_cpu_init(struct cpufreq_policy *policy) ...@@ -100,7 +98,7 @@ static int sw64_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (sw64_clockmod_table[i].frequency == 0) if (sw64_clockmod_table[i].frequency == 0)
sw64_clockmod_table[i].frequency = (rate * i) / 48; sw64_clockmod_table[i].frequency = (rate * i) / 48;
sw64_set_rate(-1, rate * 1000); sw64_set_rate(rate * 1000);
policy->clk = cpuclk; policy->clk = cpuclk;
......
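For the cpufreq change above, the target frequency is now derived directly from the table index instead of being recomputed from the current core clock. The constants in the hunks imply a 48-entry table with 50000 kHz per step (i.e. a 2.4 GHz base clock); the snippet below is a hedged sketch of that mapping, not code taken from the driver, and the macro names are assumptions.

#define SW64_CLOCKMOD_STEPS	48
#define SW64_BASE_KHZ		2400000U	/* assumed: 48 * 50000 kHz */

/* Hypothetical helper: table index -> target frequency in kHz. */
static unsigned int sw64_index_to_khz(unsigned int index)
{
	/* (rate * index) / 48 with rate = 2400000 kHz  ==  50000 * index */
	return (SW64_BASE_KHZ / SW64_CLOCKMOD_STEPS) * index;
}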
...@@ -25,8 +25,6 @@ ...@@ -25,8 +25,6 @@
#include <asm/efi.h> #include <asm/efi.h>
extern bool __virt_addr_valid(unsigned long x);
static int __init is_memory(efi_memory_desc_t *md) static int __init is_memory(efi_memory_desc_t *md)
{ {
if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC)) if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC))
...@@ -128,23 +126,7 @@ static __init int is_usable_memory(efi_memory_desc_t *md) ...@@ -128,23 +126,7 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
} }
return false; return false;
} }
static __initdata char memory_type_name1[][20] = {
"Reserved",
"Loader Code",
"Loader Data",
"Boot Code",
"Boot Data",
"Runtime Code",
"Runtime Data",
"Conventional Memory",
"Unusable Memory",
"ACPI Reclaim Memory",
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
"PAL Code",
"Persistent Memory",
};
static __init void reserve_regions(void) static __init void reserve_regions(void)
{ {
efi_memory_desc_t *md; efi_memory_desc_t *md;
...@@ -157,22 +139,6 @@ static __init void reserve_regions(void) ...@@ -157,22 +139,6 @@ static __init void reserve_regions(void)
paddr = md->phys_addr; paddr = md->phys_addr;
npages = md->num_pages; npages = md->num_pages;
if (!__virt_addr_valid(paddr))
continue;
if (md->type >= ARRAY_SIZE(memory_type_name1))
continue;
if (md->attribute & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
EFI_MEMORY_NV |
EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
continue;
if (strncmp(memory_type_name1[md->type], "Reserved", 8) == 0)
continue;
if (efi_enabled(EFI_DBG)) { if (efi_enabled(EFI_DBG)) {
char buf[64]; char buf[64];
......
...@@ -2816,7 +2816,11 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) ...@@ -2816,7 +2816,11 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
} }
/* clear memory. Not sure if this is required or not */ /* clear memory. Not sure if this is required or not */
#if IS_ENABLED(CONFIG_SW64)
memset_io(hpd, 0, mec_hpd_size);
#else
memset(hpd, 0, mec_hpd_size); memset(hpd, 0, mec_hpd_size);
#endif
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
...@@ -2926,7 +2930,11 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, ...@@ -2926,7 +2930,11 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
u64 wb_gpu_addr; u64 wb_gpu_addr;
/* init the mqd struct */ /* init the mqd struct */
#if IS_ENABLED(CONFIG_SW64)
memset_io(mqd, 0, sizeof(struct cik_mqd));
#else
memset(mqd, 0, sizeof(struct cik_mqd)); memset(mqd, 0, sizeof(struct cik_mqd));
#endif
mqd->header = 0xC0310800; mqd->header = 0xC0310800;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff; mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
......
...@@ -4641,8 +4641,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) ...@@ -4641,8 +4641,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */ /* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64)
memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
#else
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
#endif
}
/* reset ring buffer */ /* reset ring buffer */
ring->wptr = 0; ring->wptr = 0;
...@@ -4667,12 +4672,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) ...@@ -4667,12 +4672,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
vi_srbm_select(adev, 0, 0, 0, 0); vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64) #if IS_ENABLED(CONFIG_SW64)
memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
#else #else
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
#endif #endif
}
} }
return 0; return 0;
...@@ -4685,7 +4691,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) ...@@ -4685,7 +4691,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
int mqd_idx = ring - &adev->gfx.compute_ring[0]; int mqd_idx = ring - &adev->gfx.compute_ring[0];
if (!amdgpu_in_reset(adev) && !adev->in_suspend) { if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
#if IS_ENABLED(CONFIG_SW64)
memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
#else
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
#endif
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex); mutex_lock(&adev->srbm_mutex);
...@@ -4694,12 +4704,23 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) ...@@ -4694,12 +4704,23 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
vi_srbm_select(adev, 0, 0, 0, 0); vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64)
memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
#else
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
#endif
}
} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */ /* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64)
memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
#else
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
#endif
}
/* reset ring buffer */ /* reset ring buffer */
ring->wptr = 0; ring->wptr = 0;
amdgpu_ring_clear_ring(ring); amdgpu_ring_clear_ring(ring);
......
...@@ -1978,7 +1978,11 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) ...@@ -1978,7 +1978,11 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
return r; return r;
} }
#if IS_ENABLED(CONFIG_SW64)
memset_io(hpd, 0, mec_hpd_size);
#else
memset(hpd, 0, mec_hpd_size); memset(hpd, 0, mec_hpd_size);
#endif
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
...@@ -3724,10 +3728,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) ...@@ -3724,10 +3728,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */ /* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx]) { if (adev->gfx.mec.mqd_backup[mqd_idx]) {
if (IS_ENABLED(CONFIG_SW64)) #if IS_ENABLED(CONFIG_SW64)
memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
else #else
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
#endif
} }
/* reset ring buffer */ /* reset ring buffer */
...@@ -3740,7 +3745,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) ...@@ -3740,7 +3745,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
soc15_grbm_select(adev, 0, 0, 0, 0); soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
} else { } else {
#if IS_ENABLED(CONFIG_SW64)
memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
#else
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
#endif
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex); mutex_lock(&adev->srbm_mutex);
...@@ -3751,10 +3760,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) ...@@ -3751,10 +3760,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx]) { if (adev->gfx.mec.mqd_backup[mqd_idx]) {
if (IS_ENABLED(CONFIG_SW64)) #if IS_ENABLED(CONFIG_SW64)
memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
else #else
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
#endif
} }
} }
...@@ -3768,7 +3778,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) ...@@ -3768,7 +3778,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
int mqd_idx = ring - &adev->gfx.compute_ring[0]; int mqd_idx = ring - &adev->gfx.compute_ring[0];
if (!amdgpu_in_reset(adev) && !adev->in_suspend) { if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
#if IS_ENABLED(CONFIG_SW64)
memset_io((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
#else
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
#endif
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex); mutex_lock(&adev->srbm_mutex);
...@@ -3778,11 +3792,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) ...@@ -3778,11 +3792,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx])
if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64)
memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
#else
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
#endif
}
} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */ /* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx]) if (adev->gfx.mec.mqd_backup[mqd_idx])
if (adev->gfx.mec.mqd_backup[mqd_idx]) {
#if IS_ENABLED(CONFIG_SW64)
memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
#else
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
#endif
}
/* reset ring buffer */ /* reset ring buffer */
ring->wptr = 0; ring->wptr = 0;
......
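The amdgpu hunks above (gfx_v7/v8/v9) all apply the same fix: on SW64 the MQD buffer is reached through an __iomem mapping, so plain memset()/memcpy() is replaced with memset_io()/memcpy_toio()/memcpy_fromio() under IS_ENABLED(CONFIG_SW64). A minimal sketch of the pattern follows, using a hypothetical wrapper rather than the open-coded call sites in the patch.

/* Hypothetical wrapper; the patch open-codes this at each call site. */
static inline void mqd_restore_backup(void *mqd, const void *backup,
				      size_t size)
{
#if IS_ENABLED(CONFIG_SW64)
	/* MQD lives in IO-mapped memory; use the __iomem-safe copy. */
	memcpy_toio((void __iomem *)mqd, backup, size);
#else
	memcpy(mqd, backup, size);
#endif
}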
...@@ -1382,7 +1382,7 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned type) ...@@ -1382,7 +1382,7 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned type)
sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
sdomain->domain.geometry.aperture_start = SW64_DMA_START; sdomain->domain.geometry.aperture_start = 0ULL;
sdomain->domain.geometry.aperture_end = (~0ULL); sdomain->domain.geometry.aperture_end = (~0ULL);
sdomain->domain.geometry.force_aperture = true; sdomain->domain.geometry.force_aperture = true;
sdomain->type = IOMMU_DOMAIN_UNMANAGED; sdomain->type = IOMMU_DOMAIN_UNMANAGED;
......