Commit 18c5e807 authored by Huacai Chen, committed by Hongchen Zhang

LoongArch: Add vector extensions support

LoongArch inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP

--------------------------------

Add LoongArch's vector extension support, which includes the 128-bit LSX
(Loongson SIMD eXtension) and the 256-bit LASX (Loongson Advanced SIMD
eXtension).
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Change-Id: Icbdcc8c0657d85198accb9050f90dd41c9fa456c
Parent 6d789c0c
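The whole patch leans on the register aliasing defined by the architecture: $fN occupies bits [63:0] of $vrN, and $vrN occupies bits [127:0] of $xrN, so a single 32-entry save area per thread serves all three views and only the "upper" parts ever need separate handling. A sketch of that layout, assuming one 256-bit slot per register (illustrative only, not the kernel's union fpureg definition):

	#include <stdint.h>

	/* One save slot per architectural register; the three views alias
	 * its low bits. Illustrative mirror, not the kernel's real type. */
	union vec_reg_slot {
		uint64_t fpr;      /* scalar view:  $fN  = bits [63:0]   */
		uint64_t vr[2];    /* LSX view:     $vrN = bits [127:0]  */
		uint64_t xr[4];    /* LASX view:    $xrN = bits [255:0]  */
	};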
@@ -75,6 +75,8 @@ config LOONGARCH
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select CPU_SUPPORTS_LSX
select CPU_SUPPORTS_LASX
select GPIOLIB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -145,6 +147,36 @@ config CPU_HAS_PREFETCH
bool
default y
config CPU_HAS_LSX
bool "Support for the Loongson SIMD Extension"
depends on CPU_SUPPORTS_LSX
depends on 64BIT
help
Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
is enabled the kernel will support allocating & switching LSX
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support LSX or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.
If unsure, say Y.
config CPU_HAS_LASX
bool "Support for the Loongson Advanced SIMD Extension"
depends on CPU_SUPPORTS_LASX
depends on 64BIT && CPU_HAS_LSX
help
The Loongson Advanced SIMD Extension (LASX) is a 256-bit wide SIMD extension.
If unsure, say Y.
config CPU_SUPPORTS_LSX
bool
config CPU_SUPPORTS_LASX
bool
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -35,6 +35,8 @@ CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_CPU_HAS_LSX=y
CONFIG_CPU_HAS_LASX=y
CONFIG_HOTPLUG_CPU=y
CONFIG_NUMA=y
CONFIG_ACPI_SPCR_TABLE=y
@@ -117,6 +117,212 @@
.endif
.endm
.macro parse_vr var vr
\var = -1
.ifc \vr, $vr0
\var = 0
.endif
.ifc \vr, $vr1
\var = 1
.endif
.ifc \vr, $vr2
\var = 2
.endif
.ifc \vr, $vr3
\var = 3
.endif
.ifc \vr, $vr4
\var = 4
.endif
.ifc \vr, $vr5
\var = 5
.endif
.ifc \vr, $vr6
\var = 6
.endif
.ifc \vr, $vr7
\var = 7
.endif
.ifc \vr, $vr8
\var = 8
.endif
.ifc \vr, $vr9
\var = 9
.endif
.ifc \vr, $vr10
\var = 10
.endif
.ifc \vr, $vr11
\var = 11
.endif
.ifc \vr, $vr12
\var = 12
.endif
.ifc \vr, $vr13
\var = 13
.endif
.ifc \vr, $vr14
\var = 14
.endif
.ifc \vr, $vr15
\var = 15
.endif
.ifc \vr, $vr16
\var = 16
.endif
.ifc \vr, $vr17
\var = 17
.endif
.ifc \vr, $vr18
\var = 18
.endif
.ifc \vr, $vr19
\var = 19
.endif
.ifc \vr, $vr20
\var = 20
.endif
.ifc \vr, $vr21
\var = 21
.endif
.ifc \vr, $vr22
\var = 22
.endif
.ifc \vr, $vr23
\var = 23
.endif
.ifc \vr, $vr24
\var = 24
.endif
.ifc \vr, $vr25
\var = 25
.endif
.ifc \vr, $vr26
\var = 26
.endif
.ifc \vr, $vr27
\var = 27
.endif
.ifc \vr, $vr28
\var = 28
.endif
.ifc \vr, $vr29
\var = 29
.endif
.ifc \vr, $vr30
\var = 30
.endif
.ifc \vr, $vr31
\var = 31
.endif
.iflt \var
.error "Unable to parse register name \vr"
.endif
.endm
.macro parse_xr var xr
\var = -1
.ifc \xr, $xr0
\var = 0
.endif
.ifc \xr, $xr1
\var = 1
.endif
.ifc \xr, $xr2
\var = 2
.endif
.ifc \xr, $xr3
\var = 3
.endif
.ifc \xr, $xr4
\var = 4
.endif
.ifc \xr, $xr5
\var = 5
.endif
.ifc \xr, $xr6
\var = 6
.endif
.ifc \xr, $xr7
\var = 7
.endif
.ifc \xr, $xr8
\var = 8
.endif
.ifc \xr, $xr9
\var = 9
.endif
.ifc \xr, $xr10
\var = 10
.endif
.ifc \xr, $xr11
\var = 11
.endif
.ifc \xr, $xr12
\var = 12
.endif
.ifc \xr, $xr13
\var = 13
.endif
.ifc \xr, $xr14
\var = 14
.endif
.ifc \xr, $xr15
\var = 15
.endif
.ifc \xr, $xr16
\var = 16
.endif
.ifc \xr, $xr17
\var = 17
.endif
.ifc \xr, $xr18
\var = 18
.endif
.ifc \xr, $xr19
\var = 19
.endif
.ifc \xr, $xr20
\var = 20
.endif
.ifc \xr, $xr21
\var = 21
.endif
.ifc \xr, $xr22
\var = 22
.endif
.ifc \xr, $xr23
\var = 23
.endif
.ifc \xr, $xr24
\var = 24
.endif
.ifc \xr, $xr25
\var = 25
.endif
.ifc \xr, $xr26
\var = 26
.endif
.ifc \xr, $xr27
\var = 27
.endif
.ifc \xr, $xr28
\var = 28
.endif
.ifc \xr, $xr29
\var = 29
.endif
.ifc \xr, $xr30
\var = 30
.endif
.ifc \xr, $xr31
\var = 31
.endif
.iflt \var
.error "Unable to parse register name \xr"
.endif
.endm
.macro cpu_save_nonscratch thread
stptr.d s0, \thread, THREAD_REG23
stptr.d s1, \thread, THREAD_REG24
@@ -270,6 +476,424 @@
fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
.macro lsx_save_data thread tmp
parse_r __tmp, \tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
/* vst opcode is 0xb1 */
.word (0xb1 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0)
.word (0xb1 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1)
.word (0xb1 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2)
.word (0xb1 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3)
.word (0xb1 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4)
.word (0xb1 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5)
.word (0xb1 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6)
.word (0xb1 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7)
.word (0xb1 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8)
.word (0xb1 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9)
.word (0xb1 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10)
.word (0xb1 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11)
.word (0xb1 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12)
.word (0xb1 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13)
.word (0xb1 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14)
.word (0xb1 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15)
.word (0xb1 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16)
.word (0xb1 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17)
.word (0xb1 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18)
.word (0xb1 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19)
.word (0xb1 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20)
.word (0xb1 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21)
.word (0xb1 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22)
.word (0xb1 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23)
.word (0xb1 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24)
.word (0xb1 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25)
.word (0xb1 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26)
.word (0xb1 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27)
.word (0xb1 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28)
.word (0xb1 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29)
.word (0xb1 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30)
.word (0xb1 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31)
.endm
.macro lsx_restore_data thread tmp
parse_r __tmp, \tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
/* vld opcode is 0xb0 */
.word (0xb0 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0)
.word (0xb0 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1)
.word (0xb0 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2)
.word (0xb0 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3)
.word (0xb0 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4)
.word (0xb0 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5)
.word (0xb0 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6)
.word (0xb0 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7)
.word (0xb0 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8)
.word (0xb0 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9)
.word (0xb0 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10)
.word (0xb0 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11)
.word (0xb0 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12)
.word (0xb0 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13)
.word (0xb0 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14)
.word (0xb0 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15)
.word (0xb0 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16)
.word (0xb0 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17)
.word (0xb0 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18)
.word (0xb0 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19)
.word (0xb0 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20)
.word (0xb0 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21)
.word (0xb0 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22)
.word (0xb0 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23)
.word (0xb0 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24)
.word (0xb0 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25)
.word (0xb0 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26)
.word (0xb0 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27)
.word (0xb0 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28)
.word (0xb0 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29)
.word (0xb0 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30)
.word (0xb0 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31)
.endm
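The .word lines above hand-encode vst/vld so the file still assembles on toolchains without LSX support: the opcode sits in bits 31:22 (0xb1 for vst, 0xb0 for vld), the 12-bit byte offset in bits 21:10, the base GPR in bits 9:5 and the vector register in bits 4:0; parse_r (and, in the upper-half macros below, parse_vr) resolves register names to those field values at assembly time. A C sketch of the same packing, illustrative only:

	#include <stdint.h>

	/* Pack "vst/vld vd, rj, si12"; field layout read off the .word
	 * lines above. opcode is 0xb1 (vst) or 0xb0 (vld). */
	static inline uint32_t encode_vldst(uint32_t opcode, int vd,
					    int rj, int si12)
	{
		return (opcode << 22) | (((uint32_t)si12 & 0xfff) << 10) |
		       ((uint32_t)rj << 5) | (uint32_t)vd;
	}

	/* e.g. encode_vldst(0xb1, 0, 12, 0) encodes "vst $vr0, $r12, 0" */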
.macro lsx_save_all thread tmp0 tmp1
fpu_save_cc \thread, \tmp0, \tmp1
fpu_save_csr \thread, \tmp0
lsx_save_data \thread, \tmp0
.endm
.macro lsx_restore_all thread tmp0 tmp1
lsx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
.endm
.macro lsx_save_upper vd base tmp off
parse_vr __vd, \vd
parse_r __tmp, \tmp
/* vpickve2gr opcode is 0xe5dfe */
.word (0xe5dfe << 11 | 1 << 10 | __vd << 5 | __tmp)
st.d \tmp, \base, (\off+8)
.endm
.macro lsx_save_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
.endm
.macro lsx_restore_upper vd base tmp off
parse_vr __vd, \vd
parse_r __tmp, \tmp
ld.d \tmp, \base, (\off+8)
/* vinsgr2vr opcode is 0xe5d7e */
.word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd)
.endm
.macro lsx_restore_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
.endm
.macro lsx_init_upper vd tmp
parse_vr __vd, \vd
parse_r __tmp, \tmp
/* vinsgr2vr opcode is 0xe5d7e */
.word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd)
.endm
.macro lsx_init_all_upper tmp
not \tmp, zero
lsx_init_upper $vr0 \tmp
lsx_init_upper $vr1 \tmp
lsx_init_upper $vr2 \tmp
lsx_init_upper $vr3 \tmp
lsx_init_upper $vr4 \tmp
lsx_init_upper $vr5 \tmp
lsx_init_upper $vr6 \tmp
lsx_init_upper $vr7 \tmp
lsx_init_upper $vr8 \tmp
lsx_init_upper $vr9 \tmp
lsx_init_upper $vr10 \tmp
lsx_init_upper $vr11 \tmp
lsx_init_upper $vr12 \tmp
lsx_init_upper $vr13 \tmp
lsx_init_upper $vr14 \tmp
lsx_init_upper $vr15 \tmp
lsx_init_upper $vr16 \tmp
lsx_init_upper $vr17 \tmp
lsx_init_upper $vr18 \tmp
lsx_init_upper $vr19 \tmp
lsx_init_upper $vr20 \tmp
lsx_init_upper $vr21 \tmp
lsx_init_upper $vr22 \tmp
lsx_init_upper $vr23 \tmp
lsx_init_upper $vr24 \tmp
lsx_init_upper $vr25 \tmp
lsx_init_upper $vr26 \tmp
lsx_init_upper $vr27 \tmp
lsx_init_upper $vr28 \tmp
lsx_init_upper $vr29 \tmp
lsx_init_upper $vr30 \tmp
lsx_init_upper $vr31 \tmp
.endm
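The *_upper macros cover the case where only part of the context needs moving: vpickve2gr.d copies bits [127:64] of a vector register into a GPR so st.d can spill them at byte offset 8 of each save slot, vinsgr2vr.d moves them back, and lsx_init_upper reuses the insert instruction to fill the upper lane with the all-ones pattern produced by "not \tmp, zero". A sketch of the two encodings, with the field layout taken from the .word lines above (illustrative only):

	#include <stdint.h>

	/* 64-bit lane moves between $vrN and a GPR; idx selects the lane
	 * (0 = low half, 1 = high half of the 128-bit register). */
	static inline uint32_t encode_vpickve2gr_d(int rd, int vj, int idx)
	{
		return (0xe5dfeu << 11) | ((uint32_t)idx << 10) |
		       ((uint32_t)vj << 5) | (uint32_t)rd;
	}

	static inline uint32_t encode_vinsgr2vr_d(int vd, int rj, int idx)
	{
		return (0xe5d7eu << 11) | ((uint32_t)idx << 10) |
		       ((uint32_t)rj << 5) | (uint32_t)vd;
	}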
.macro lasx_save_data thread tmp
parse_r __tmp, \tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
/* xvst opcode is 0xb3 */
.word (0xb3 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0)
.word (0xb3 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1)
.word (0xb3 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2)
.word (0xb3 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3)
.word (0xb3 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4)
.word (0xb3 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5)
.word (0xb3 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6)
.word (0xb3 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7)
.word (0xb3 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8)
.word (0xb3 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9)
.word (0xb3 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10)
.word (0xb3 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11)
.word (0xb3 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12)
.word (0xb3 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13)
.word (0xb3 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14)
.word (0xb3 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15)
.word (0xb3 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16)
.word (0xb3 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17)
.word (0xb3 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18)
.word (0xb3 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19)
.word (0xb3 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20)
.word (0xb3 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21)
.word (0xb3 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22)
.word (0xb3 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23)
.word (0xb3 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24)
.word (0xb3 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25)
.word (0xb3 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26)
.word (0xb3 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27)
.word (0xb3 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28)
.word (0xb3 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29)
.word (0xb3 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30)
.word (0xb3 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31)
.endm
.macro lasx_restore_data thread tmp
parse_r __tmp, \tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
/* xvld opcode is 0xb2 */
.word (0xb2 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0)
.word (0xb2 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1)
.word (0xb2 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2)
.word (0xb2 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3)
.word (0xb2 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4)
.word (0xb2 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5)
.word (0xb2 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6)
.word (0xb2 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7)
.word (0xb2 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8)
.word (0xb2 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9)
.word (0xb2 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10)
.word (0xb2 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11)
.word (0xb2 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12)
.word (0xb2 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13)
.word (0xb2 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14)
.word (0xb2 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15)
.word (0xb2 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16)
.word (0xb2 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17)
.word (0xb2 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18)
.word (0xb2 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19)
.word (0xb2 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20)
.word (0xb2 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21)
.word (0xb2 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22)
.word (0xb2 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23)
.word (0xb2 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24)
.word (0xb2 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25)
.word (0xb2 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26)
.word (0xb2 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27)
.word (0xb2 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28)
.word (0xb2 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29)
.word (0xb2 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30)
.word (0xb2 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31)
.endm
.macro lasx_save_all thread tmp0 tmp1
fpu_save_cc \thread, \tmp0, \tmp1
fpu_save_csr \thread, \tmp0
lasx_save_data \thread, \tmp0
.endm
.macro lasx_restore_all thread tmp0 tmp1
lasx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
.endm
.macro lasx_save_upper xd base tmp off
/* Nothing */
.endm
.macro lasx_save_all_upper thread base tmp
/* Nothing */
.endm
.macro lasx_restore_upper xd base tmp off
parse_xr __xd, \xd
parse_xr __xt, \tmp
parse_r __base, \base
/* vld opcode is 0xb0 */
.word (0xb0 << 22 | (\off+16) << 10 | __base << 5 | __xt)
/* xvpermi.q opcode is 0x1dfb */
.word (0x1dfb << 18 | 0x2 << 10 | __xt << 5 | __xd)
.endm
.macro lasx_restore_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
/* Spill $vr31 (the low 128 bits of $xr31) to $t5/$t6 so $xr31 can serve as scratch below; xvpickve2gr opcode is 0x76efe */
.word (0x76efe << 12 | 0 << 10 | 31 << 5 | 0x11)
.word (0x76efe << 12 | 1 << 10 | 31 << 5 | 0x12)
lasx_restore_upper $xr0, \base, $xr31, (THREAD_FPR0-THREAD_FPR0)
lasx_restore_upper $xr1, \base, $xr31, (THREAD_FPR1-THREAD_FPR0)
lasx_restore_upper $xr2, \base, $xr31, (THREAD_FPR2-THREAD_FPR0)
lasx_restore_upper $xr3, \base, $xr31, (THREAD_FPR3-THREAD_FPR0)
lasx_restore_upper $xr4, \base, $xr31, (THREAD_FPR4-THREAD_FPR0)
lasx_restore_upper $xr5, \base, $xr31, (THREAD_FPR5-THREAD_FPR0)
lasx_restore_upper $xr6, \base, $xr31, (THREAD_FPR6-THREAD_FPR0)
lasx_restore_upper $xr7, \base, $xr31, (THREAD_FPR7-THREAD_FPR0)
lasx_restore_upper $xr8, \base, $xr31, (THREAD_FPR8-THREAD_FPR0)
lasx_restore_upper $xr9, \base, $xr31, (THREAD_FPR9-THREAD_FPR0)
lasx_restore_upper $xr10, \base, $xr31, (THREAD_FPR10-THREAD_FPR0)
lasx_restore_upper $xr11, \base, $xr31, (THREAD_FPR11-THREAD_FPR0)
lasx_restore_upper $xr12, \base, $xr31, (THREAD_FPR12-THREAD_FPR0)
lasx_restore_upper $xr13, \base, $xr31, (THREAD_FPR13-THREAD_FPR0)
lasx_restore_upper $xr14, \base, $xr31, (THREAD_FPR14-THREAD_FPR0)
lasx_restore_upper $xr15, \base, $xr31, (THREAD_FPR15-THREAD_FPR0)
lasx_restore_upper $xr16, \base, $xr31, (THREAD_FPR16-THREAD_FPR0)
lasx_restore_upper $xr17, \base, $xr31, (THREAD_FPR17-THREAD_FPR0)
lasx_restore_upper $xr18, \base, $xr31, (THREAD_FPR18-THREAD_FPR0)
lasx_restore_upper $xr19, \base, $xr31, (THREAD_FPR19-THREAD_FPR0)
lasx_restore_upper $xr20, \base, $xr31, (THREAD_FPR20-THREAD_FPR0)
lasx_restore_upper $xr21, \base, $xr31, (THREAD_FPR21-THREAD_FPR0)
lasx_restore_upper $xr22, \base, $xr31, (THREAD_FPR22-THREAD_FPR0)
lasx_restore_upper $xr23, \base, $xr31, (THREAD_FPR23-THREAD_FPR0)
lasx_restore_upper $xr24, \base, $xr31, (THREAD_FPR24-THREAD_FPR0)
lasx_restore_upper $xr25, \base, $xr31, (THREAD_FPR25-THREAD_FPR0)
lasx_restore_upper $xr26, \base, $xr31, (THREAD_FPR26-THREAD_FPR0)
lasx_restore_upper $xr27, \base, $xr31, (THREAD_FPR27-THREAD_FPR0)
lasx_restore_upper $xr28, \base, $xr31, (THREAD_FPR28-THREAD_FPR0)
lasx_restore_upper $xr29, \base, $xr31, (THREAD_FPR29-THREAD_FPR0)
lasx_restore_upper $xr30, \base, $xr31, (THREAD_FPR30-THREAD_FPR0)
lasx_restore_upper $xr31, \base, $xr31, (THREAD_FPR31-THREAD_FPR0)
/* Restore $vr31, xvinsgr2vr opcode is 0x76ebe */
.word (0x76ebe << 12 | 0 << 10 | 0x11 << 5 | 31)
.word (0x76ebe << 12 | 1 << 10 | 0x12 << 5 | 31)
.endm
.macro lasx_init_upper xd tmp
parse_xr __xd, \xd
parse_r __tmp, \tmp
/* xvinsgr2vr opcode is 0x76ebe */
.word (0x76ebe << 12 | 2 << 10 | __tmp << 5 | __xd)
.word (0x76ebe << 12 | 3 << 10 | __tmp << 5 | __xd)
.endm
.macro lasx_init_all_upper tmp
not \tmp, zero
lasx_init_upper $xr0 \tmp
lasx_init_upper $xr1 \tmp
lasx_init_upper $xr2 \tmp
lasx_init_upper $xr3 \tmp
lasx_init_upper $xr4 \tmp
lasx_init_upper $xr5 \tmp
lasx_init_upper $xr6 \tmp
lasx_init_upper $xr7 \tmp
lasx_init_upper $xr8 \tmp
lasx_init_upper $xr9 \tmp
lasx_init_upper $xr10 \tmp
lasx_init_upper $xr11 \tmp
lasx_init_upper $xr12 \tmp
lasx_init_upper $xr13 \tmp
lasx_init_upper $xr14 \tmp
lasx_init_upper $xr15 \tmp
lasx_init_upper $xr16 \tmp
lasx_init_upper $xr17 \tmp
lasx_init_upper $xr18 \tmp
lasx_init_upper $xr19 \tmp
lasx_init_upper $xr20 \tmp
lasx_init_upper $xr21 \tmp
lasx_init_upper $xr22 \tmp
lasx_init_upper $xr23 \tmp
lasx_init_upper $xr24 \tmp
lasx_init_upper $xr25 \tmp
lasx_init_upper $xr26 \tmp
lasx_init_upper $xr27 \tmp
lasx_init_upper $xr28 \tmp
lasx_init_upper $xr29 \tmp
lasx_init_upper $xr30 \tmp
lasx_init_upper $xr31 \tmp
.endm
.macro not dst src
nor \dst, \src, zero
.endm
@@ -25,6 +25,26 @@ extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
extern void _save_lsx(struct loongarch_fpu *fpu);
extern void _restore_lsx(struct loongarch_fpu *fpu);
extern void _init_lsx_upper(void);
extern void _restore_lsx_upper(struct loongarch_fpu *fpu);
extern void _save_lasx(struct loongarch_fpu *fpu);
extern void _restore_lasx(struct loongarch_fpu *fpu);
extern void _init_lasx_upper(void);
extern void _restore_lasx_upper(struct loongarch_fpu *fpu);
static inline void enable_lsx(void);
static inline void disable_lsx(void);
static inline void save_lsx(struct task_struct *t);
static inline void restore_lsx(struct task_struct *t);
static inline void enable_lasx(void);
static inline void disable_lasx(void);
static inline void save_lasx(struct task_struct *t);
static inline void restore_lasx(struct task_struct *t);
/*
* Mask the FCSR Cause bits according to the Enable bits, observing
* that Unimplemented is always enabled.
@@ -41,6 +61,29 @@ static inline int is_fp_enabled(void)
1 : 0;
}
static inline int is_lsx_enabled(void)
{
if (!cpu_has_lsx)
return 0;
return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ?
1 : 0;
}
static inline int is_lasx_enabled(void)
{
if (!cpu_has_lasx)
return 0;
return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ?
1 : 0;
}
static inline int is_simd_enabled(void)
{
return is_lsx_enabled() | is_lasx_enabled();
}
#define enable_fpu() set_csr_euen(CSR_EUEN_FPEN)
#define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN)
@@ -78,9 +121,22 @@ static inline void own_fpu(int restore)
static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
if (is_fpu_owner()) {
if (is_simd_enabled()) {
if (save) {
if (is_lasx_enabled())
save_lasx(tsk);
else
save_lsx(tsk);
}
disable_fpu();
disable_lsx();
disable_lasx();
clear_tsk_thread_flag(tsk, TIF_USEDSIMD);
} else {
if (save)
_save_fp(&tsk->thread.fpu);
disable_fpu();
}
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
}
KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
@@ -126,4 +182,140 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
return tsk->thread.fpu.fpr;
}
enum {
CTX_LSX = 1,
CTX_LASX = 2,
};
static inline int is_simd_owner(void)
{
return test_thread_flag(TIF_USEDSIMD);
}
#ifdef CONFIG_CPU_HAS_LSX
static inline void enable_lsx(void)
{
if (cpu_has_lsx)
csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
static inline void disable_lsx(void)
{
if (cpu_has_lsx)
csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
static inline void save_lsx(struct task_struct *t)
{
if (cpu_has_lsx)
_save_lsx(&t->thread.fpu);
}
static inline void restore_lsx(struct task_struct *t)
{
if (cpu_has_lsx)
_restore_lsx(&t->thread.fpu);
}
static inline void init_lsx_upper(void)
{
/*
* Check cpu_has_lsx only if it's a constant. This will allow the
* compiler to optimise out code for CPUs without LSX without adding
* an extra redundant check for CPUs with LSX.
*/
if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
return;
_init_lsx_upper();
}
static inline void restore_lsx_upper(struct task_struct *t)
{
if (cpu_has_lsx)
_restore_lsx_upper(&t->thread.fpu);
}
#else
static inline void enable_lsx(void) {}
static inline void disable_lsx(void) {}
static inline void save_lsx(struct task_struct *t) {}
static inline void restore_lsx(struct task_struct *t) {}
static inline void init_lsx_upper(void) {}
static inline void restore_lsx_upper(struct task_struct *t) {}
#endif
#ifdef CONFIG_CPU_HAS_LASX
static inline void enable_lasx(void)
{
if (cpu_has_lasx)
csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
static inline void disable_lasx(void)
{
if (cpu_has_lasx)
csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
static inline void save_lasx(struct task_struct *t)
{
if (cpu_has_lasx)
_save_lasx(&t->thread.fpu);
}
static inline void restore_lasx(struct task_struct *t)
{
if (cpu_has_lasx)
_restore_lasx(&t->thread.fpu);
}
static inline void init_lasx_upper(void)
{
if (cpu_has_lasx)
_init_lasx_upper();
}
static inline void restore_lasx_upper(struct task_struct *t)
{
if (cpu_has_lasx)
_restore_lasx_upper(&t->thread.fpu);
}
#else
static inline void enable_lasx(void) {}
static inline void disable_lasx(void) {}
static inline void save_lasx(struct task_struct *t) {}
static inline void restore_lasx(struct task_struct *t) {}
static inline void init_lasx_upper(void) {}
static inline void restore_lasx_upper(struct task_struct *t) {}
#endif
static inline int thread_lsx_context_live(void)
{
int ret = 0;
if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
goto out;
ret = test_thread_flag(TIF_LSX_CTX_LIVE) ? CTX_LSX : 0;
out:
return ret;
}
static inline int thread_lasx_context_live(void)
{
int ret = 0;
if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
goto out;
ret = test_thread_flag(TIF_LASX_CTX_LIVE) ? CTX_LASX : 0;
out:
return ret;
}
#endif /* _ASM_FPU_H */
@@ -111,6 +111,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FPU;
elf_hwcap |= HWCAP_LOONGARCH_FPU;
}
#ifdef CONFIG_CPU_HAS_LSX
if (config & CPUCFG2_LSX) {
c->options |= LOONGARCH_CPU_LSX;
elf_hwcap |= HWCAP_LOONGARCH_LSX;
}
#endif
#ifdef CONFIG_CPU_HAS_LASX
if (config & CPUCFG2_LASX) {
c->options |= LOONGARCH_CPU_LASX;
elf_hwcap |= HWCAP_LOONGARCH_LASX;
}
#endif
if (config & CPUCFG2_COMPLEX) {
c->options |= LOONGARCH_CPU_COMPLEX;
elf_hwcap |= HWCAP_LOONGARCH_COMPLEX;
@@ -26,6 +26,32 @@
.previous
.endm
.macro EX_V insn, reg, src, offs
parse_v __insn, \insn
parse_v __offs, \offs
parse_r __src, \src
parse_vr __reg, \reg
.ex\@:
.word __insn << 22 | __offs << 10 | __src << 5 | __reg
.section __ex_table,"a"
PTR .ex\@, fault
.previous
.endm
.macro EX_XV insn, reg, src, offs
parse_v __insn, \insn
parse_v __offs, \offs
parse_r __src, \src
parse_xr __reg, \reg
.ex\@:
.word __insn << 22 | __offs << 10 | __src << 5 | __reg
.section __ex_table,"a"
PTR .ex\@, fault
.previous
.endm
.macro sc_save_fp base
EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
@@ -146,6 +172,146 @@
movgr2fcsr fcsr0, \tmp0
.endm
.macro sc_save_lsx base
EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH)
EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH)
EX_V 0xb1 $vr2, \base, (2 * LSX_REG_WIDTH)
EX_V 0xb1 $vr3, \base, (3 * LSX_REG_WIDTH)
EX_V 0xb1 $vr4, \base, (4 * LSX_REG_WIDTH)
EX_V 0xb1 $vr5, \base, (5 * LSX_REG_WIDTH)
EX_V 0xb1 $vr6, \base, (6 * LSX_REG_WIDTH)
EX_V 0xb1 $vr7, \base, (7 * LSX_REG_WIDTH)
EX_V 0xb1 $vr8, \base, (8 * LSX_REG_WIDTH)
EX_V 0xb1 $vr9, \base, (9 * LSX_REG_WIDTH)
EX_V 0xb1 $vr10, \base, (10 * LSX_REG_WIDTH)
EX_V 0xb1 $vr11, \base, (11 * LSX_REG_WIDTH)
EX_V 0xb1 $vr12, \base, (12 * LSX_REG_WIDTH)
EX_V 0xb1 $vr13, \base, (13 * LSX_REG_WIDTH)
EX_V 0xb1 $vr14, \base, (14 * LSX_REG_WIDTH)
EX_V 0xb1 $vr15, \base, (15 * LSX_REG_WIDTH)
EX_V 0xb1 $vr16, \base, (16 * LSX_REG_WIDTH)
EX_V 0xb1 $vr17, \base, (17 * LSX_REG_WIDTH)
EX_V 0xb1 $vr18, \base, (18 * LSX_REG_WIDTH)
EX_V 0xb1 $vr19, \base, (19 * LSX_REG_WIDTH)
EX_V 0xb1 $vr20, \base, (20 * LSX_REG_WIDTH)
EX_V 0xb1 $vr21, \base, (21 * LSX_REG_WIDTH)
EX_V 0xb1 $vr22, \base, (22 * LSX_REG_WIDTH)
EX_V 0xb1 $vr23, \base, (23 * LSX_REG_WIDTH)
EX_V 0xb1 $vr24, \base, (24 * LSX_REG_WIDTH)
EX_V 0xb1 $vr25, \base, (25 * LSX_REG_WIDTH)
EX_V 0xb1 $vr26, \base, (26 * LSX_REG_WIDTH)
EX_V 0xb1 $vr27, \base, (27 * LSX_REG_WIDTH)
EX_V 0xb1 $vr28, \base, (28 * LSX_REG_WIDTH)
EX_V 0xb1 $vr29, \base, (29 * LSX_REG_WIDTH)
EX_V 0xb1 $vr30, \base, (30 * LSX_REG_WIDTH)
EX_V 0xb1 $vr31, \base, (31 * LSX_REG_WIDTH)
.endm
.macro sc_restore_lsx base
EX_V 0xb0 $vr0, \base, (0 * LSX_REG_WIDTH)
EX_V 0xb0 $vr1, \base, (1 * LSX_REG_WIDTH)
EX_V 0xb0 $vr2, \base, (2 * LSX_REG_WIDTH)
EX_V 0xb0 $vr3, \base, (3 * LSX_REG_WIDTH)
EX_V 0xb0 $vr4, \base, (4 * LSX_REG_WIDTH)
EX_V 0xb0 $vr5, \base, (5 * LSX_REG_WIDTH)
EX_V 0xb0 $vr6, \base, (6 * LSX_REG_WIDTH)
EX_V 0xb0 $vr7, \base, (7 * LSX_REG_WIDTH)
EX_V 0xb0 $vr8, \base, (8 * LSX_REG_WIDTH)
EX_V 0xb0 $vr9, \base, (9 * LSX_REG_WIDTH)
EX_V 0xb0 $vr10, \base, (10 * LSX_REG_WIDTH)
EX_V 0xb0 $vr11, \base, (11 * LSX_REG_WIDTH)
EX_V 0xb0 $vr12, \base, (12 * LSX_REG_WIDTH)
EX_V 0xb0 $vr13, \base, (13 * LSX_REG_WIDTH)
EX_V 0xb0 $vr14, \base, (14 * LSX_REG_WIDTH)
EX_V 0xb0 $vr15, \base, (15 * LSX_REG_WIDTH)
EX_V 0xb0 $vr16, \base, (16 * LSX_REG_WIDTH)
EX_V 0xb0 $vr17, \base, (17 * LSX_REG_WIDTH)
EX_V 0xb0 $vr18, \base, (18 * LSX_REG_WIDTH)
EX_V 0xb0 $vr19, \base, (19 * LSX_REG_WIDTH)
EX_V 0xb0 $vr20, \base, (20 * LSX_REG_WIDTH)
EX_V 0xb0 $vr21, \base, (21 * LSX_REG_WIDTH)
EX_V 0xb0 $vr22, \base, (22 * LSX_REG_WIDTH)
EX_V 0xb0 $vr23, \base, (23 * LSX_REG_WIDTH)
EX_V 0xb0 $vr24, \base, (24 * LSX_REG_WIDTH)
EX_V 0xb0 $vr25, \base, (25 * LSX_REG_WIDTH)
EX_V 0xb0 $vr26, \base, (26 * LSX_REG_WIDTH)
EX_V 0xb0 $vr27, \base, (27 * LSX_REG_WIDTH)
EX_V 0xb0 $vr28, \base, (28 * LSX_REG_WIDTH)
EX_V 0xb0 $vr29, \base, (29 * LSX_REG_WIDTH)
EX_V 0xb0 $vr30, \base, (30 * LSX_REG_WIDTH)
EX_V 0xb0 $vr31, \base, (31 * LSX_REG_WIDTH)
.endm
.macro sc_save_lasx base
EX_XV 0xb3 $xr0, \base, (0 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr1, \base, (1 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr2, \base, (2 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr3, \base, (3 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr4, \base, (4 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr5, \base, (5 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr6, \base, (6 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr7, \base, (7 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr8, \base, (8 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr9, \base, (9 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr10, \base, (10 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr11, \base, (11 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr12, \base, (12 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr13, \base, (13 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr14, \base, (14 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr15, \base, (15 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr16, \base, (16 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr17, \base, (17 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr18, \base, (18 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr19, \base, (19 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr20, \base, (20 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr21, \base, (21 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr22, \base, (22 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr23, \base, (23 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr24, \base, (24 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr25, \base, (25 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr26, \base, (26 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr27, \base, (27 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr28, \base, (28 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr29, \base, (29 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr30, \base, (30 * LASX_REG_WIDTH)
EX_XV 0xb3 $xr31, \base, (31 * LASX_REG_WIDTH)
.endm
.macro sc_restore_lasx base
EX_XV 0xb2 $xr0, \base, (0 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr1, \base, (1 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr2, \base, (2 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr3, \base, (3 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr4, \base, (4 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr5, \base, (5 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr6, \base, (6 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr7, \base, (7 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr8, \base, (8 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr9, \base, (9 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr10, \base, (10 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr11, \base, (11 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr12, \base, (12 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr13, \base, (13 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr14, \base, (14 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr15, \base, (15 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr16, \base, (16 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr17, \base, (17 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr18, \base, (18 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr19, \base, (19 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr20, \base, (20 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr21, \base, (21 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr22, \base, (22 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr23, \base, (23 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr24, \base, (24 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr25, \base, (25 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr26, \base, (26 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr27, \base, (27 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr28, \base, (28 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr29, \base, (29 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr30, \base, (30 * LASX_REG_WIDTH)
EX_XV 0xb2 $xr31, \base, (31 * LASX_REG_WIDTH)
.endm
/*
* Save a thread's fp context.
*/
@@ -167,6 +333,76 @@ SYM_FUNC_START(_restore_fp)
jr ra
SYM_FUNC_END(_restore_fp)
#ifdef CONFIG_CPU_HAS_LSX
/*
* Save a thread's LSX vector context.
*/
SYM_FUNC_START(_save_lsx)
lsx_save_all a0 t1 t2
jirl zero, ra, 0
SYM_FUNC_END(_save_lsx)
EXPORT_SYMBOL(_save_lsx)
/*
* Restore a thread's LSX vector context.
*/
SYM_FUNC_START(_restore_lsx)
lsx_restore_all a0 t1 t2
jirl zero, ra, 0
SYM_FUNC_END(_restore_lsx)
SYM_FUNC_START(_save_lsx_upper)
lsx_save_all_upper a0 t0 t1
jirl zero, ra, 0
SYM_FUNC_END(_save_lsx_upper)
SYM_FUNC_START(_restore_lsx_upper)
lsx_restore_all_upper a0 t0 t1
jirl zero, ra, 0
SYM_FUNC_END(_restore_lsx_upper)
SYM_FUNC_START(_init_lsx_upper)
lsx_init_all_upper t1
jirl zero, ra, 0
SYM_FUNC_END(_init_lsx_upper)
#endif
#ifdef CONFIG_CPU_HAS_LASX
/*
* Save a thread's LASX vector context.
*/
SYM_FUNC_START(_save_lasx)
lasx_save_all a0 t1 t2
jirl zero, ra, 0
SYM_FUNC_END(_save_lasx)
EXPORT_SYMBOL(_save_lasx)
/*
* Restore a thread's LASX vector context.
*/
SYM_FUNC_START(_restore_lasx)
lasx_restore_all a0 t1 t2
jirl zero, ra, 0
SYM_FUNC_END(_restore_lasx)
SYM_FUNC_START(_save_lasx_upper)
lasx_save_all_upper a0 t0 t1
jirl zero, ra, 0
SYM_FUNC_END(_save_lasx_upper)
SYM_FUNC_START(_restore_lasx_upper)
lasx_restore_all_upper a0 t0 t1
jirl zero, ra, 0
SYM_FUNC_END(_restore_lasx_upper)
SYM_FUNC_START(_init_lasx_upper)
lasx_init_all_upper t1
jirl zero, ra, 0
SYM_FUNC_END(_init_lasx_upper)
#endif
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
@@ -245,6 +481,58 @@ SYM_FUNC_START(_restore_fp_context)
jr ra
SYM_FUNC_END(_restore_fp_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_save_lsx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lsx a0
li.w a0, 0 # success
jirl zero, ra, 0
SYM_FUNC_END(_save_lsx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_restore_lsx_context)
sc_restore_lsx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jirl zero, ra, 0
SYM_FUNC_END(_restore_lsx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_save_lasx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lasx a0
li.w a0, 0 # success
jirl zero, ra, 0
SYM_FUNC_END(_save_lasx_context)
/*
* a0: fpregs
* a1: fcc
* a2: fcsr
*/
SYM_FUNC_START(_restore_lasx_context)
sc_restore_lasx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jirl zero, ra, 0
SYM_FUNC_END(_restore_lasx_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
jr ra
@@ -105,8 +105,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*/
preempt_disable();
- if (is_fpu_owner())
+ if (is_fpu_owner()) {
if (is_lasx_enabled())
save_lasx(current);
else if (is_lsx_enabled())
save_lsx(current);
else
save_fp(current);
}
preempt_enable();
@@ -246,6 +246,90 @@ static int cfg_set(struct task_struct *target,
return 0;
}
#ifdef CONFIG_CPU_HAS_LSX
static void copy_pad_fprs(struct task_struct *target,
const struct user_regset *regset,
struct membuf *to, unsigned int live_sz)
{
int i, j;
unsigned long long fill = ~0ull;
unsigned int cp_sz, pad_sz;
cp_sz = min(regset->size, live_sz);
pad_sz = regset->size - cp_sz;
WARN_ON(pad_sz % sizeof(fill));
for (i = 0; i < NUM_FPU_REGS; i++) {
membuf_write(to, &target->thread.fpu.fpr[i], cp_sz);
for (j = 0; j < (pad_sz / sizeof(fill)); j++) {
membuf_store(to, fill);
}
}
}
static int simd_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
const unsigned int wr_size = NUM_FPU_REGS * regset->size;
if (!tsk_used_math(target)) {
/* The task hasn't used FP or LSX, fill with 0xff */
copy_pad_fprs(target, regset, &to, 0);
} else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) {
/* Copy scalar FP context, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, 8);
#ifdef CONFIG_CPU_HAS_LASX
} else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) {
/* Copy LSX 128 Bit context, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, 16);
#endif
} else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
/* Trivially copy the vector registers */
membuf_write(&to, &target->thread.fpu.fpr, wr_size);
} else {
/* Copy as much context as possible, fill the rest with 0xff */
copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0]));
}
return 0;
}
static int simd_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
const unsigned int wr_size = NUM_FPU_REGS * regset->size;
unsigned int cp_sz;
int i, err, start;
init_fp_ctx(target);
if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
/* Trivially copy the vector registers */
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fpr,
0, wr_size);
} else {
/* Copy as much context as possible */
cp_sz = min_t(unsigned int, regset->size,
sizeof(target->thread.fpu.fpr[0]));
i = start = err = 0;
for (; i < NUM_FPU_REGS; i++, start += regset->size) {
err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fpr[i],
start, start + cp_sz);
}
}
return err;
}
#endif /* CONFIG_CPU_HAS_LSX */
struct pt_regs_offset {
const char *name;
int offset;
@@ -319,6 +403,12 @@ enum loongarch_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_CPUCFG,
#ifdef CONFIG_CPU_HAS_LSX
REGSET_LSX,
#endif
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
};
static const struct user_regset loongarch64_regsets[] = {
@@ -346,6 +436,26 @@ static const struct user_regset loongarch64_regsets[] = {
.regset_get = cfg_get,
.set = cfg_set,
},
#ifdef CONFIG_CPU_HAS_LSX
[REGSET_LSX] = {
.core_note_type = NT_LOONGARCH_LSX,
.n = NUM_FPU_REGS,
.size = 16,
.align = 16,
.regset_get = simd_get,
.set = simd_set,
},
#endif
#ifdef CONFIG_CPU_HAS_LASX
[REGSET_LASX] = {
.core_note_type = NT_LOONGARCH_LASX,
.n = NUM_FPU_REGS,
.size = 32,
.align = 32,
.regset_get = simd_get,
.set = simd_set,
},
#endif
};
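With these regsets registered, a debugger reaches the vector state through the generic regset interface: NT_LOONGARCH_LSX exposes 32 entries of 16 bytes, NT_LOONGARCH_LASX 32 entries of 32 bytes, and simd_get() pads any lanes that are not live with 0xff. A user-space sketch of the read side (error handling omitted; illustrative only):

	#include <elf.h>
	#include <stdint.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	/* Read the LSX regset of a stopped tracee. */
	static long read_lsx_regs(pid_t pid, uint64_t vr[32][2])
	{
		struct iovec iov = {
			.iov_base = vr,
			.iov_len  = 32 * 2 * sizeof(uint64_t),
		};
		return ptrace(PTRACE_GETREGSET, pid, NT_LOONGARCH_LSX, &iov);
	}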
static const struct user_regset_view user_loongarch64_view = {
@@ -50,6 +50,16 @@ extern asmlinkage int
_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
extern asmlinkage int
_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
extern asmlinkage int
_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int _save_lsx_all_upper(void __user *buf);
extern asmlinkage int _restore_lsx_all_upper(void __user *buf);
struct rt_sigframe {
struct siginfo rs_info;
@@ -65,9 +75,29 @@ struct extctx_layout {
unsigned long size;
unsigned int flags;
struct _ctx_layout fpu;
struct _ctx_layout lsx;
struct _ctx_layout lasx;
struct _ctx_layout end;
};
/* LSX context */
#define LSX_CTX_MAGIC 0x53580001
#define LSX_CTX_ALIGN 16
struct lsx_context {
__u64 regs[2*32];
__u64 fcc;
__u32 fcsr;
};
/* LASX context */
#define LASX_CTX_MAGIC 0x41535801
#define LASX_CTX_ALIGN 32
struct lasx_context {
__u64 regs[4*32];
__u64 fcc;
__u32 fcsr;
};
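Each context record in the signal frame is preceded by a struct sctx_info header carrying a magic and a size that covers header plus payload, which is what parse_extcontext() below validates before trusting a block. A user-space sketch of walking those records; the header mirror and the zero end-marker are assumptions inferred from the kernel-side accesses (info->magic, info->size), not the uapi definition:

	#include <stdint.h>

	/* Assumed mirror of struct sctx_info; the real layout lives in the
	 * LoongArch uapi headers and may carry extra padding. */
	struct sctx_hdr {
		uint32_t magic;
		uint32_t size;           /* header + payload (assumed) */
	};

	static void *find_extctx(void *base, uint32_t magic)
	{
		char *p = base;

		for (;;) {
			struct sctx_hdr *h = (struct sctx_hdr *)p;

			if (h->magic == 0)       /* assumed end marker */
				return (void *)0;
			if (h->magic == magic)
				return h + 1;    /* payload follows header */
			p += h->size;
		}
	}

	/* e.g. find_extctx(base, 0x53580001) returns the LSX block, if any */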
static void __user *get_ctx_through_ctxinfo(struct sctx_info *info)
{
return (void __user *)((char *)info + sizeof(struct sctx_info));
@@ -115,6 +145,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx)
return err;
}
static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx)
{
int i;
int err = 0;
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&regs[2*i]);
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
&regs[2*i+1]);
}
err |= __put_user(current->thread.fpu.fcc, fcc);
err |= __put_user(current->thread.fpu.fcsr, fcsr);
return err;
}
static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx)
{
int i;
int err = 0;
u64 fpr_val;
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __get_user(fpr_val, &regs[2*i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
err |= __get_user(fpr_val, &regs[2*i+1]);
set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
}
err |= __get_user(current->thread.fpu.fcc, fcc);
err |= __get_user(current->thread.fpu.fcsr, fcsr);
return err;
}
static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx)
{
int i;
int err = 0;
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
&regs[4*i]);
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
&regs[4*i+1]);
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 2),
&regs[4*i+2]);
err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 3),
&regs[4*i+3]);
}
err |= __put_user(current->thread.fpu.fcc, fcc);
err |= __put_user(current->thread.fpu.fcsr, fcsr);
return err;
}
static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
{
int i;
int err = 0;
u64 fpr_val;
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
for (i = 0; i < NUM_FPU_REGS; i++) {
err |= __get_user(fpr_val, &regs[4*i]);
set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
err |= __get_user(fpr_val, &regs[4*i+1]);
set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
err |= __get_user(fpr_val, &regs[4*i+2]);
set_fpr64(&current->thread.fpu.fpr[i], 2, fpr_val);
err |= __get_user(fpr_val, &regs[4*i+3]);
set_fpr64(&current->thread.fpu.fpr[i], 3, fpr_val);
}
err |= __get_user(current->thread.fpu.fcc, fcc);
err |= __get_user(current->thread.fpu.fcsr, fcsr);
return err;
}
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
@@ -136,6 +256,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx)
return _restore_fp_context(regs, fcc, fcsr);
}
static int save_hw_lsx_context(struct lsx_context __user *ctx)
{
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
return _save_lsx_context(regs, fcc, fcsr);
}
static int restore_hw_lsx_context(struct lsx_context __user *ctx)
{
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
return _restore_lsx_context(regs, fcc, fcsr);
}
static int save_hw_lasx_context(struct lasx_context __user *ctx)
{
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
return _save_lasx_context(regs, fcc, fcsr);
}
static int restore_hw_lasx_context(struct lasx_context __user *ctx)
{
uint64_t __user *regs = (uint64_t *)&ctx->regs;
uint64_t __user *fcc = &ctx->fcc;
uint32_t __user *fcsr = &ctx->fcsr;
return _restore_lasx_context(regs, fcc, fcsr);
}
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
@@ -227,6 +383,146 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx)
return err ?: sig;
}
static int protected_save_lsx_context(struct extctx_layout *extctx)
{
int err = 0;
struct sctx_info __user *info = extctx->lsx.addr;
struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
uint64_t __user *fcc = &lsx_ctx->fcc;
uint32_t __user *fcsr = &lsx_ctx->fcsr;
while (1) {
lock_fpu_owner();
if (is_lsx_enabled())
err = save_hw_lsx_context(lsx_ctx);
else
err = copy_lsx_to_sigcontext(lsx_ctx);
unlock_fpu_owner();
err |= __put_user(LSX_CTX_MAGIC, &info->magic);
err |= __put_user(extctx->lsx.size, &info->size);
if (likely(!err))
break;
/* Touch the LSX context and try again */
err = __put_user(0, &regs[0]) |
__put_user(0, &regs[32*2-1]) |
__put_user(0, fcc) |
__put_user(0, fcsr);
if (err)
return err; /* really bad sigcontext */
}
return err;
}
static int protected_restore_lsx_context(struct extctx_layout *extctx)
{
int err = 0, sig = 0, tmp __maybe_unused;
struct sctx_info __user *info = extctx->lsx.addr;
struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
uint64_t __user *fcc = &lsx_ctx->fcc;
uint32_t __user *fcsr = &lsx_ctx->fcsr;
err = sig = fcsr_pending(fcsr);
if (err < 0)
return err;
while (1) {
lock_fpu_owner();
if (is_lsx_enabled())
err = restore_hw_lsx_context(lsx_ctx);
else
err = copy_lsx_from_sigcontext(lsx_ctx);
unlock_fpu_owner();
if (likely(!err))
break;
/* Touch the LSX context and try again */
err = __get_user(tmp, &regs[0]) |
__get_user(tmp, &regs[32*2-1]) |
__get_user(tmp, fcc) |
__get_user(tmp, fcsr);
if (err)
break; /* really bad sigcontext */
}
return err ?: sig;
}
static int protected_save_lasx_context(struct extctx_layout *extctx)
{
int err = 0;
struct sctx_info __user *info = extctx->lasx.addr;
struct lasx_context __user *lasx_ctx =
(struct lasx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
uint64_t __user *fcc = &lasx_ctx->fcc;
uint32_t __user *fcsr = &lasx_ctx->fcsr;
while (1) {
lock_fpu_owner();
if (is_lasx_enabled())
err = save_hw_lasx_context(lasx_ctx);
else
err = copy_lasx_to_sigcontext(lasx_ctx);
unlock_fpu_owner();
err |= __put_user(LASX_CTX_MAGIC, &info->magic);
err |= __put_user(extctx->lasx.size, &info->size);
if (likely(!err))
break;
/* Touch the LASX context and try again */
err = __put_user(0, &regs[0]) |
__put_user(0, &regs[32*4-1]) |
__put_user(0, fcc) |
__put_user(0, fcsr);
if (err)
return err; /* really bad sigcontext */
}
return err;
}
static int protected_restore_lasx_context(struct extctx_layout *extctx)
{
int err = 0, sig = 0, tmp __maybe_unused;
struct sctx_info __user *info = extctx->lasx.addr;
struct lasx_context __user *lasx_ctx =
(struct lasx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
uint64_t __user *fcc = &lasx_ctx->fcc;
uint32_t __user *fcsr = &lasx_ctx->fcsr;
err = sig = fcsr_pending(fcsr);
if (err < 0)
return err;
while (1) {
lock_fpu_owner();
if (is_lasx_enabled())
err = restore_hw_lasx_context(lasx_ctx);
else
err = copy_lasx_from_sigcontext(lasx_ctx);
unlock_fpu_owner();
if (likely(!err))
break;
/* Touch the LASX context and try again */
err = __get_user(tmp, &regs[0]) |
__get_user(tmp, &regs[32*4-1]) |
__get_user(tmp, fcc) |
__get_user(tmp, fcsr);
if (err)
break; /* really bad sigcontext */
}
return err ?: sig;
}
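All four protected_* helpers share one fault-tolerance pattern: the hand-encoded vector accesses are covered by __ex_table entries (the EX_V/EX_XV macros), so a page fault on the sigframe makes the assembly helper return -EFAULT rather than oops; the caller then touches the first and last words of the area to fault the pages in and retries, bailing out only when even the touch fails. Distilled into a sketch modeled on the save path (the helper name is hypothetical):

	/* Hypothetical shape of the retry loop used above. */
	static int protected_user_op(void __user *buf, unsigned long len,
				     int (*hw_op)(void __user *))
	{
		int err;

		while (1) {
			err = hw_op(buf);    /* -EFAULT via __ex_table on fault */
			if (likely(!err))
				return 0;
			/* Touch both ends to fault the range in, then retry */
			err = __put_user(0, (u64 __user *)buf) |
			      __put_user(0, (u64 __user *)((char __user *)buf + len - 8));
			if (err)
				return err;  /* really bad sigcontext */
		}
	}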
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
@@ -240,7 +536,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
for (i = 1; i < 32; i++)
err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
- if (extctx->fpu.addr)
+ if (extctx->lasx.addr)
err |= protected_save_lasx_context(extctx);
else if (extctx->lsx.addr)
err |= protected_save_lsx_context(extctx);
else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
/* Set the "end" magic */
@@ -274,6 +574,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
extctx->fpu.addr = info;
break;
case LSX_CTX_MAGIC:
if (size < (sizeof(struct sctx_info) +
sizeof(struct lsx_context)))
goto invalid;
extctx->lsx.addr = info;
break;
case LASX_CTX_MAGIC:
if (size < (sizeof(struct sctx_info) +
sizeof(struct lasx_context)))
goto invalid;
extctx->lasx.addr = info;
break;
default:
goto invalid;
}
@@ -319,7 +633,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
for (i = 1; i < 32; i++)
err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
- if (extctx.fpu.addr)
+ if (extctx.lasx.addr)
err |= protected_restore_lasx_context(&extctx);
else if (extctx.lsx.addr)
err |= protected_restore_lsx_context(&extctx);
else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
bad:
@@ -375,7 +693,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
extctx->size += extctx->end.size;
if (extctx->flags & SC_USED_FP) {
- if (cpu_has_fpu)
+ if (cpu_has_lasx && thread_lasx_context_live())
new_sp = extframe_alloc(extctx, &extctx->lasx,
sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp);
else if (cpu_has_lsx && thread_lsx_context_live())
new_sp = extframe_alloc(extctx, &extctx->lsx,
sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp);
else if (cpu_has_fpu)
new_sp = extframe_alloc(extctx, &extctx->fpu,
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}
@@ -514,12 +514,67 @@ static void init_restore_fp(void)
BUG_ON(!is_fp_enabled());
}
static void init_restore_lsx(void)
{
enable_lsx();
if (!thread_lsx_context_live()) {
/* First time LSX context user */
init_restore_fp();
init_lsx_upper();
set_thread_flag(TIF_LSX_CTX_LIVE);
} else {
if (!is_simd_owner()) {
if (is_fpu_owner()) {
restore_lsx_upper(current);
} else {
__own_fpu();
restore_lsx(current);
}
}
}
set_thread_flag(TIF_USEDSIMD);
BUG_ON(!is_fp_enabled());
BUG_ON(!is_lsx_enabled());
}
static void init_restore_lasx(void)
{
enable_lasx();
if (!thread_lasx_context_live()) {
/* First time LASX context user */
init_restore_lsx();
init_lasx_upper();
set_thread_flag(TIF_LASX_CTX_LIVE);
} else {
if (is_fpu_owner() || is_simd_owner()) {
init_restore_lsx();
restore_lasx_upper(current);
} else {
__own_fpu();
enable_lsx();
restore_lasx(current);
}
}
set_thread_flag(TIF_USEDSIMD);
BUG_ON(!is_fp_enabled());
BUG_ON(!is_lsx_enabled());
BUG_ON(!is_lasx_enabled());
}
asmlinkage void noinstr do_fpu(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
die_if_kernel("do_fpu invoked from kernel context!", regs);
BUG_ON(is_lsx_enabled());
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_fp();
@@ -534,7 +589,19 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
if (!cpu_has_lsx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lsx invoked from kernel context!", regs);
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_lsx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);
@@ -545,7 +612,18 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
if (!cpu_has_lasx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lasx invoked from kernel context!", regs);
preempt_disable();
init_restore_lasx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);