-rw-r--r--Documentation/dev-tools/kasan.rst4
-rw-r--r--Documentation/features/debug/KASAN/arch-support.txt2
-rw-r--r--Documentation/features/debug/kcov/arch-support.txt2
-rw-r--r--Documentation/features/debug/kgdb/arch-support.txt2
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kasan.rst2
-rw-r--r--arch/loongarch/Kconfig26
-rw-r--r--arch/loongarch/Makefile3
-rw-r--r--arch/loongarch/configs/loongson3_defconfig74
-rw-r--r--arch/loongarch/include/asm/asm-prototypes.h1
-rw-r--r--arch/loongarch/include/asm/asmmacro.h158
-rw-r--r--arch/loongarch/include/asm/kasan.h126
-rw-r--r--arch/loongarch/include/asm/kfence.h61
-rw-r--r--arch/loongarch/include/asm/kgdb.h97
-rw-r--r--arch/loongarch/include/asm/lbt.h109
-rw-r--r--arch/loongarch/include/asm/loongarch.h47
-rw-r--r--arch/loongarch/include/asm/mmzone.h2
-rw-r--r--arch/loongarch/include/asm/page.h7
-rw-r--r--arch/loongarch/include/asm/pgalloc.h1
-rw-r--r--arch/loongarch/include/asm/pgtable.h31
-rw-r--r--arch/loongarch/include/asm/processor.h26
-rw-r--r--arch/loongarch/include/asm/setup.h8
-rw-r--r--arch/loongarch/include/asm/stackframe.h4
-rw-r--r--arch/loongarch/include/asm/string.h20
-rw-r--r--arch/loongarch/include/asm/switch_to.h2
-rw-r--r--arch/loongarch/include/asm/thread_info.h4
-rw-r--r--arch/loongarch/include/asm/xor.h68
-rw-r--r--arch/loongarch/include/asm/xor_simd.h34
-rw-r--r--arch/loongarch/include/uapi/asm/ptrace.h6
-rw-r--r--arch/loongarch/include/uapi/asm/sigcontext.h10
-rw-r--r--arch/loongarch/kernel/Makefile9
-rw-r--r--arch/loongarch/kernel/asm-offsets.c18
-rw-r--r--arch/loongarch/kernel/cpu-probe.c14
-rw-r--r--arch/loongarch/kernel/entry.S5
-rw-r--r--arch/loongarch/kernel/fpu.S14
-rw-r--r--arch/loongarch/kernel/head.S13
-rw-r--r--arch/loongarch/kernel/kfpu.c55
-rw-r--r--arch/loongarch/kernel/kgdb.c727
-rw-r--r--arch/loongarch/kernel/lbt.S155
-rw-r--r--arch/loongarch/kernel/numa.c35
-rw-r--r--arch/loongarch/kernel/process.c15
-rw-r--r--arch/loongarch/kernel/ptrace.c54
-rw-r--r--arch/loongarch/kernel/relocate.c8
-rw-r--r--arch/loongarch/kernel/setup.c4
-rw-r--r--arch/loongarch/kernel/signal.c188
-rw-r--r--arch/loongarch/kernel/stacktrace.c18
-rw-r--r--arch/loongarch/kernel/traps.c50
-rw-r--r--arch/loongarch/lib/Makefile2
-rw-r--r--arch/loongarch/lib/clear_user.S87
-rw-r--r--arch/loongarch/lib/copy_user.S161
-rw-r--r--arch/loongarch/lib/memcpy.S8
-rw-r--r--arch/loongarch/lib/memmove.S20
-rw-r--r--arch/loongarch/lib/memset.S8
-rw-r--r--arch/loongarch/lib/xor_simd.c93
-rw-r--r--arch/loongarch/lib/xor_simd.h38
-rw-r--r--arch/loongarch/lib/xor_simd_glue.c72
-rw-r--r--arch/loongarch/lib/xor_template.c110
-rw-r--r--arch/loongarch/mm/Makefile3
-rw-r--r--arch/loongarch/mm/cache.c1
-rw-r--r--arch/loongarch/mm/fault.c22
-rw-r--r--arch/loongarch/mm/init.c71
-rw-r--r--arch/loongarch/mm/kasan_init.c243
-rw-r--r--arch/loongarch/mm/mmap.c13
-rw-r--r--arch/loongarch/mm/pgtable.c12
-rw-r--r--arch/loongarch/vdso/Makefile3
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/raid/pq.h4
-rw-r--r--lib/raid6/Makefile1
-rw-r--r--lib/raid6/algos.c16
-rw-r--r--lib/raid6/loongarch.h38
-rw-r--r--lib/raid6/loongarch_simd.c422
-rw-r--r--lib/raid6/recov_loongarch_simd.c513
-rw-r--r--lib/raid6/test/Makefile12
-rw-r--r--mm/kasan/init.c18
-rw-r--r--mm/kasan/kasan.h6
-rw-r--r--mm/kfence/core.c5
75 files changed, 3862 insertions, 461 deletions
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index f4acf9c2e90f..382818a7197a 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -41,8 +41,8 @@ Support
Architectures
~~~~~~~~~~~~~
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
Compilers
~~~~~~~~~
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index bf0124fae643..c4581c2edb28 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt
index ffcc9f2b1d74..de84cefbcdd3 100644
--- a/Documentation/features/debug/kcov/arch-support.txt
+++ b/Documentation/features/debug/kcov/arch-support.txt
@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index 958498f9f2a4..5e91ec78c80b 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | ok |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
diff --git a/Documentation/translations/zh_CN/dev-tools/kasan.rst b/Documentation/translations/zh_CN/dev-tools/kasan.rst
index 05ef904dbcfb..8fdb20c9665b 100644
--- a/Documentation/translations/zh_CN/dev-tools/kasan.rst
+++ b/Documentation/translations/zh_CN/dev-tools/kasan.rst
@@ -42,7 +42,7 @@ KASAN有三种模式:
体系架构
~~~~~~~~
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
而基于标签的KASAN模式只在arm64上支持。
编译器
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ecf282dee513..e14396a2ddcb 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -8,11 +8,13 @@ config LOONGARCH
select ACPI_PPTT if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_BINFMT_ELF_STATE
+ select ARCH_DISABLE_KASAN_INLINE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_KCOV
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
+ select HAVE_ARCH_KASAN
+ select HAVE_ARCH_KFENCE
+ select HAVE_ARCH_KGDB if PERF_EVENTS
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
config AS_HAS_LASX_EXTENSION
def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
+config AS_HAS_LBT_EXTENSION
+ def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
menu "Kernel type and options"
source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
If unsure, say Y.
+config CPU_HAS_LBT
+ bool "Support for the Loongson Binary Translation Extension"
+ depends on AS_HAS_LBT_EXTENSION
+ help
+ Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+ to SCR3), x86/ARM eflags (eflags) and an x87 FPU stack pointer (ftop).
+ Enabling this option allows the kernel to allocate and context-switch
+ these LBT-specific registers.
+
+ If you want to run applications that use this feature, such as the
+ Loongson Architecture Translator (LAT), say Y.
+
config CPU_HAS_PREFETCH
bool
default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
config ARCH_SUPPORTS_UPROBES
def_bool y
+config KASAN_SHADOW_OFFSET
+ hex
+ default 0x0
+ depends on KASAN
+
menu "Power management options"
config ARCH_SUSPEND_POSSIBLE
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index ef87bab46754..fb0fada43197 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext
endif
cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
load-y = 0x9000000000200000
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index d64849b4cba1..a3b52aaa83b3 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_NR_CPUS=64
CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
CONFIG_SUSPEND=y
CONFIG_HIBERNATION=y
CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
CONFIG_EFI_CAPSULE_LOADER=m
CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
CONFIG_INET_ESP=m
CONFIG_INET_UDP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
CONFIG_IP_VS_IPV6=y
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
CONFIG_IP_VS_NFCT=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_TARGET_NETMAP=m
CONFIG_IP_NF_TARGET_REDIRECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
CONFIG_ZRAM=m
CONFIG_ZRAM_DEF_COMP_ZSTD=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_PIIX4=y
CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_RESTART=y
CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
CONFIG_SENSORS_LM93=m
CONFIG_SENSORS_W83795=m
CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
CONFIG_RC_CORE=m
CONFIG_LIRC=y
CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
CONFIG_DRM_AST=y
CONFIG_DRM_QXL=m
CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
CONFIG_FB=y
CONFIG_FB_EFI=y
CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
CONFIG_INFINIBAND=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
CONFIG_DMADEVICES=y
CONFIG_UIO=m
CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
CONFIG_COMEDI_NI_PCIDIO=m
CONFIG_COMEDI_NI_PCIMIO=m
CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
CONFIG_PM_DEVFREQ=y
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=y
CONFIG_OVERLAY_FS_INDEX=y
CONFIG_OVERLAY_FS_XINO_AUTO=y
CONFIG_OVERLAY_FS_METACOPY=y
CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=936
CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
CONFIG_CIFS=m
# CONFIG_CIFS_DEBUG is not set
CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
CONFIG_KEY_DH_OPERATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index ed06d3997420..cf8e1a4e7c19 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/uaccess.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/ftrace.h>
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index 79e1d53fea89..c9544f358c33 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -10,113 +10,6 @@
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
- .macro parse_v var val
- \var = \val
- .endm
-
- .macro parse_r var r
- \var = -1
- .ifc \r, $r0
- \var = 0
- .endif
- .ifc \r, $r1
- \var = 1
- .endif
- .ifc \r, $r2
- \var = 2
- .endif
- .ifc \r, $r3
- \var = 3
- .endif
- .ifc \r, $r4
- \var = 4
- .endif
- .ifc \r, $r5
- \var = 5
- .endif
- .ifc \r, $r6
- \var = 6
- .endif
- .ifc \r, $r7
- \var = 7
- .endif
- .ifc \r, $r8
- \var = 8
- .endif
- .ifc \r, $r9
- \var = 9
- .endif
- .ifc \r, $r10
- \var = 10
- .endif
- .ifc \r, $r11
- \var = 11
- .endif
- .ifc \r, $r12
- \var = 12
- .endif
- .ifc \r, $r13
- \var = 13
- .endif
- .ifc \r, $r14
- \var = 14
- .endif
- .ifc \r, $r15
- \var = 15
- .endif
- .ifc \r, $r16
- \var = 16
- .endif
- .ifc \r, $r17
- \var = 17
- .endif
- .ifc \r, $r18
- \var = 18
- .endif
- .ifc \r, $r19
- \var = 19
- .endif
- .ifc \r, $r20
- \var = 20
- .endif
- .ifc \r, $r21
- \var = 21
- .endif
- .ifc \r, $r22
- \var = 22
- .endif
- .ifc \r, $r23
- \var = 23
- .endif
- .ifc \r, $r24
- \var = 24
- .endif
- .ifc \r, $r25
- \var = 25
- .endif
- .ifc \r, $r26
- \var = 26
- .endif
- .ifc \r, $r27
- \var = 27
- .endif
- .ifc \r, $r28
- \var = 28
- .endif
- .ifc \r, $r29
- \var = 29
- .endif
- .ifc \r, $r30
- \var = 30
- .endif
- .ifc \r, $r31
- \var = 31
- .endif
- .iflt \var
- .error "Unable to parse register name \r"
- .endif
- .endm
-
.macro cpu_save_nonscratch thread
stptr.d s0, \thread, THREAD_REG23
stptr.d s1, \thread, THREAD_REG24
@@ -148,12 +41,51 @@
.macro fpu_save_csr thread tmp
movfcsr2gr \tmp, fcsr0
- stptr.w \tmp, \thread, THREAD_FCSR
+ stptr.w \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp, \tmp, FPU_CSR_TM
+ beqz \tmp, 1f
+ /* Save FTOP */
+ x86mftop \tmp
+ stptr.w \tmp, \thread, THREAD_FTOP
+ /* Turn off TM so that the FPR layout in memory is independent of TM */
+ x86clrtm
+1:
+#endif
.endm
- .macro fpu_restore_csr thread tmp
- ldptr.w \tmp, \thread, THREAD_FCSR
- movgr2fcsr fcsr0, \tmp
+ .macro fpu_restore_csr thread tmp0 tmp1
+ ldptr.w \tmp0, \thread, THREAD_FCSR
+ movgr2fcsr fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp0, \tmp0, FPU_CSR_TM
+ beqz \tmp0, 2f
+ /* Restore FTOP */
+ ldptr.w \tmp0, \thread, THREAD_FTOP
+ andi \tmp0, \tmp0, 0x7
+ la.pcrel \tmp1, 1f
+ alsl.d \tmp1, \tmp0, \tmp1, 3
+ jr \tmp1
+1:
+ x86mttop 0
+ b 2f
+ x86mttop 1
+ b 2f
+ x86mttop 2
+ b 2f
+ x86mttop 3
+ b 2f
+ x86mttop 4
+ b 2f
+ x86mttop 5
+ b 2f
+ x86mttop 6
+ b 2f
+ x86mttop 7
+2:
+#endif
.endm
.macro fpu_save_cc thread tmp0 tmp1
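A note on the fpu_restore_csr branch table above: x86mttop only takes an immediate operand, so the saved FTOP value cannot simply be moved in from a register; instead the macro computes a jump into a table of eight "x86mttop k; b 2f" pairs (the alsl.d scales the index by 8 bytes, i.e. two 4-byte instructions per entry). A minimal C analogue of the same dispatch, purely illustrative and with a hypothetical function name:

	static void restore_ftop(unsigned int ftop)
	{
		switch (ftop & 0x7) {	/* FTOP is a 3-bit x87 stack-top pointer */
		case 0: __asm__("x86mttop 0"); break;
		case 1: __asm__("x86mttop 1"); break;
		case 2: __asm__("x86mttop 2"); break;
		case 3: __asm__("x86mttop 3"); break;
		case 4: __asm__("x86mttop 4"); break;
		case 5: __asm__("x86mttop 5"); break;
		case 6: __asm__("x86mttop 6"); break;
		case 7: __asm__("x86mttop 7"); break;
		}
	}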
@@ -353,7 +285,7 @@
.macro lsx_restore_all thread tmp0 tmp1
lsx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
- fpu_restore_csr \thread, \tmp0
+ fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lsx_save_upper vd base tmp off
@@ -563,7 +495,7 @@
.macro lasx_restore_all thread tmp0 tmp1
lasx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
- fpu_restore_csr \thread, \tmp0
+ fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644
index 000000000000..deeff8158f45
--- /dev/null
+++ b/arch/loongarch/include/asm/kasan.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Number of valid virtual-address bits */
+#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Mask to extract the valid address bits */
+#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* Whole address-space size of one segment */
+#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG (0x8000)
+#define XKPRANGE_CC_SEG (0x9000)
+#define XKVRANGE_VC_SEG (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START CACHE_BASE
+#define XKPRANGE_CC_SIZE XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET (0)
+#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START UNCACHE_BASE
+#define XKPRANGE_UC_SIZE XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached) */
+#define XKVRANGE_VC_START MODULES_VADDR
+#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KASAN shadow memory starts right after the vmalloc area. */
+#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+ return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+ if (!kasan_arch_is_ready()) {
+ return (void *)(kasan_early_shadow_page);
+ } else {
+ unsigned long maddr = (unsigned long)addr;
+ unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+ unsigned long offset = 0;
+
+ maddr &= XRANGE_SHADOW_MASK;
+ switch (xrange) {
+ case XKPRANGE_CC_SEG:
+ offset = XKPRANGE_CC_SHADOW_OFFSET;
+ break;
+ case XKPRANGE_UC_SEG:
+ offset = XKPRANGE_UC_SHADOW_OFFSET;
+ break;
+ case XKVRANGE_VC_SEG:
+ offset = XKVRANGE_VC_SHADOW_OFFSET;
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+ }
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+ unsigned long addr = (unsigned long)shadow_addr;
+
+ if (unlikely(addr > KASAN_SHADOW_END) ||
+ unlikely(addr < KASAN_SHADOW_START)) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+ return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+ else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+ return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+ else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+ return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+ else {
+ WARN_ON(1);
+ return NULL;
+ }
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
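The three segments above stack their shadow sub-ranges back to back starting at KASAN_SHADOW_START, so every mapped virtual address has exactly one shadow byte. A minimal sketch of the forward mapping for a cached XKPRANGE address, built only from the macros defined above (the address constant is an arbitrary example):

	unsigned long addr = 0x9000000012345678UL;		/* example XKPRANGE_CC address */
	unsigned long seg  = (addr >> XRANGE_SHIFT) & 0xffff;	/* 0x9000 == XKPRANGE_CC_SEG */
	unsigned long off  = addr & XRANGE_SHADOW_MASK;		/* strip the segment bits */
	void *shadow = (void *)((off >> KASAN_SHADOW_SCALE_SHIFT) +
				XKPRANGE_CC_SHADOW_OFFSET);	/* one shadow byte per 8 bytes */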
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644
index 000000000000..6c82aea1c993
--- /dev/null
+++ b/arch/loongarch/include/asm/kfence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+ int err;
+ char *kfence_pool = __kfence_pool;
+ struct vm_struct *area;
+
+ area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+ KFENCE_AREA_START, KFENCE_AREA_END,
+ __builtin_return_address(0));
+ if (!area)
+ return false;
+
+ __kfence_pool = (char *)area->addr;
+ err = ioremap_page_range((unsigned long)__kfence_pool,
+ (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+ virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+ if (err) {
+ free_vm_area(area);
+ __kfence_pool = kfence_pool;
+ return false;
+ }
+
+ return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ pte_t *pte = virt_to_kpte(addr);
+
+ if (WARN_ON(!pte) || pte_none(*pte))
+ return false;
+
+ if (protect)
+ set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+ else
+ set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+ preempt_disable();
+ local_flush_tlb_one(addr);
+ preempt_enable();
+
+ return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
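kfence_protect_page() only flips _PAGE_VALID and _PAGE_PRESENT; the actual detection happens when the protected page faults. A hedged sketch of the fault-side pairing (the real hook is the change to arch/loongarch/mm/fault.c listed in the diffstat):

	/* In the kernel-mode path of the page-fault handler: */
	if (kfence_handle_page_fault(address, write, regs))
		return;	/* KFENCE reported the OOB/UAF access and resumed */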
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644
index 000000000000..2041ae58b161
--- /dev/null
+++ b/arch/loongarch/include/asm/kgdb.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG sizeof(u64)
+
+/* The gdb remote protocol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ * r0-r31: 64 bit
+ * orig_a0: 64 bit
+ * pc : 64 bit
+ * csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE 0
+#define DBG_PT_REGS_NUM 35
+#define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ * f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM 32
+#define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ * fcc0-fcc7: 8 bit
+ */
+#define DBG_FCC_BASE (DBG_FPR_END + 1)
+#define DBG_FCC_NUM 8
+#define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ * fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM 1
+#define DBG_FCSR (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM (DBG_FCSR + 1)
+
+/*
+ * Size of the I/O buffer for gdb packets:
+ * large enough to hold the contents of all registers.
+ */
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes each; FCSR: 4 bytes; FCC: 1 byte each.
+ * GDB fails to connect, with the error "'g' packet reply is too
+ * long", if the reply grows beyond this size.
+ */
+#define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE 4
+#define CACHE_FLUSH_IS_SAFE 0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+ DBG_LOONGARCH_ZERO = 0,
+ DBG_LOONGARCH_RA,
+ DBG_LOONGARCH_TP,
+ DBG_LOONGARCH_SP,
+ DBG_LOONGARCH_A0,
+ DBG_LOONGARCH_FP = 22,
+ DBG_LOONGARCH_S0,
+ DBG_LOONGARCH_S1,
+ DBG_LOONGARCH_S2,
+ DBG_LOONGARCH_S3,
+ DBG_LOONGARCH_S4,
+ DBG_LOONGARCH_S5,
+ DBG_LOONGARCH_S6,
+ DBG_LOONGARCH_S7,
+ DBG_LOONGARCH_S8,
+ DBG_LOONGARCH_ORIG_A0,
+ DBG_LOONGARCH_PC,
+ DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* _ASM_LOONGARCH_KGDB_H */
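For reference, the layout above makes NUMREGBYTES work out to (35 + 32) * 8 + 8 * 1 + 1 * 4 = 548 bytes, comfortably below BUFMAX. A compile-time check, as a sketch rather than part of the patch:

	#include <linux/build_bug.h>

	static_assert(NUMREGBYTES == 548);	/* 67 64-bit regs + 8 fcc bytes + 4 fcsr bytes */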
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644
index 000000000000..e671978bf552
--- /dev/null
+++ b/arch/loongarch/include/asm/lbt.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ * Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+ if (!cpu_has_lbt)
+ return 0;
+
+ return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+ 1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+ return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+ if (cpu_has_lbt)
+ csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+ if (cpu_has_lbt)
+ csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+ enable_lbt();
+ set_thread_flag(TIF_USEDLBT);
+ KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+ if (cpu_has_lbt && !is_lbt_owner()) {
+ __own_lbt();
+ if (restore)
+ _restore_lbt(&current->thread.lbt);
+ }
+}
+
+static inline void own_lbt(int restore)
+{
+ preempt_disable();
+ own_lbt_inatomic(restore);
+ preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+ if (cpu_has_lbt && is_lbt_owner()) {
+ if (save)
+ _save_lbt(&tsk->thread.lbt);
+
+ disable_lbt();
+ clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+ }
+ KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+ preempt_disable();
+ lose_lbt_inatomic(save, current);
+ preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+ __own_lbt();
+ _init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+ if (!cpu_has_lbt)
+ return 0;
+
+ return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
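The header implements the usual lazy-ownership pattern: a task owns the LBT registers only after its first LBT use in a timeslice, and gives them up at context switch (see the switch_to() change below). A sketch of the trap-side flow, with a hypothetical handler name, assuming the first LBT instruction raises an LBT-disabled exception:

	static void do_lbt_fault(struct pt_regs *regs)
	{
		preempt_disable();
		if (!thread_lbt_context_live()) {
			/* First use ever: take ownership and zero-initialize */
			set_thread_flag(TIF_LBT_CTX_LIVE);
			init_lbt();
		} else {
			/* Used before: take ownership and restore saved state */
			own_lbt_inatomic(1);
		}
		preempt_enable();
	}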
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 10748a20a2ab..33531d432b49 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -12,49 +12,6 @@
#ifndef __ASSEMBLY__
#include <larchintrin.h>
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- * "#invtlb op, 0, %0\n\t"
- * ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- * : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n) \
- ".ifc \\r, $r" #n "\n\t" \
- "\\var = " #n "\n\t" \
- ".endif\n\t"
-
-__asm__(".macro parse_r var r\n\t"
- "\\var = -1\n\t"
- _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3)
- _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7)
- _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11)
- _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
- _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
- _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
- _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
- _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
- ".iflt \\var\n\t"
- ".error \"Unable to parse register name \\r\"\n\t"
- ".endif\n\t"
- ".endm");
-
-#undef _IFC_REG
-
/* CPUCFG */
#define read_cpucfg(reg) __cpucfg(reg)
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
#define FPU_CSR_RU 0x200 /* towards +Infinity */
#define FPU_CSR_RD 0x300 /* towards -Infinity */
+/* Bit 6 of the FPU Status Register specifies the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT 0x6
+#define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
#define read_fcsr(source) \
({ \
unsigned int __res; \
diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h
index fe67d0b4b33d..2b9a90727e19 100644
--- a/arch/loongarch/include/asm/mmzone.h
+++ b/arch/loongarch/include/asm/mmzone.h
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[(nid)])
-extern void setup_zero_pages(void);
-
#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 26e8dccb6619..63f137ce82a4 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? \
+ dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 23f5b1107246..79470f0b4f1d 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
#endif /* __PAGETABLE_PUD_FOLDED */
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
#endif /* _ASM_PGALLOC_H */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 06963a172319..29d9b12298bc 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -70,12 +70,9 @@ struct vm_area_struct;
* for zero-mapped memory areas etc..
*/
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) \
- (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
/*
* TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
#define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
#define MODULES_END (MODULES_VADDR + SZ_256M)
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE 0
+#endif
+
#define VMALLOC_START MODULES_END
+
+#ifndef CONFIG_KASAN
#define VMALLOC_END \
(vm_map_base + \
- min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END \
+ (vm_map_base + \
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
+#define KFENCE_AREA_START (VMEMMAP_END + 1)
+#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
#ifndef __PAGETABLE_PMD_FOLDED
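A worked example for the KFENCE_AREA_SIZE formula above, assuming the default CONFIG_KFENCE_NUM_OBJECTS=255 and LoongArch's default 16 KiB pages; each object needs a data page plus a guard page, and the pool gets one extra guard page at each end:

	/* ((255 + 1) * 2 + 2) * 16 KiB = 514 pages = 8224 KiB (~8 MiB) */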
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
extern pgd_t swapper_pg_dir[];
extern pgd_t invalid_pg_dir[];
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
}
#endif /* CONFIG_NUMA_BALANCING */
+#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0)
+
/*
* We provide our own get_unmapped area to cope with the virtual aliasing
* constraints placed on us by the cache architecture.
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index 636e1c66398c..c3bc44b5f5b3 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
BUILD_FPR_ACCESS(64)
struct loongarch_fpu {
- unsigned int fcsr;
uint64_t fcc; /* 8x8 */
+ uint32_t fcsr;
+ uint32_t ftop;
union fpureg fpr[NUM_FPU_REGS];
};
+struct loongarch_lbt {
+ /* Scratch registers */
+ unsigned long scr0;
+ unsigned long scr1;
+ unsigned long scr2;
+ unsigned long scr3;
+ /* Eflags register */
+ unsigned long eflags;
+};
+
#define INIT_CPUMASK { \
{0,} \
}
@@ -113,15 +124,6 @@ struct thread_struct {
unsigned long csr_ecfg;
unsigned long csr_badvaddr; /* Last user fault */
- /* Scratch registers */
- unsigned long scr0;
- unsigned long scr1;
- unsigned long scr2;
- unsigned long scr3;
-
- /* Eflags register */
- unsigned long eflags;
-
/* Other stuff associated with the thread. */
unsigned long trap_nr;
unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
* context because they are conditionally copied at fork().
*/
struct loongarch_fpu fpu FPU_ALIGN;
+ struct loongarch_lbt lbt; /* Also conditionally copied */
/* Hardware breakpoints pinned to this task. */
struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
* FPU & vector registers \
*/ \
.fpu = { \
- .fcsr = 0, \
.fcc = 0, \
+ .fcsr = 0, \
+ .ftop = 0, \
.fpr = {{{0,},},}, \
}, \
.hbp_break = {0}, \
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index be05c0e706a2..a0bc159ce8bd 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -7,6 +7,7 @@
#define _LOONGARCH_SETUP_H
#include <linux/types.h>
+#include <asm/sections.h>
#include <uapi/asm/setup.h>
#define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
extern long __rela_dyn_begin;
extern long __rela_dyn_end;
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
#endif
+static inline unsigned long kaslr_offset(void)
+{
+ return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
#endif /* __SETUP_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 7df80e6ae9d2..4fb1e6408b98 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -158,6 +158,10 @@
cfi_st u0, PT_R21, \docfi
csrrd u0, PERCPU_BASE_KS
9:
+#ifdef CONFIG_KGDB
+ li.w t0, CSR_CRMD_WE
+ csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
.endm
.macro SAVE_ALL docfi=0
diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h
index 7b29cc9c70aa..5bb5a90d2681 100644
--- a/arch/loongarch/include/asm/string.h
+++ b/arch/loongarch/include/asm/string.h
@@ -7,11 +7,31 @@
#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
#endif /* _ASM_STRING_H */
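A sketch of the effect: in a file built without KASAN instrumentation (e.g. via a KASAN_SANITIZE_foo.o := n make rule, which drops the sanitizer flag and therefore __SANITIZE_ADDRESS__), an ordinary call resolves to the uninstrumented alias. The function here is hypothetical:

	void copy_ctx(void *dst, const void *src, size_t n)
	{
		memcpy(dst, src, n);	/* preprocessed to __memcpy(dst, src, n) */
	}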
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index 24e3094bebab..5b225aff3ba2 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -7,6 +7,7 @@
#include <asm/cpu-features.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
struct task_struct;
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
#define switch_to(prev, next, last) \
do { \
lose_fpu_inatomic(1, prev); \
+ lose_lbt_inatomic(1, prev); \
hw_breakpoint_thread_switch(next); \
(last) = __switch_to(prev, next, task_thread_info(next), \
__builtin_return_address(0), __builtin_frame_address(0)); \
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 1a3354ca056e..8cb653d49a54 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
#define TIF_SINGLESTEP 16 /* Single Step */
#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
+#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644
index 000000000000..12467fffee46
--- /dev/null
+++ b/arch/loongarch/include/asm/xor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+ .name = "lsx",
+ .do_2 = xor_lsx_2,
+ .do_3 = xor_lsx_3,
+ .do_4 = xor_lsx_4,
+ .do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX() \
+ do { \
+ if (cpu_has_lsx) \
+ xor_speed(&xor_block_lsx); \
+ } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+ .name = "lasx",
+ .do_2 = xor_lasx_2,
+ .do_3 = xor_lasx_3,
+ .do_4 = xor_lasx_4,
+ .do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX() \
+ do { \
+ if (cpu_has_lasx) \
+ xor_speed(&xor_block_lasx); \
+ } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point in time that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_8regs_p); \
+ xor_speed(&xor_block_32regs); \
+ xor_speed(&xor_block_32regs_p); \
+ XOR_SPEED_LSX(); \
+ XOR_SPEED_LASX(); \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
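All templates compute the same in-place XOR over whole buffers; the LSX/LASX variants merely process 16 or 32 bytes per operation. For clarity, a scalar reference of what any do_2 routine computes:

	void xor_ref_2(unsigned long bytes, unsigned long *p1, const unsigned long *p2)
	{
		unsigned long lines = bytes / sizeof(unsigned long);

		while (lines--)
			*p1++ ^= *p2++;
	}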
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644
index 000000000000..471b96332f38
--- /dev/null
+++ b/arch/loongarch/include/asm/xor_simd.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index 06e3be52cb04..ac915f841650 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -56,6 +56,12 @@ struct user_lasx_state {
uint64_t vregs[32*4];
};
+struct user_lbt_state {
+ uint64_t scr[4];
+ uint32_t eflags;
+ uint32_t ftop;
+};
+
struct user_watch_state {
uint64_t dbg_info;
struct {
diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h
index 4cd7d16f7037..6c22f616b8f1 100644
--- a/arch/loongarch/include/uapi/asm/sigcontext.h
+++ b/arch/loongarch/include/uapi/asm/sigcontext.h
@@ -59,4 +59,14 @@ struct lasx_context {
__u32 fcsr;
};
+/* LBT context */
+#define LBT_CTX_MAGIC 0x42540001
+#define LBT_CTX_ALIGN 8
+struct lbt_context {
+ __u64 regs[4];
+ __u32 eflags;
+ __u32 ftop;
+};
+
+
#endif /* _UAPI_ASM_SIGCONTEXT_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 8e279f04f9e7..c56ea0b75448 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI) += efi.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
+obj-$(CONFIG_CPU_HAS_LBT) += lbt.o
+
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
endif
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
obj-$(CONFIG_UPROBES) += uprobes.o
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 505e4bf59603..8da0726777ed 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -118,13 +118,6 @@ void output_thread_defines(void)
OFFSET(THREAD_CSRECFG, task_struct,
thread.csr_ecfg);
- OFFSET(THREAD_SCR0, task_struct, thread.scr0);
- OFFSET(THREAD_SCR1, task_struct, thread.scr1);
- OFFSET(THREAD_SCR2, task_struct, thread.scr2);
- OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
- OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
OFFSET(THREAD_FPU, task_struct, thread.fpu);
OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
OFFSET(THREAD_FCC, loongarch_fpu, fcc);
+ OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+ BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+ OFFSET(THREAD_SCR0, loongarch_lbt, scr0);
+ OFFSET(THREAD_SCR1, loongarch_lbt, scr1);
+ OFFSET(THREAD_SCR2, loongarch_lbt, scr2);
+ OFFSET(THREAD_SCR3, loongarch_lbt, scr3);
+ OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
BLANK();
}
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index e925579c7a71..55320813ee08 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_LVZ;
elf_hwcap |= HWCAP_LOONGARCH_LVZ;
}
+#ifdef CONFIG_CPU_HAS_LBT
+ if (config & CPUCFG2_X86BT) {
+ c->options |= LOONGARCH_CPU_LBT_X86;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+ }
+ if (config & CPUCFG2_ARMBT) {
+ c->options |= LOONGARCH_CPU_LBT_ARM;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+ }
+ if (config & CPUCFG2_MIPSBT) {
+ c->options |= LOONGARCH_CPU_LBT_MIPS;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+ }
+#endif
config = read_cpucfg(LOONGARCH_CPUCFG6);
if (config & CPUCFG6_PMP)
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d737e3cf42d3..65518bb8f472 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
SAVE_STATIC
+#ifdef CONFIG_KGDB
+ li.w t1, CSR_CRMD_WE
+ csrxchg t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
move u0, t0
li.d tp, ~_THREAD_MASK
and tp, tp, sp
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 501094a09f5d..d53ab10f4644 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -22,7 +22,7 @@
.macro EX insn, reg, src, offs
.ex\@: \insn \reg, \src, \offs
- _asm_extable .ex\@, fault
+ _asm_extable .ex\@, .L_fpu_fault
.endm
.macro sc_save_fp base
@@ -138,6 +138,13 @@
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
EX st.w \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp0, \tmp0, FPU_CSR_TM
+ beqz \tmp0, 1f
+ x86clrtm
+1:
+#endif
.endm
.macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
*/
SYM_FUNC_START(_restore_fp)
fpu_restore_double a0 t1 # clobbers t1
- fpu_restore_csr a0 t1
+ fpu_restore_csr a0 t1 t2
fpu_restore_cc a0 t1 t2 # clobbers t1, t2
jr ra
SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
jr ra
SYM_FUNC_END(_restore_lasx_context)
-SYM_FUNC_START(fault)
+.L_fpu_fault:
li.w a0, -EFAULT # failure
jr ra
-SYM_FUNC_END(fault)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 5e828a8bc0a0..53b883db0786 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry) # kernel entry point
PTR_LI sp, (_THREAD_SIZE - PT_SIZE)
PTR_ADD sp, sp, tp
set_saved_sp sp, t0, t1
-#endif
- /* relocate_kernel() returns the new kernel entry point */
- jr a0
- ASM_BUG()
+ /* Jump to the new kernel: new_pc = current_pc + random_offset */
+ pcaddi t0, 0
+ add.d t0, t0, a0
+ jirl zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
#endif
bl start_kernel
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index 5c46ae8c6cac..ec5b28e570c9 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -8,19 +8,40 @@
#include <asm/fpu.h>
#include <asm/smp.h>
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
void kernel_fpu_begin(void)
{
+ unsigned int *euen_curr;
+
preempt_disable();
WARN_ON(this_cpu_read(in_kernel_fpu));
this_cpu_write(in_kernel_fpu, true);
+ euen_curr = this_cpu_ptr(&euen_current);
- if (!is_fpu_owner())
- enable_fpu();
+ *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+ if (*euen_curr & CSR_EUEN_LASXEN)
+ _save_lasx(&current->thread.fpu);
+ else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ if (*euen_curr & CSR_EUEN_LSXEN)
+ _save_lsx(&current->thread.fpu);
else
+#endif
+ if (*euen_curr & CSR_EUEN_FPEN)
_save_fp(&current->thread.fpu);
write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
+ unsigned int *euen_curr;
+
WARN_ON(!this_cpu_read(in_kernel_fpu));
- if (!is_fpu_owner())
- disable_fpu();
+ euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+ if (*euen_curr & CSR_EUEN_LASXEN)
+ _restore_lasx(&current->thread.fpu);
else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ if (*euen_curr & CSR_EUEN_LSXEN)
+ _restore_lsx(&current->thread.fpu);
+ else
+#endif
+ if (*euen_curr & CSR_EUEN_FPEN)
_restore_fp(&current->thread.fpu);
+ *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+static int __init init_euen_mask(void)
+{
+ if (cpu_has_lsx)
+ euen_mask |= CSR_EUEN_LSXEN;
+
+ if (cpu_has_lasx)
+ euen_mask |= CSR_EUEN_LASXEN;
+
+ return 0;
+}
+arch_initcall(init_euen_mask);
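Typical use of the pair, mirroring the amdgpu display code referenced in the comment above; the section runs with preemption disabled and must not sleep or nest:

	kernel_fpu_begin();
	/* ... FP/LSX/LASX computation on kernel buffers ... */
	kernel_fpu_end();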
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644
index 000000000000..445c452d72a7
--- /dev/null
+++ b/arch/loongarch/kernel/kgdb.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+ { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+ { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+ { "f0", GDB_SIZEOF_REG, 0 },
+ { "f1", GDB_SIZEOF_REG, 1 },
+ { "f2", GDB_SIZEOF_REG, 2 },
+ { "f3", GDB_SIZEOF_REG, 3 },
+ { "f4", GDB_SIZEOF_REG, 4 },
+ { "f5", GDB_SIZEOF_REG, 5 },
+ { "f6", GDB_SIZEOF_REG, 6 },
+ { "f7", GDB_SIZEOF_REG, 7 },
+ { "f8", GDB_SIZEOF_REG, 8 },
+ { "f9", GDB_SIZEOF_REG, 9 },
+ { "f10", GDB_SIZEOF_REG, 10 },
+ { "f11", GDB_SIZEOF_REG, 11 },
+ { "f12", GDB_SIZEOF_REG, 12 },
+ { "f13", GDB_SIZEOF_REG, 13 },
+ { "f14", GDB_SIZEOF_REG, 14 },
+ { "f15", GDB_SIZEOF_REG, 15 },
+ { "f16", GDB_SIZEOF_REG, 16 },
+ { "f17", GDB_SIZEOF_REG, 17 },
+ { "f18", GDB_SIZEOF_REG, 18 },
+ { "f19", GDB_SIZEOF_REG, 19 },
+ { "f20", GDB_SIZEOF_REG, 20 },
+ { "f21", GDB_SIZEOF_REG, 21 },
+ { "f22", GDB_SIZEOF_REG, 22 },
+ { "f23", GDB_SIZEOF_REG, 23 },
+ { "f24", GDB_SIZEOF_REG, 24 },
+ { "f25", GDB_SIZEOF_REG, 25 },
+ { "f26", GDB_SIZEOF_REG, 26 },
+ { "f27", GDB_SIZEOF_REG, 27 },
+ { "f28", GDB_SIZEOF_REG, 28 },
+ { "f29", GDB_SIZEOF_REG, 29 },
+ { "f30", GDB_SIZEOF_REG, 30 },
+ { "f31", GDB_SIZEOF_REG, 31 },
+ { "fcc0", 1, 0 },
+ { "fcc1", 1, 1 },
+ { "fcc2", 1, 2 },
+ { "fcc3", 1, 3 },
+ { "fcc4", 1, 4 },
+ { "fcc5", 1, 5 },
+ { "fcc6", 1, 6 },
+ { "fcc7", 1, 7 },
+ { "fcsr", 4, 0 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ int reg_offset, reg_size;
+
+ if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+ return NULL;
+
+ reg_offset = dbg_reg_def[regno].offset;
+ reg_size = dbg_reg_def[regno].size;
+
+ if (reg_offset == -1)
+ goto out;
+
+ /* Handle general-purpose/orig_a0/pc/badv registers */
+ if (regno <= DBG_PT_REGS_END) {
+ memcpy(mem, (void *)regs + reg_offset, reg_size);
+ goto out;
+ }
+
+ if (!(regs->csr_euen & CSR_EUEN_FPEN))
+ goto out;
+
+ save_fp(current);
+
+ /* Handle FP registers */
+ switch (regno) {
+ case DBG_FCSR: /* Process the fcsr */
+ memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+ break;
+ case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */
+ memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+ break;
+ case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */
+ memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+ break;
+ default:
+ break;
+ }
+
+out:
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ int reg_offset, reg_size;
+
+ if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+ return -EINVAL;
+
+ reg_offset = dbg_reg_def[regno].offset;
+ reg_size = dbg_reg_def[regno].size;
+
+ if (reg_offset == -1)
+ return 0;
+
+ /* Handle general-purpose/orig_a0/pc/badv registers */
+ if (regno <= DBG_PT_REGS_END) {
+ memcpy((void *)regs + reg_offset, mem, reg_size);
+ return 0;
+ }
+
+ if (!(regs->csr_euen & CSR_EUEN_FPEN))
+ return 0;
+
+ /* Handle FP registers */
+ switch (regno) {
+ case DBG_FCSR: /* Process the fcsr */
+ memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+ break;
+ case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */
+ memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+ break;
+ case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */
+ memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+ break;
+ default:
+ break;
+ }
+
+ restore_fp(current);
+
+ return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that the process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ /* Initialize to zero */
+ memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+ gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+ gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+ gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+ /* S0 - S8 */
+ gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+ gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+ gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+ gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+ gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+ gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+ gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+ gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+ gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+ /*
+	 * PC uses the return address (RA), i.e. the moment after return from __switch_to()
+ */
+ gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->csr_era = pc;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+	__asm__ __volatile__ (
+		".globl kgdb_breakinst\n\t"
+		"nop\n"
+		"kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Called via the die notifier chain before the kernel dies. If KGDB is
+ * enabled, try to fall into the debugger.
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ struct die_args *args = (struct die_args *)ptr;
+ struct pt_regs *regs = args->regs;
+
+ /* Userspace events, ignore. */
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (!kgdb_io_module_registered)
+ return NOTIFY_DONE;
+
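+	/* A KGDB session is already active: report in as a secondary CPU */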
+ if (atomic_read(&kgdb_active) != -1)
+ kgdb_nmicallback(smp_processor_id(), regs);
+
+ if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+
+ if (atomic_read(&kgdb_setting_breakpoint))
+ if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+ regs->csr_era += LOONGARCH_INSN_SIZE;
+
+ return NOTIFY_STOP;
+}
+
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = "Break",
+ .err = BRK_KDB,
+ .trapnr = read_csr_excode(),
+ .signr = SIGTRAP,
+ };
+
+	return kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_loongarch_notify,
+};
+
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+ char *remcom_in_buffer)
+{
+ unsigned long addr;
+ char *ptr;
+
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ regs->csr_era = addr;
+}
+
+/* Calculate the address of the instruction that executes after a single step */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+ char cj_val;
+ unsigned int si, si_l, si_h, rd, rj, cj;
+ unsigned long pc = instruction_pointer(regs);
+ union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+ if (pc & 3) {
+ pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+ return -EINVAL;
+ }
+
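+	/* Default: fall through to the next sequential instruction */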
+ *next_addr = pc + LOONGARCH_INSN_SIZE;
+
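+	/* Unconditional 26-bit-immediate branches: b and bl */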
+ si_h = ip->reg0i26_format.immediate_h;
+ si_l = ip->reg0i26_format.immediate_l;
+ switch (ip->reg0i26_format.opcode) {
+ case b_op:
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+ return 0;
+ case bl_op:
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+ regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+ return 0;
+ }
+
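+	/*
+	 * 21-bit-immediate branches; for bceqz/bcnez the FCC index lives
+	 * in the low 3 bits of the rj field.
+	 */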
+ rj = ip->reg1i21_format.rj;
+ cj = (rj & 0x07) + DBG_FCC_BASE;
+ si_l = ip->reg1i21_format.immediate_l;
+ si_h = ip->reg1i21_format.immediate_h;
+ dbg_get_reg(cj, &cj_val, regs);
+ switch (ip->reg1i21_format.opcode) {
+ case beqz_op:
+ if (regs->regs[rj] == 0)
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ case bnez_op:
+ if (regs->regs[rj] != 0)
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ case bceqz_op: /* bceqz_op = bcnez_op */
+ if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ }
+
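+	/* 16-bit-immediate branches (beq/bne/blt[u]/bge[u]) and jirl */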
+ rj = ip->reg2i16_format.rj;
+ rd = ip->reg2i16_format.rd;
+ si = ip->reg2i16_format.immediate;
+ switch (ip->reg2i16_format.opcode) {
+ case beq_op:
+ if (regs->regs[rj] == regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bne_op:
+ if (regs->regs[rj] != regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case blt_op:
+ if ((long)regs->regs[rj] < (long)regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bge_op:
+ if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bltu_op:
+ if (regs->regs[rj] < regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bgeu_op:
+ if (regs->regs[rj] >= regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case jirl_op:
+ regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+ *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int do_single_step(struct pt_regs *regs)
+{
+ int error = 0;
+	unsigned long addr = 0; /* Where the stepped instruction will take us */
+
+ error = get_step_address(regs, &addr);
+ if (error)
+ return error;
+
+	/* Save the original opcode at the step target address */
+ error = get_kernel_nofault(stepped_opcode, (void *)addr);
+ if (error)
+ return error;
+
+ stepped_address = addr;
+
+ /* Replace the opcode with the break instruction */
+ error = copy_to_kernel_nofault((void *)stepped_address,
+ arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+ flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+ if (error) {
+ stepped_opcode = 0;
+ stepped_address = 0;
+ } else {
+ kgdb_single_step = 1;
+ atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+ }
+
+ return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+ if (stepped_opcode) {
+ copy_to_kernel_nofault((void *)stepped_address,
+ (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+ flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+ }
+
+ stepped_opcode = 0;
+ stepped_address = 0;
+ kgdb_single_step = 0;
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *regs)
+{
+ int ret = 0;
+
+ undo_single_step(regs);
+ regs->csr_prmd |= CSR_PRMD_PWE;
+
+ switch (remcom_in_buffer[0]) {
+ case 'D':
+ case 'k':
+ regs->csr_prmd &= ~CSR_PRMD_PWE;
+ fallthrough;
+ case 'c':
+ kgdb_arch_update_addr(regs, remcom_in_buffer);
+ break;
+ case 's':
+ kgdb_arch_update_addr(regs, remcom_in_buffer);
+ ret = do_single_step(regs);
+ break;
+ default:
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static struct hw_breakpoint {
+ unsigned int enabled;
+ unsigned long addr;
+ int len;
+ int type;
+ struct perf_event * __percpu *pev;
+} breakinfo[LOONGARCH_MAX_BRP];
+
+static int hw_break_reserve_slot(int breakno)
+{
+ int cpu, cnt = 0;
+ struct perf_event **pevent;
+
+ for_each_online_cpu(cpu) {
+ cnt++;
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ if (dbg_reserve_bp_slot(*pevent))
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ for_each_online_cpu(cpu) {
+ cnt--;
+ if (!cnt)
+ break;
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ dbg_release_bp_slot(*pevent);
+ }
+
+ return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+ int cpu;
+ struct perf_event **pevent;
+
+ if (dbg_is_early)
+ return 0;
+
+ for_each_online_cpu(cpu) {
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ if (dbg_release_bp_slot(*pevent))
+ /*
+			 * The debugger is responsible for handling the retry on
+ * remove failure.
+ */
+ return -1;
+ }
+
+ return 0;
+}
+
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+ if (!breakinfo[i].enabled)
+ break;
+
+ if (i == LOONGARCH_MAX_BRP)
+ return -1;
+
+ switch (bptype) {
+ case BP_HARDWARE_BREAKPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_X;
+ break;
+ case BP_READ_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_R;
+ break;
+ case BP_WRITE_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_W;
+ break;
+ case BP_ACCESS_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_RW;
+ break;
+ default:
+ return -1;
+ }
+
+ switch (len) {
+ case 1:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+ break;
+ case 2:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+ break;
+ case 4:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+ break;
+ case 8:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+ break;
+ default:
+ return -1;
+ }
+
+ breakinfo[i].addr = addr;
+ if (hw_break_reserve_slot(i)) {
+ breakinfo[i].addr = 0;
+ return -1;
+ }
+ breakinfo[i].enabled = 1;
+
+ return 0;
+}
+
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+ if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+ break;
+
+ if (i == LOONGARCH_MAX_BRP)
+ return -1;
+
+ if (hw_break_release_slot(i)) {
+ pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+ return -1;
+ }
+ breakinfo[i].enabled = 0;
+
+ return 0;
+}
+
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+ int i;
+ int cpu = raw_smp_processor_id();
+ struct perf_event *bp;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (bp->attr.disabled == 1)
+ continue;
+
+ arch_uninstall_hw_breakpoint(bp);
+ bp->attr.disabled = 1;
+ }
+
+ /* Disable hardware debugging while we are in kgdb */
+ csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+ int i;
+ int cpu = raw_smp_processor_id();
+ struct perf_event *bp;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (!bp->attr.disabled) {
+ arch_uninstall_hw_breakpoint(bp);
+ bp->attr.disabled = 1;
+ continue;
+ }
+
+ if (hw_break_release_slot(i))
+ pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+ breakinfo[i].enabled = 0;
+ }
+
+ csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+ kgdb_watch_activated = 0;
+}
+
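+/* Re-install all enabled breakpoints that were disabled on debugger entry */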
+static void kgdb_correct_hw_break(void)
+{
+ int i, activated = 0;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ struct perf_event *bp;
+ int val;
+ int cpu = raw_smp_processor_id();
+
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (bp->attr.disabled != 1)
+ continue;
+
+ bp->attr.bp_addr = breakinfo[i].addr;
+ bp->attr.bp_len = breakinfo[i].len;
+ bp->attr.bp_type = breakinfo[i].type;
+
+ val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+ if (val)
+ return;
+
+ val = arch_install_hw_breakpoint(bp);
+ if (!val)
+ bp->attr.disabled = 0;
+ activated = 1;
+ }
+
+ csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+ kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+ .flags = KGDB_HW_BREAKPOINT,
+ .set_hw_breakpoint = kgdb_set_hw_break,
+ .remove_hw_breakpoint = kgdb_remove_hw_break,
+ .disable_hw_break = kgdb_disable_hw_break,
+ .remove_all_hw_break = kgdb_remove_all_hw_break,
+ .correct_hw_break = kgdb_correct_hw_break,
+};
+
+int kgdb_arch_init(void)
+{
+ return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
+{
+ int i, cpu;
+ struct perf_event_attr attr;
+ struct perf_event **pevent;
+
+ hw_breakpoint_init(&attr);
+
+ attr.bp_addr = (unsigned long)kgdb_arch_init;
+ attr.bp_len = HW_BREAKPOINT_LEN_4;
+ attr.bp_type = HW_BREAKPOINT_W;
+ attr.disabled = 1;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (breakinfo[i].pev)
+ continue;
+
+ breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+ if (IS_ERR((void * __force)breakinfo[i].pev)) {
+ pr_err("kgdb: Could not allocate hw breakpoints.\n");
+ breakinfo[i].pev = NULL;
+ return;
+ }
+
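+		/*
+		 * Release the slots reserved by perf; KGDB re-reserves them
+		 * via dbg_reserve_bp_slot() when a breakpoint is set.
+		 */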
+ for_each_online_cpu(cpu) {
+ pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (pevent[0]->destroy) {
+ pevent[0]->destroy = NULL;
+ release_bp_slot(*pevent);
+ }
+ }
+ }
+}
+
+void kgdb_arch_exit(void)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (breakinfo[i].pev) {
+ unregister_wide_hw_breakpoint(breakinfo[i].pev);
+ breakinfo[i].pev = NULL;
+ }
+ }
+
+ unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644
index 000000000000..9c75120a26d8
--- /dev/null
+++ b/arch/loongarch/kernel/lbt.S
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ * Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
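+/* Wrap a memory access with an extable entry redirecting faults to .L_lbt_fault */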
+ .macro EX insn, reg, src, offs
+.ex\@: \insn \reg, \src, \offs
+ _asm_extable .ex\@, .L_lbt_fault
+ .endm
+
+/*
+ * Save a thread's lbt context.
+ */
+SYM_FUNC_START(_save_lbt)
+ movscr2gr t1, $scr0 # save scr
+ stptr.d t1, a0, THREAD_SCR0
+ movscr2gr t1, $scr1
+ stptr.d t1, a0, THREAD_SCR1
+ movscr2gr t1, $scr2
+ stptr.d t1, a0, THREAD_SCR2
+ movscr2gr t1, $scr3
+ stptr.d t1, a0, THREAD_SCR3
+
+ x86mfflag t1, 0x3f # save eflags
+ stptr.d t1, a0, THREAD_EFLAGS
+ jr ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ */
+SYM_FUNC_START(_restore_lbt)
+ ldptr.d t1, a0, THREAD_SCR0 # restore scr
+ movgr2scr $scr0, t1
+ ldptr.d t1, a0, THREAD_SCR1
+ movgr2scr $scr1, t1
+ ldptr.d t1, a0, THREAD_SCR2
+ movgr2scr $scr2, t1
+ ldptr.d t1, a0, THREAD_SCR3
+ movgr2scr $scr3, t1
+
+ ldptr.d t1, a0, THREAD_EFLAGS # restore eflags
+ x86mtflag t1, 0x3f
+ jr ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+ movgr2scr $scr0, zero
+ movgr2scr $scr1, zero
+ movgr2scr $scr2, zero
+ movgr2scr $scr3, zero
+
+ x86mtflag zero, 0x3f
+ jr ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_save_lbt_context)
+ movscr2gr t1, $scr0 # save scr
+ EX st.d t1, a0, (0 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr1
+ EX st.d t1, a0, (1 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr2
+ EX st.d t1, a0, (2 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr3
+ EX st.d t1, a0, (3 * SCR_REG_WIDTH)
+
+ x86mfflag t1, 0x3f # save eflags
+ EX st.w t1, a1, 0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_restore_lbt_context)
+ EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr
+ movgr2scr $scr0, t1
+ EX ld.d t1, a0, (1 * SCR_REG_WIDTH)
+ movgr2scr $scr1, t1
+ EX ld.d t1, a0, (2 * SCR_REG_WIDTH)
+ movgr2scr $scr2, t1
+ EX ld.d t1, a0, (3 * SCR_REG_WIDTH)
+ movgr2scr $scr3, t1
+
+ EX ld.w t1, a1, 0 # restore eflags
+ x86mtflag t1, 0x3f
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_save_ftop_context)
+ x86mftop t1
+ st.w t1, a0, 0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_restore_ftop_context)
+ ld.w t1, a0, 0
+ andi t1, t1, 0x7
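+	/* Indexed jump into the table at 1f: each entry is 2 insns (8 bytes) */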
+ la.pcrel a0, 1f
+ alsl.d a0, t1, a0, 3
+ jr a0
+1:
+ x86mttop 0
+ b 2f
+ x86mttop 1
+ b 2f
+ x86mttop 2
+ b 2f
+ x86mttop 3
+ b 2f
+ x86mttop 4
+ b 2f
+ x86mttop 5
+ b 2f
+ x86mttop 6
+ b 2f
+ x86mttop 7
+2:
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_ftop_context)
+
+.L_lbt_fault:
+ li.w a0, -EFAULT # failure
+ jr ra
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 708665895b47..c7d33c489e04 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
void __init pcpu_populate_pte(unsigned long addr)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d = p4d_offset(pgd, addr);
- pud_t *pud;
- pmd_t *pmd;
-
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
- pud_init(new);
-#endif
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init(new);
-#endif
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, new);
- }
+ populate_kernel_pte(addr);
}
void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
{
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
memblock_free_all();
- setup_zero_pages(); /* This comes from node 0 */
}
int pcibus_to_node(struct pci_bus *bus)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index ba457e43f5be..3cb082e0c992 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -38,6 +38,7 @@
#include <asm/cpu.h>
#include <asm/elf.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
regs->csr_euen = euen;
lose_fpu(0);
+ lose_lbt(0);
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
+ clear_thread_flag(TIF_LBT_CTX_LIVE);
clear_used_math();
regs->csr_era = pc;
regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
preempt_enable();
- if (used_math())
- memcpy(dst, src, sizeof(struct task_struct));
- else
+ if (!used_math())
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+ else
+ memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+ memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
return 0;
}
@@ -189,8 +196,10 @@ out:
ptrace_hw_copy_thread(p);
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDSIMD);
+ clear_tsk_thread_flag(p, TIF_USEDLBT);
clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+ clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
return 0;
}
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index f72adbf530c6..c114c5ef1332 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -38,6 +38,7 @@
#include <asm/cpu.h>
#include <asm/cpu-info.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/loongarch.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
#endif /* CONFIG_CPU_HAS_LSX */
+#ifdef CONFIG_CPU_HAS_LBT
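+/* Regset layout: scr0-scr3 (4 x u64), then eflags (u32) and ftop (u32) */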
+static int lbt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ int r;
+
+ r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+ r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+ r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+ r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+ r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+ r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+ return r;
+}
+
+static int lbt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int err = 0;
+ const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+ const int ftop_start = eflags_start + sizeof(u32);
+
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.lbt.scr0,
+ 0, 4 * sizeof(target->thread.lbt.scr0));
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.lbt.eflags,
+ eflags_start, ftop_start);
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.ftop,
+ ftop_start, ftop_start + sizeof(u32));
+
+ return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
@@ -802,6 +843,9 @@ enum loongarch_regset {
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
+#ifdef CONFIG_CPU_HAS_LBT
+ REGSET_LBT,
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
.set = simd_set,
},
#endif
+#ifdef CONFIG_CPU_HAS_LBT
+ [REGSET_LBT] = {
+ .core_note_type = NT_LOONGARCH_LBT,
+ .n = 5,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = lbt_get,
+ .set = lbt_set,
+ },
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 01f94d1e3edf..6c3eff9af9fb 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
*new_addr = (unsigned long)reloc_offset;
}
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
{
unsigned long kernel_length;
unsigned long random_offset = 0;
void *location_new = _text; /* Default to original kernel start */
- void *kernel_entry = start_kernel; /* Default to original kernel entry point */
char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
reloc_offset += random_offset;
- /* Return the new kernel's entry point */
- kernel_entry = RELOCATED_KASLR(start_kernel);
-
/* The current thread is now within the relocated kernel */
__current_thread_info = RELOCATED_KASLR(__current_thread_info);
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
relocate_absolute(random_offset);
- return kernel_entry;
+ return random_offset;
}
/*
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 9d830ab4e302..7783f0a3d742 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
#endif
paging_init();
+
+#ifdef CONFIG_KASAN
+ kasan_init();
+#endif
}
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index ceb899366c0a..504fdfe85203 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -32,6 +32,7 @@
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
@@ -44,6 +45,9 @@
/* Make sure we will not lose FPU ownership */
#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); })
/* Assembly functions to move context to/from the FPU */
extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
struct rt_sigframe {
struct siginfo rs_info;
struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
struct _ctx_layout fpu;
struct _ctx_layout lsx;
struct _ctx_layout lasx;
+ struct _ctx_layout lbt;
struct _ctx_layout end;
};
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
return err;
}
+#ifdef CONFIG_CPU_HAS_LBT
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+ err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+ err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+ err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+ err |= __put_user(current->thread.lbt.eflags, eflags);
+
+ return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+ err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+ err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+ err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+ err |= __get_user(current->thread.lbt.eflags, eflags);
+
+ return err;
+}
+
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
return _restore_lasx_context(regs, fcc, fcsr);
}
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ return _save_lbt_context(regs, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ return _restore_lbt_context(regs, eflags);
+}
+
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return _save_ftop_context(ftop);
+}
+
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return _restore_ftop_context(ftop);
+}
+#endif
+
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
return err ?: sig;
}
+#ifdef CONFIG_CPU_HAS_LBT
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+ int err = 0;
+ struct sctx_info __user *info = extctx->lbt.addr;
+ struct lbt_context __user *lbt_ctx =
+ (struct lbt_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+ while (1) {
+ lock_lbt_owner();
+ if (is_lbt_owner())
+ err |= save_hw_lbt_context(lbt_ctx);
+ else
+ err |= copy_lbt_to_sigcontext(lbt_ctx);
+ if (is_fpu_owner())
+ err |= save_hw_ftop_context(lbt_ctx);
+ else
+ err |= copy_ftop_to_sigcontext(lbt_ctx);
+ unlock_lbt_owner();
+
+ err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+ err |= __put_user(extctx->lbt.size, &info->size);
+
+ if (likely(!err))
+ break;
+ /* Touch the LBT context and try again */
+ err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+ int err = 0, tmp __maybe_unused;
+ struct sctx_info __user *info = extctx->lbt.addr;
+ struct lbt_context __user *lbt_ctx =
+ (struct lbt_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+ while (1) {
+ lock_lbt_owner();
+ if (is_lbt_owner())
+ err |= restore_hw_lbt_context(lbt_ctx);
+ else
+ err |= copy_lbt_from_sigcontext(lbt_ctx);
+ if (is_fpu_owner())
+ err |= restore_hw_ftop_context(lbt_ctx);
+ else
+ err |= copy_ftop_from_sigcontext(lbt_ctx);
+ unlock_lbt_owner();
+
+ if (likely(!err))
+ break;
+ /* Touch the LBT context and try again */
+ err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+#endif
+
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
+#ifdef CONFIG_CPU_HAS_LBT
+ if (extctx->lbt.addr)
+ err |= protected_save_lbt_context(extctx);
+#endif
+
/* Set the "end" magic */
info = (struct sctx_info *)extctx->end.addr;
err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
extctx->lasx.addr = info;
break;
+ case LBT_CTX_MAGIC:
+ if (size < (sizeof(struct sctx_info) +
+ sizeof(struct lbt_context)))
+ goto invalid;
+ extctx->lbt.addr = info;
+ break;
+
default:
goto invalid;
}
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
+#ifdef CONFIG_CPU_HAS_LBT
+ if (extctx.lbt.addr)
+ err |= protected_restore_lbt_context(&extctx);
+#endif
+
bad:
return err;
}
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}
+#ifdef CONFIG_CPU_HAS_LBT
+ if (cpu_has_lbt && thread_lbt_context_live()) {
+ new_sp = extframe_alloc(extctx, &extctx->lbt,
+ sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+ }
+#endif
+
return new_sp;
}
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 2463d2fea21f..92270f14db94 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct pt_regs dummyregs;
struct unwind_state state;
- regs = &dummyregs;
+ if (!regs) {
+ regs = &dummyregs;
- if (task == current) {
- regs->regs[3] = (unsigned long)__builtin_frame_address(0);
- regs->csr_era = (unsigned long)__builtin_return_address(0);
- } else {
- regs->regs[3] = thread_saved_fp(task);
- regs->csr_era = thread_saved_ra(task);
+ if (task == current) {
+ regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+ regs->csr_era = (unsigned long)__builtin_return_address(0);
+ } else {
+ regs->regs[3] = thread_saved_fp(task);
+ regs->csr_era = thread_saved_ra(task);
+ }
+ regs->regs[1] = 0;
}
- regs->regs[1] = 0;
for (unwind_start(&state, task, regs);
!unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 89699db45cec..65214774ef7c 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -36,7 +36,9 @@
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/inst.h>
+#include <asm/kgdb.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
* pertain to them.
*/
switch (bcode) {
+ case BRK_KDB:
+ if (kgdb_breakpoint_handler(regs))
+ goto out;
+ else
+ break;
case BRK_KPROBE_BP:
if (kprobe_breakpoint_handler(regs))
goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
#ifndef CONFIG_HAVE_HW_BREAKPOINT
pr_warn("Hardware watch point handler not implemented!\n");
#else
+ if (kgdb_breakpoint_handler(regs))
+ goto out;
+
if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
irqentry_exit(regs, state);
}
+static void init_restore_lbt(void)
+{
+ if (!thread_lbt_context_live()) {
+ /* First time LBT context user */
+ init_lbt();
+ set_thread_flag(TIF_LBT_CTX_LIVE);
+ } else {
+ if (!is_lbt_owner())
+ own_lbt_inatomic(1);
+ }
+
+ BUG_ON(!is_lbt_enabled());
+}
+
asmlinkage void noinstr do_lbt(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
- local_irq_enable();
- force_sig(SIGILL);
- local_irq_disable();
+ /*
+ * BTD (Binary Translation Disable exception) can be triggered
+ * during FP save/restore if TM (Top Mode) is on, which may
+	 * cause an irq_enable during 'switch_to'. To avoid this situation
+	 * (including the case where the user turns TM on via 'MOVGR2GCSR',
+	 * which does not trigger the BTD), we need to check PRMD first.
+ */
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_enable();
+
+ if (!cpu_has_lbt) {
+ force_sig(SIGILL);
+ goto out;
+ }
+ BUG_ON(is_lbt_enabled());
+
+ preempt_disable();
+ init_restore_lbt();
+ preempt_enable();
+
+out:
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_disable();
irqentry_exit(regs, state);
}
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index d60d4e096cfa..a77bf160bfc4 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -6,4 +6,6 @@
lib-y += delay.o memset.o memcpy.o memmove.o \
clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 0790eadce166..be741544e62b 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -11,19 +11,6 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
- sub.d a0, a2, a0
- addi.d a0, a0, (\to) * (-8)
- jr ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
- addi.d a0, a1, -\to
- jr ra
-.endr
-
SYM_FUNC_START(__clear_user)
/*
* Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
2: move a0, a1
jr ra
- _asm_extable 1b, .L_fixup_handle_s0
+ _asm_extable 1b, 2b
SYM_FUNC_END(__clear_user_generic)
/*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
jr ra
/* fixup and ex_table */
- _asm_extable 0b, .L_fixup_handle_0
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_1
- _asm_extable 3b, .L_fixup_handle_2
- _asm_extable 4b, .L_fixup_handle_3
- _asm_extable 5b, .L_fixup_handle_4
- _asm_extable 6b, .L_fixup_handle_5
- _asm_extable 7b, .L_fixup_handle_6
- _asm_extable 8b, .L_fixup_handle_7
- _asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_1
- _asm_extable 11b, .L_fixup_handle_2
- _asm_extable 12b, .L_fixup_handle_3
- _asm_extable 13b, .L_fixup_handle_0
- _asm_extable 14b, .L_fixup_handle_1
- _asm_extable 15b, .L_fixup_handle_0
- _asm_extable 16b, .L_fixup_handle_0
- _asm_extable 17b, .L_fixup_handle_s0
- _asm_extable 18b, .L_fixup_handle_s0
- _asm_extable 19b, .L_fixup_handle_s0
- _asm_extable 20b, .L_fixup_handle_s2
- _asm_extable 21b, .L_fixup_handle_s0
- _asm_extable 22b, .L_fixup_handle_s0
- _asm_extable 23b, .L_fixup_handle_s4
- _asm_extable 24b, .L_fixup_handle_s0
- _asm_extable 25b, .L_fixup_handle_s4
- _asm_extable 26b, .L_fixup_handle_s0
- _asm_extable 27b, .L_fixup_handle_s4
- _asm_extable 28b, .L_fixup_handle_s0
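+/*
+ * Fault fixup: compute the remaining length, then clear byte by byte
+ * until the fault recurs; a0 returns the number of bytes left.
+ */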
+.Llarge_fixup:
+ sub.d a1, a2, a0
+
+.Lsmall_fixup:
+29: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgt a1, zero, 29b
+
+.Lexit:
+ move a0, a1
+ jr ra
+
+ _asm_extable 0b, .Lsmall_fixup
+ _asm_extable 1b, .Llarge_fixup
+ _asm_extable 2b, .Llarge_fixup
+ _asm_extable 3b, .Llarge_fixup
+ _asm_extable 4b, .Llarge_fixup
+ _asm_extable 5b, .Llarge_fixup
+ _asm_extable 6b, .Llarge_fixup
+ _asm_extable 7b, .Llarge_fixup
+ _asm_extable 8b, .Llarge_fixup
+ _asm_extable 9b, .Llarge_fixup
+ _asm_extable 10b, .Llarge_fixup
+ _asm_extable 11b, .Llarge_fixup
+ _asm_extable 12b, .Llarge_fixup
+ _asm_extable 13b, .Llarge_fixup
+ _asm_extable 14b, .Llarge_fixup
+ _asm_extable 15b, .Llarge_fixup
+ _asm_extable 16b, .Llarge_fixup
+ _asm_extable 17b, .Lexit
+ _asm_extable 18b, .Lsmall_fixup
+ _asm_extable 19b, .Lsmall_fixup
+ _asm_extable 20b, .Lsmall_fixup
+ _asm_extable 21b, .Lsmall_fixup
+ _asm_extable 22b, .Lsmall_fixup
+ _asm_extable 23b, .Lsmall_fixup
+ _asm_extable 24b, .Lsmall_fixup
+ _asm_extable 25b, .Lsmall_fixup
+ _asm_extable 26b, .Lsmall_fixup
+ _asm_extable 27b, .Lsmall_fixup
+ _asm_extable 28b, .Lsmall_fixup
+ _asm_extable 29b, .Lexit
SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index bfe3d2793d00..feec3d362803 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -11,19 +11,6 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
- sub.d a0, a2, a0
- addi.d a0, a0, (\to) * (-8)
- jr ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
- addi.d a0, a2, -\to
- jr ra
-.endr
-
SYM_FUNC_START(__copy_user)
/*
* Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
- _asm_extable 1b, .L_fixup_handle_s0
- _asm_extable 2b, .L_fixup_handle_s0
+ _asm_extable 1b, 3b
+ _asm_extable 2b, 3b
SYM_FUNC_END(__copy_user_generic)
/*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
sltui t0, a2, 9
bnez t0, .Lsmall
- add.d a3, a1, a2
- add.d a2, a0, a2
0: ld.d t0, a1, 0
1: st.d t0, a0, 0
+ add.d a3, a1, a2
+ add.d a2, a0, a2
/* align up destination address */
andi t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
7: ld.d t5, a1, 40
8: ld.d t6, a1, 48
9: ld.d t7, a1, 56
- addi.d a1, a1, 64
10: st.d t0, a0, 0
11: st.d t1, a0, 8
12: st.d t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
15: st.d t5, a0, 40
16: st.d t6, a0, 48
17: st.d t7, a0, 56
+ addi.d a1, a1, 64
addi.d a0, a0, 64
bltu a1, a4, .Lloop64
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
19: ld.d t1, a1, 8
20: ld.d t2, a1, 16
21: ld.d t3, a1, 24
- addi.d a1, a1, 32
22: st.d t0, a0, 0
23: st.d t1, a0, 8
24: st.d t2, a0, 16
25: st.d t3, a0, 24
+ addi.d a1, a1, 32
addi.d a0, a0, 32
.Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
bgeu a1, a4, .Llt16
26: ld.d t0, a1, 0
27: ld.d t1, a1, 8
- addi.d a1, a1, 16
28: st.d t0, a0, 0
29: st.d t1, a0, 8
+ addi.d a1, a1, 16
addi.d a0, a0, 16
.Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
bgeu a1, a4, .Llt8
30: ld.d t0, a1, 0
31: st.d t0, a0, 0
+ addi.d a1, a1, 8
addi.d a0, a0, 8
.Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
jr ra
/* fixup and ex_table */
- _asm_extable 0b, .L_fixup_handle_0
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_0
- _asm_extable 3b, .L_fixup_handle_0
- _asm_extable 4b, .L_fixup_handle_0
- _asm_extable 5b, .L_fixup_handle_0
- _asm_extable 6b, .L_fixup_handle_0
- _asm_extable 7b, .L_fixup_handle_0
- _asm_extable 8b, .L_fixup_handle_0
- _asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_0
- _asm_extable 11b, .L_fixup_handle_1
- _asm_extable 12b, .L_fixup_handle_2
- _asm_extable 13b, .L_fixup_handle_3
- _asm_extable 14b, .L_fixup_handle_4
- _asm_extable 15b, .L_fixup_handle_5
- _asm_extable 16b, .L_fixup_handle_6
- _asm_extable 17b, .L_fixup_handle_7
- _asm_extable 18b, .L_fixup_handle_0
- _asm_extable 19b, .L_fixup_handle_0
- _asm_extable 20b, .L_fixup_handle_0
- _asm_extable 21b, .L_fixup_handle_0
- _asm_extable 22b, .L_fixup_handle_0
- _asm_extable 23b, .L_fixup_handle_1
- _asm_extable 24b, .L_fixup_handle_2
- _asm_extable 25b, .L_fixup_handle_3
- _asm_extable 26b, .L_fixup_handle_0
- _asm_extable 27b, .L_fixup_handle_0
- _asm_extable 28b, .L_fixup_handle_0
- _asm_extable 29b, .L_fixup_handle_1
- _asm_extable 30b, .L_fixup_handle_0
- _asm_extable 31b, .L_fixup_handle_0
- _asm_extable 32b, .L_fixup_handle_0
- _asm_extable 33b, .L_fixup_handle_0
- _asm_extable 34b, .L_fixup_handle_s0
- _asm_extable 35b, .L_fixup_handle_s0
- _asm_extable 36b, .L_fixup_handle_s0
- _asm_extable 37b, .L_fixup_handle_s0
- _asm_extable 38b, .L_fixup_handle_s0
- _asm_extable 39b, .L_fixup_handle_s0
- _asm_extable 40b, .L_fixup_handle_s0
- _asm_extable 41b, .L_fixup_handle_s2
- _asm_extable 42b, .L_fixup_handle_s0
- _asm_extable 43b, .L_fixup_handle_s0
- _asm_extable 44b, .L_fixup_handle_s0
- _asm_extable 45b, .L_fixup_handle_s0
- _asm_extable 46b, .L_fixup_handle_s0
- _asm_extable 47b, .L_fixup_handle_s4
- _asm_extable 48b, .L_fixup_handle_s0
- _asm_extable 49b, .L_fixup_handle_s0
- _asm_extable 50b, .L_fixup_handle_s0
- _asm_extable 51b, .L_fixup_handle_s4
- _asm_extable 52b, .L_fixup_handle_s0
- _asm_extable 53b, .L_fixup_handle_s0
- _asm_extable 54b, .L_fixup_handle_s0
- _asm_extable 55b, .L_fixup_handle_s4
- _asm_extable 56b, .L_fixup_handle_s0
- _asm_extable 57b, .L_fixup_handle_s0
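+/*
+ * Fault fixup: compute the remaining byte count, then copy byte by byte
+ * until the fault recurs; a0 returns the number of bytes left.
+ */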
+.Llarge_fixup:
+ sub.d a2, a2, a0
+
+.Lsmall_fixup:
+58: ld.b t0, a1, 0
+59: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 58b
+
+.Lexit:
+ move a0, a2
+ jr ra
+
+ _asm_extable 0b, .Lsmall_fixup
+ _asm_extable 1b, .Lsmall_fixup
+ _asm_extable 2b, .Llarge_fixup
+ _asm_extable 3b, .Llarge_fixup
+ _asm_extable 4b, .Llarge_fixup
+ _asm_extable 5b, .Llarge_fixup
+ _asm_extable 6b, .Llarge_fixup
+ _asm_extable 7b, .Llarge_fixup
+ _asm_extable 8b, .Llarge_fixup
+ _asm_extable 9b, .Llarge_fixup
+ _asm_extable 10b, .Llarge_fixup
+ _asm_extable 11b, .Llarge_fixup
+ _asm_extable 12b, .Llarge_fixup
+ _asm_extable 13b, .Llarge_fixup
+ _asm_extable 14b, .Llarge_fixup
+ _asm_extable 15b, .Llarge_fixup
+ _asm_extable 16b, .Llarge_fixup
+ _asm_extable 17b, .Llarge_fixup
+ _asm_extable 18b, .Llarge_fixup
+ _asm_extable 19b, .Llarge_fixup
+ _asm_extable 20b, .Llarge_fixup
+ _asm_extable 21b, .Llarge_fixup
+ _asm_extable 22b, .Llarge_fixup
+ _asm_extable 23b, .Llarge_fixup
+ _asm_extable 24b, .Llarge_fixup
+ _asm_extable 25b, .Llarge_fixup
+ _asm_extable 26b, .Llarge_fixup
+ _asm_extable 27b, .Llarge_fixup
+ _asm_extable 28b, .Llarge_fixup
+ _asm_extable 29b, .Llarge_fixup
+ _asm_extable 30b, .Llarge_fixup
+ _asm_extable 31b, .Llarge_fixup
+ _asm_extable 32b, .Llarge_fixup
+ _asm_extable 33b, .Llarge_fixup
+ _asm_extable 34b, .Lexit
+ _asm_extable 35b, .Lexit
+ _asm_extable 36b, .Lsmall_fixup
+ _asm_extable 37b, .Lsmall_fixup
+ _asm_extable 38b, .Lsmall_fixup
+ _asm_extable 39b, .Lsmall_fixup
+ _asm_extable 40b, .Lsmall_fixup
+ _asm_extable 41b, .Lsmall_fixup
+ _asm_extable 42b, .Lsmall_fixup
+ _asm_extable 43b, .Lsmall_fixup
+ _asm_extable 44b, .Lsmall_fixup
+ _asm_extable 45b, .Lsmall_fixup
+ _asm_extable 46b, .Lsmall_fixup
+ _asm_extable 47b, .Lsmall_fixup
+ _asm_extable 48b, .Lsmall_fixup
+ _asm_extable 49b, .Lsmall_fixup
+ _asm_extable 50b, .Lsmall_fixup
+ _asm_extable 51b, .Lsmall_fixup
+ _asm_extable 52b, .Lsmall_fixup
+ _asm_extable 53b, .Lsmall_fixup
+ _asm_extable 54b, .Lsmall_fixup
+ _asm_extable 55b, .Lsmall_fixup
+ _asm_extable 56b, .Lsmall_fixup
+ _asm_extable 57b, .Lsmall_fixup
+ _asm_extable 58b, .Lexit
+ _asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index cc30b3b6252f..fa1148878d2b 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -10,6 +10,8 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memcpy)
/*
* Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
ALTERNATIVE "b __memcpy_generic", \
"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
/*
* void *__memcpy_generic(void *dst, const void *src, size_t n)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index 7dc76d1484b6..82dae062fec8 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -10,23 +10,29 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memmove)
- blt a0, a1, memcpy /* dst < src, memcpy */
- blt a1, a0, rmemcpy /* src < dst, rmemcpy */
- jr ra /* dst == src, return */
+ blt a0, a1, __memcpy /* dst < src, memcpy */
+ blt a1, a0, __rmemcpy /* src < dst, rmemcpy */
+ jr ra /* dst == src, return */
SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
/*
* Some CPUs support hardware unaligned access
*/
ALTERNATIVE "b __rmemcpy_generic", \
"b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
/*
* void *__rmemcpy_generic(void *dst, const void *src, size_t n)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index 3f20f7996e8e..06d3ca54cbfe 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -16,6 +16,8 @@
bstrins.d \r0, \r0, 63, 32
.endm
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memset)
/*
* Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
ALTERNATIVE "b __memset_generic", \
"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
/*
* void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644
index 000000000000..84cd24b728c4
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop iteration. This assumes all
+ * future popular LoongArch cores have performance characteristics similar
+ * to the current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset) \
+ "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
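+/* One 64-byte line is four 16-byte LSX vector registers */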
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 16) \
+ LD(2, base, 32) \
+ LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(4, base, 0) \
+ LD(5, base, 16) \
+ LD(6, base, 32) \
+ LD(7, base, 48) \
+ XOR(0, 4) \
+ XOR(1, 5) \
+ XOR(2, 6) \
+ XOR(3, 7)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 16) \
+ ST(2, base, 32) \
+ ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset) \
+ "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
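+/* One 64-byte line is two 32-byte LASX vector registers */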
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(2, base, 0) \
+ LD(3, base, 32) \
+ XOR(0, 2) \
+ XOR(1, 3)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644
index 000000000000..f50f32514d80
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644
index 000000000000..393f689dbcf6
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd_glue.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor) \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_2(bytes, p1, p2); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor) \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_3(bytes, p1, p2, p3); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor) \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3, \
+ const unsigned long * __restrict p4) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_4(bytes, p1, p2, p3, p4); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor) \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3, \
+ const unsigned long * __restrict p4, \
+ const unsigned long * __restrict p5) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor) \
+ MAKE_XOR_GLUE_2(flavor); \
+ MAKE_XOR_GLUE_3(flavor); \
+ MAKE_XOR_GLUE_4(flavor); \
+ MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644
index 000000000000..0358ced7fe33
--- /dev/null
+++ b/arch/loongarch/lib/xor_template.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
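+/*
+ * Note: bytes must be a nonzero multiple of LINE_WIDTH, since the
+ * do-while loops below always run at least once.
+ */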
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3,
+ const unsigned long * __restrict v4)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ LD_AND_XOR_LINE(v4)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+ : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ v4 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3,
+ const unsigned long * __restrict v4,
+ const unsigned long * __restrict v5)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ LD_AND_XOR_LINE(v4)
+ LD_AND_XOR_LINE(v5)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+ [v5] "r"(v5) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ v4 += LINE_WIDTH / sizeof(unsigned long);
+ v5 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile
index 8ffc6383f836..e4d1e581dbae 100644
--- a/arch/loongarch/mm/Makefile
+++ b/arch/loongarch/mm/Makefile
@@ -7,3 +7,6 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \
fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_KASAN) += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c
index 72685a48eaf0..6be04d36ca07 100644
--- a/arch/loongarch/mm/cache.c
+++ b/arch/loongarch/mm/cache.c
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
current_cpu_data.cache_leaves_present = leaf;
current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
- shm_align_mask = PAGE_SIZE - 1;
}
static const pgprot_t protection_map[16] = {
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index da5b6d518cdb..e6376e3dce86 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -23,6 +23,7 @@
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
+#include <linux/kfence.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
int show_unhandled_signals = 1;
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+ unsigned long write, unsigned long address)
{
const int field = sizeof(unsigned long) * 2;
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
if (fixup_exception(regs))
return;
+ if (kfence_handle_page_fault(address, write, regs))
+ return;
+
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
die("Oops", regs);
}
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+ unsigned long write, unsigned long address)
{
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
{
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
*/
if (address & __UA_LIMIT) {
if (!user_mode(regs))
- no_context(regs, address);
+ no_context(regs, write, address);
else
do_sigsegv(regs, write, address, si_code);
return;
@@ -211,7 +217,7 @@ good_area:
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
@@ -232,7 +238,7 @@ good_area:
if (unlikely(fault & VM_FAULT_ERROR)) {
mmap_read_unlock(mm);
if (fault & VM_FAULT_OOM) {
- do_out_of_memory(regs, address);
+ do_out_of_memory(regs, write, address);
return;
} else if (fault & VM_FAULT_SIGSEGV) {
do_sigsegv(regs, write, address, si_code);
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 3b7d8129570b..f3fe8c06ba4d 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -35,33 +35,8 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed. Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
- unsigned int order, i;
- struct page *page;
-
- order = 0;
-
- empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (!empty_zero_page)
- panic("Oh boy, that early out of memory?");
-
- page = virt_to_page((void *)empty_zero_page);
- split_page(page, order);
- for (i = 0; i < (1 << order); i++, page++)
- mark_page_reserved(page);
-
- zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
memblock_free_all();
- setup_zero_pages(); /* Setup zeroed pages. */
}
#endif /* !CONFIG_NUMA */
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
#endif
#endif
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
{
- pgd_t *pgd;
- p4d_t *p4d;
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud;
pmd_t *pmd;
- pgd = pgd_offset_k(addr);
- p4d = p4d_offset(pgd, addr);
-
- if (pgd_none(*pgd)) {
- pud_t *new __maybe_unused;
-
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pgd_populate(&init_mm, pgd, new);
+ if (p4d_none(*p4d)) {
+ pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pud)
+ panic("%s: Failed to allocate memory\n", __func__);
+ p4d_populate(&init_mm, p4d, pud);
#ifndef __PAGETABLE_PUD_FOLDED
- pud_init(new);
+ pud_init(pud);
#endif
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
- pmd_t *new __maybe_unused;
-
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pud_populate(&init_mm, pud, new);
+ pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pmd)
+ panic("%s: Failed to allocate memory\n", __func__);
+ pud_populate(&init_mm, pud, pmd);
#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init(new);
+ pmd_init(pmd);
#endif
}
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- pte_t *new __maybe_unused;
+ if (!pmd_present(*pmd)) {
+ pte_t *pte;
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, new);
+ pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pte)
+ panic("%s: Failed to allocate memory\n", __func__);
+ pmd_populate_kernel(&init_mm, pmd, pte);
}
return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- ptep = fixmap_pte(addr);
+ ptep = populate_kernel_pte(addr);
if (!pte_none(*ptep)) {
pte_ERROR(*ptep);
return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644
index 000000000000..da68bc1a4643
--- /dev/null
+++ b/arch/loongarch/mm/kasan_init.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
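+
+/*
+ * In the early stage, an empty page table slot is simply zero; once the
+ * real shadow is being set up, a slot still pointing at the corresponding
+ * kasan_early_shadow_* table is what counts as "none" and has to be
+ * replaced with a freshly allocated table.
+ */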
+
+bool kasan_early_stage = true;
+
+/*
+ * Allocate memory for the shadow memory page table.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+ void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+ __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+ if (!p)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+ return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+ if (__pmd_none(early, READ_ONCE(*pmdp))) {
+ phys_addr_t pte_phys = early ?
+ __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+ pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+ }
+
+ return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+ if (__pud_none(early, READ_ONCE(*pudp))) {
+ phys_addr_t pmd_phys = early ?
+ __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+ pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+ }
+
+ return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+ if (__p4d_none(early, READ_ONCE(*p4dp))) {
+ phys_addr_t pud_phys = early ?
+ __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+ p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+ }
+
+ return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
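+	/*
+	 * Stop either at @end or at the first PTE already populated
+	 * with something other than the early shadow page.
+	 */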
+ do {
+ phys_addr_t page_phys = early ?
+ __pa_symbol(kasan_early_shadow_page)
+ : kasan_alloc_zeroed_page(node);
+ next = addr + PAGE_SIZE;
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+ do {
+ next = pud_addr_end(addr, end);
+ kasan_pmd_populate(pudp, addr, next, node, early);
+ } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+ do {
+ next = p4d_addr_end(addr, end);
+ kasan_pud_populate(p4dp, addr, next, node, early);
+ } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+ int node, bool early)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_p4d_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+ int node)
+{
+ kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
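+	/*
+	 * clear_pgds() and kasan_pgd_populate() operate on whole pgd
+	 * entries, so the shadow region must be PGDIR_SIZE-aligned at
+	 * both ends.
+	 */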
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+ WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+	/*
+	 * Remove references to the kasan page tables from
+	 * swapper_pg_dir. pgd_clear() can't be used here
+	 * because it is a no-op on 2- and 3-level page
+	 * table setups.
+	 */
+ for (; start < end; start += PGDIR_SIZE)
+ kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+ u64 i;
+ phys_addr_t pa_start, pa_end;
+
+	/*
+	 * The PGD entries were populated as invalid_pmd_table or
+	 * invalid_pud_table in pagetable_init(), depending on how many
+	 * page table levels are in use. The pgd entries covering the
+	 * kasan shadow memory have to be cleared here, because they are
+	 * non-zero: pgd_none() would otherwise evaluate to false, and
+	 * the population below would not create any new pgd at all.
+	 */
+ memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+ csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ /* Maps everything to a single page of zeroes */
+ kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+ kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+ kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+ kasan_early_stage = false;
+
+ /* Populate the linear mapping */
+ for_each_mem_range(i, &pa_start, &pa_end) {
+ void *start = (void *)phys_to_virt(pa_start);
+ void *end = (void *)phys_to_virt(pa_end);
+
+ if (start >= end)
+ break;
+
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+ }
+
+ /* Populate modules mapping */
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+ (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+	/*
+	 * KASAN may reuse the contents of kasan_early_shadow_pte
+	 * directly, so make sure it maps the zero page read-only.
+	 */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_early_shadow_pte[i],
+ pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized.\n");
+}
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index fbe1a4856fc4..a9630a81b38a 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -8,12 +8,11 @@
#include <linux/mm.h>
#include <linux/mman.h>
-unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
-#define COLOUR_ALIGN(addr, pgoff) \
- ((((addr) + shm_align_mask) & ~shm_align_mask) + \
- (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff) \
+ ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \
+ + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
enum mmap_allocation_direction {UP, DOWN};
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
* cache aliasing constraints.
*/
if ((flags & MAP_SHARED) &&
- ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+ ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
return -EINVAL;
return addr;
}
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
}
info.length = len;
- info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (dir == DOWN) {
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index b14343e211b6..71d0539e2d0b 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -9,6 +9,18 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
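+
+/*
+ * A kernel virtual address may either be linearly mapped through a Direct
+ * Mapped Window (DMW) or mapped through the TLB (vmalloc space, for
+ * example), so two flavors of virt-to-page translation are provided.
+ */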
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+ return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+ return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *init, *ret = NULL;
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index a50308b6fc25..5c97d1463328 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# Objects to go into the VDSO.
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 819b6bc8ac08..3df5499f7936 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
int kasan_populate_early_shadow(const void *shadow_start,
const void *shadow_end);
+#ifndef __HAVE_ARCH_SHADOW_MAP
static inline void *kasan_mem_to_shadow(const void *addr)
{
return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ KASAN_SHADOW_OFFSET;
}
+#endif
int kasan_add_zero_shadow(void *start, unsigned long size);
void kasan_remove_zero_shadow(void *start, unsigned long size);
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index f29aaaf2eb21..006e18decfad 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
extern const struct raid6_calls raid6_vpermxor2;
extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
extern const struct raid6_recov_calls raid6_recov_avx512;
extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 45e17619422b..035b0a4db476 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
hostprogs += mktables
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index a22a05c9af8a..0ec534faf019 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_neonx2,
&raid6_neonx1,
#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+ &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ &raid6_lsx,
+#endif
+#endif
#if defined(__ia64__)
&raid6_intx32,
&raid6_intx16,
@@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
#if defined(CONFIG_KERNEL_MODE_NEON)
&raid6_recov_neon,
#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+ &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ &raid6_recov_lsx,
+#endif
+#endif
&raid6_recov_intx1,
NULL
};
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644
index 000000000000..acfc33ce7056
--- /dev/null
+++ b/lib/raid6/loongarch.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37 and earlier, as well as musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX (1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644
index 000000000000..aa5d9f924ca3
--- /dev/null
+++ b/lib/raid6/loongarch_simd.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not disabled even when vector support is present.
+ * As with the similar LoongArch RAID5 XOR code, the main reason is that it
+ * cannot be ruled out at this point that some future (perhaps cut-down)
+ * models could run the vector algorithms slower than the scalar ones,
+ * whether for errata or micro-op reasons. It may be appropriate to revisit
+ * this after one or two more uarch generations.
+ */
+
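+/*
+ * Scalar reference for the vectorized syndrome loops below (cf. the
+ * generic int.uc code): multiplying the running Q syndrome by x in
+ * GF(2^8) is, for each byte,
+ *
+ *   w2 = (wq & 0x80) ? 0xff : 0x00;  // MASK(): broadcast the top bit
+ *   w1 = (wq << 1) & 0xfe;           // SHLBYTE(): shift within the byte
+ *   wq = w1 ^ (w2 & 0x1d) ^ wd;      // reduce mod the 0x11d polynomial
+ *
+ * The vslti.b, vslli.b, vandi.b and vxor.v instructions below perform
+ * exactly these steps on 16 (LSX) or 32 (LASX) bytes at a time.
+ */
+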
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void)
+{
+ return cpu_has_lsx;
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $vr0, $vr1, $vr2, $vr3: wp
+ * $vr4, $vr5, $vr6, $vr7: wq
+ * $vr8, $vr9, $vr10, $vr11: wd
+ * $vr12, $vr13, $vr14, $vr15: w2
+ * $vr16, $vr17, $vr18, $vr19: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*4) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+ asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+ asm volatile("vori.b $vr4, $vr0, 0");
+ asm volatile("vori.b $vr5, $vr1, 0");
+ asm volatile("vori.b $vr6, $vr2, 0");
+ asm volatile("vori.b $vr7, $vr3, 0");
+ for (z = z0-1; z >= 0; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+ asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("vxor.v $vr16, $vr16, $vr12");
+ asm volatile("vxor.v $vr17, $vr17, $vr13");
+ asm volatile("vxor.v $vr18, $vr18, $vr14");
+ asm volatile("vxor.v $vr19, $vr19, $vr15");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr8");
+ asm volatile("vxor.v $vr5, $vr17, $vr9");
+ asm volatile("vxor.v $vr6, $vr18, $vr10");
+ asm volatile("vxor.v $vr7, $vr19, $vr11");
+ }
+ /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+ asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+ asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+ asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+ asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+ /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+ asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+ asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+ asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+ asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $vr0, $vr1, $vr2, $vr3: wp
+ * $vr4, $vr5, $vr6, $vr7: wq
+ * $vr8, $vr9, $vr10, $vr11: wd
+ * $vr12, $vr13, $vr14, $vr15: w2
+ * $vr16, $vr17, $vr18, $vr19: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*4) {
+ /* P/Q data pages */
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+ asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+ asm volatile("vori.b $vr4, $vr0, 0");
+ asm volatile("vori.b $vr5, $vr1, 0");
+ asm volatile("vori.b $vr6, $vr2, 0");
+ asm volatile("vori.b $vr7, $vr3, 0");
+ for (z = z0-1; z >= start; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+ asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("vxor.v $vr16, $vr16, $vr12");
+ asm volatile("vxor.v $vr17, $vr17, $vr13");
+ asm volatile("vxor.v $vr18, $vr18, $vr14");
+ asm volatile("vxor.v $vr19, $vr19, $vr15");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr8");
+ asm volatile("vxor.v $vr5, $vr17, $vr9");
+ asm volatile("vxor.v $vr6, $vr18, $vr10");
+ asm volatile("vxor.v $vr7, $vr19, $vr11");
+ }
+
+ /* P/Q left side optimization */
+ for (z = start-1; z >= 0; z--) {
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* wq$$ = w1$$ ^ w2$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr12");
+ asm volatile("vxor.v $vr5, $vr17, $vr13");
+ asm volatile("vxor.v $vr6, $vr18, $vr14");
+ asm volatile("vxor.v $vr7, $vr19, $vr15");
+ }
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ */
+ asm volatile(
+ "vld $vr20, %0\n\t"
+ "vld $vr21, %1\n\t"
+ "vld $vr22, %2\n\t"
+ "vld $vr23, %3\n\t"
+ "vld $vr24, %4\n\t"
+ "vld $vr25, %5\n\t"
+ "vld $vr26, %6\n\t"
+ "vld $vr27, %7\n\t"
+ "vxor.v $vr20, $vr20, $vr0\n\t"
+ "vxor.v $vr21, $vr21, $vr1\n\t"
+ "vxor.v $vr22, $vr22, $vr2\n\t"
+ "vxor.v $vr23, $vr23, $vr3\n\t"
+ "vxor.v $vr24, $vr24, $vr4\n\t"
+ "vxor.v $vr25, $vr25, $vr5\n\t"
+ "vxor.v $vr26, $vr26, $vr6\n\t"
+ "vxor.v $vr27, $vr27, $vr7\n\t"
+ "vst $vr20, %0\n\t"
+ "vst $vr21, %1\n\t"
+ "vst $vr22, %2\n\t"
+ "vst $vr23, %3\n\t"
+ "vst $vr24, %4\n\t"
+ "vst $vr25, %5\n\t"
+ "vst $vr26, %6\n\t"
+ "vst $vr27, %7\n\t"
+ : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+ "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+ "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+ "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+ );
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {
+ raid6_lsx_gen_syndrome,
+ raid6_lsx_xor_syndrome,
+ raid6_has_lsx,
+ "lsx",
+ .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)
+{
+ return cpu_has_lasx;
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $xr0, $xr1: wp
+ * $xr2, $xr3: wq
+ * $xr4, $xr5: wd
+ * $xr6, $xr7: w2
+ * $xr8, $xr9: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*2) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("xvori.b $xr2, $xr0, 0");
+ asm volatile("xvori.b $xr3, $xr1, 0");
+ for (z = z0-1; z >= 0; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("xvxor.v $xr8, $xr8, $xr6");
+ asm volatile("xvxor.v $xr9, $xr9, $xr7");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr4");
+ asm volatile("xvxor.v $xr3, $xr9, $xr5");
+ }
+ /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+ asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+ asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+ /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+ asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+ asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $xr0, $xr1: wp
+ * $xr2, $xr3: wq
+ * $xr4, $xr5: wd
+ * $xr6, $xr7: w2
+ * $xr8, $xr9: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*2) {
+ /* P/Q data pages */
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("xvori.b $xr2, $xr0, 0");
+ asm volatile("xvori.b $xr3, $xr1, 0");
+ for (z = z0-1; z >= start; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("xvxor.v $xr8, $xr8, $xr6");
+ asm volatile("xvxor.v $xr9, $xr9, $xr7");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr4");
+ asm volatile("xvxor.v $xr3, $xr9, $xr5");
+ }
+
+ /* P/Q left side optimization */
+ for (z = start-1; z >= 0; z--) {
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* wq$$ = w1$$ ^ w2$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr6");
+ asm volatile("xvxor.v $xr3, $xr9, $xr7");
+ }
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ */
+ asm volatile(
+ "xvld $xr10, %0\n\t"
+ "xvld $xr11, %1\n\t"
+ "xvld $xr12, %2\n\t"
+ "xvld $xr13, %3\n\t"
+ "xvxor.v $xr10, $xr10, $xr0\n\t"
+ "xvxor.v $xr11, $xr11, $xr1\n\t"
+ "xvxor.v $xr12, $xr12, $xr2\n\t"
+ "xvxor.v $xr13, $xr13, $xr3\n\t"
+ "xvst $xr10, %0\n\t"
+ "xvst $xr11, %1\n\t"
+ "xvst $xr12, %2\n\t"
+ "xvst $xr13, %3\n\t"
+ : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+ "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+ );
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = {
+ raid6_lasx_gen_syndrome,
+ raid6_lasx_xor_syndrome,
+ raid6_has_lasx,
+ "lasx",
+ .priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644
index 000000000000..94aeac85e6f7
--- /dev/null
+++ b/lib/raid6/recov_loongarch_simd.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike the syndrome calculation algorithms, the recovery algorithms are
+ * not selected at boot time by benchmarking, so the priorities have to be
+ * specified here, in the hope that future cores will all have decent
+ * vector support (i.e. LASX never slower than LSX, let alone scalar code).
+ */
+
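+/*
+ * Both recovery routines follow the standard algebra: regenerating the
+ * syndromes with the failed blocks zeroed yields P ^ Pxy and Q ^ Qxy,
+ * from which, for two failed data blocks,
+ *
+ *   Dx = A * (P ^ Pxy) ^ B * (Q ^ Qxy)
+ *   Dy = (P ^ Pxy) ^ Dx
+ *
+ * The GF(2^8) multiplications by the constants A and B are performed a
+ * nibble at a time through the 16-entry pbmul/qmul lookup tables with
+ * vshuf.b/xvshuf.b, much like the SSSE3 version uses pshufb.
+ */
+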
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+ return cpu_has_lsx;
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks - 2] = p;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /*
+ * vr20, vr21: qmul
+ * vr22, vr23: pbmul
+ */
+ asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+ asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+ while (bytes) {
+ /* vr4 - vr7: Q */
+ asm volatile("vld $vr4, %0" : : "m" (q[0]));
+ asm volatile("vld $vr5, %0" : : "m" (q[16]));
+ asm volatile("vld $vr6, %0" : : "m" (q[32]));
+ asm volatile("vld $vr7, %0" : : "m" (q[48]));
+ /* vr4 - vr7: Q + Qxy */
+ asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+ asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+ asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+ asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+ /* vr0 - vr3: P */
+ asm volatile("vld $vr0, %0" : : "m" (p[0]));
+ asm volatile("vld $vr1, %0" : : "m" (p[16]));
+ asm volatile("vld $vr2, %0" : : "m" (p[32]));
+ asm volatile("vld $vr3, %0" : : "m" (p[48]));
+ /* vr0 - vr3: P + Pxy */
+ asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+ asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+ asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+ asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+ /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+ asm volatile("vsrli.b $vr8, $vr4, 4");
+ asm volatile("vsrli.b $vr9, $vr5, 4");
+ asm volatile("vsrli.b $vr10, $vr6, 4");
+ asm volatile("vsrli.b $vr11, $vr7, 4");
+ /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+ asm volatile("vandi.b $vr4, $vr4, 0x0f");
+ asm volatile("vandi.b $vr5, $vr5, 0x0f");
+ asm volatile("vandi.b $vr6, $vr6, 0x0f");
+ asm volatile("vandi.b $vr7, $vr7, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+ asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+ asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+ asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+ /* lookup from qmul[16] */
+ asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+ asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+ asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+ asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+ /* vr16 - vr19: B(Q + Qxy) */
+ asm volatile("vxor.v $vr16, $vr8, $vr4");
+ asm volatile("vxor.v $vr17, $vr9, $vr5");
+ asm volatile("vxor.v $vr18, $vr10, $vr6");
+ asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+ /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+ asm volatile("vsrli.b $vr4, $vr0, 4");
+ asm volatile("vsrli.b $vr5, $vr1, 4");
+ asm volatile("vsrli.b $vr6, $vr2, 4");
+ asm volatile("vsrli.b $vr7, $vr3, 4");
+ /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+ asm volatile("vandi.b $vr12, $vr0, 0x0f");
+ asm volatile("vandi.b $vr13, $vr1, 0x0f");
+ asm volatile("vandi.b $vr14, $vr2, 0x0f");
+ asm volatile("vandi.b $vr15, $vr3, 0x0f");
+ /* lookup from pbmul[0] */
+ asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+ asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+ asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+ asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+ /* lookup from pbmul[16] */
+ asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+ asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+ asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+ asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+ /* vr4 - vr7: A(P + Pxy) */
+ asm volatile("vxor.v $vr4, $vr4, $vr12");
+ asm volatile("vxor.v $vr5, $vr5, $vr13");
+ asm volatile("vxor.v $vr6, $vr6, $vr14");
+ asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+ /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
+ asm volatile("vxor.v $vr4, $vr4, $vr16");
+ asm volatile("vxor.v $vr5, $vr5, $vr17");
+ asm volatile("vxor.v $vr6, $vr6, $vr18");
+ asm volatile("vxor.v $vr7, $vr7, $vr19");
+ asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+ asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+ asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+ asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+ /* vr0 - vr3: P + Pxy + Dx = Dy */
+ asm volatile("vxor.v $vr0, $vr0, $vr4");
+ asm volatile("vxor.v $vr1, $vr1, $vr5");
+ asm volatile("vxor.v $vr2, $vr2, $vr6");
+ asm volatile("vxor.v $vr3, $vr3, $vr7");
+ asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+ asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+ asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+ asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ /* vr22, vr23: qmul */
+ asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+ while (bytes) {
+ /* vr0 - vr3: P + Dx */
+ asm volatile("vld $vr0, %0" : : "m" (p[0]));
+ asm volatile("vld $vr1, %0" : : "m" (p[16]));
+ asm volatile("vld $vr2, %0" : : "m" (p[32]));
+ asm volatile("vld $vr3, %0" : : "m" (p[48]));
+ /* vr4 - vr7: Qx */
+ asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+ asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+ asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+ asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+ /* vr4 - vr7: Q + Qx */
+ asm volatile("vld $vr8, %0" : : "m" (q[0]));
+ asm volatile("vld $vr9, %0" : : "m" (q[16]));
+ asm volatile("vld $vr10, %0" : : "m" (q[32]));
+ asm volatile("vld $vr11, %0" : : "m" (q[48]));
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+ /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+ asm volatile("vsrli.b $vr8, $vr4, 4");
+ asm volatile("vsrli.b $vr9, $vr5, 4");
+ asm volatile("vsrli.b $vr10, $vr6, 4");
+ asm volatile("vsrli.b $vr11, $vr7, 4");
+ /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+ asm volatile("vandi.b $vr4, $vr4, 0x0f");
+ asm volatile("vandi.b $vr5, $vr5, 0x0f");
+ asm volatile("vandi.b $vr6, $vr6, 0x0f");
+ asm volatile("vandi.b $vr7, $vr7, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+ asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+ asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+ asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+ /* lookup from qmul[16] */
+ asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+ asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+ asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+ asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+ /* vr4 - vr7: qmul(Q + Qx) = Dx */
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+ asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+ asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+ asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+ asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+ /* vr0 - vr3: P + Dx + Dx = P */
+ asm volatile("vxor.v $vr0, $vr0, $vr4");
+ asm volatile("vxor.v $vr1, $vr1, $vr5");
+ asm volatile("vxor.v $vr2, $vr2, $vr6");
+ asm volatile("vxor.v $vr3, $vr3, $vr7");
+ asm volatile("vst $vr0, %0" : "=m" (p[0]));
+ asm volatile("vst $vr1, %0" : "=m" (p[16]));
+ asm volatile("vst $vr2, %0" : "=m" (p[32]));
+ asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+ .data2 = raid6_2data_recov_lsx,
+ .datap = raid6_datap_recov_lsx,
+ .valid = raid6_has_lsx,
+ .name = "lsx",
+ .priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+ return cpu_has_lasx;
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks - 2] = p;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /*
+ * xr20, xr21: qmul
+ * xr22, xr23: pbmul
+ */
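+	/*
+	 * xvshuf.b looks up within each 128-bit half independently, so
+	 * the 16-byte tables are first loaded with vld and then
+	 * broadcast to both halves with xvreplve0.q.
+	 */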
+ asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+ asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+ asm volatile("xvreplve0.q $xr20, $xr20");
+ asm volatile("xvreplve0.q $xr21, $xr21");
+ asm volatile("xvreplve0.q $xr22, $xr22");
+ asm volatile("xvreplve0.q $xr23, $xr23");
+
+ while (bytes) {
+ /* xr0, xr1: Q */
+ asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+ asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+ /* xr0, xr1: Q + Qxy */
+ asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* xr2, xr3: P */
+ asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+ asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+ /* xr2, xr3: P + Pxy */
+ asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+ asm volatile("xvsrli.b $xr4, $xr0, 4");
+ asm volatile("xvsrli.b $xr5, $xr1, 4");
+ /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+ asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+ asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+ asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+ /* lookup from qmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+ /* xr6, xr7: B(Q + Qxy) */
+ asm volatile("xvxor.v $xr6, $xr4, $xr0");
+ asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+ /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+ asm volatile("xvsrli.b $xr4, $xr2, 4");
+ asm volatile("xvsrli.b $xr5, $xr3, 4");
+ /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+ asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+ asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+ /* lookup from pbmul[0] */
+ asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+ asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+ /* lookup from pbmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+ /* xr0, xr1: A(P + Pxy) */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+ /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+ asm volatile("xvxor.v $xr0, $xr0, $xr6");
+ asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+ /* xr2, xr3: P + Pxy + Dx = Dy */
+ asm volatile("xvxor.v $xr2, $xr2, $xr0");
+ asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+ asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+ asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+ asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+ asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ /* xr22, xr23: qmul */
+ asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+ asm volatile("xvreplve0.q $xr22, $xr22");
+ asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+ asm volatile("xvreplve0.q $xr23, $xr23");
+
+ while (bytes) {
+ /* xr0, xr1: P + Dx */
+ asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+ asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+ /* xr2, xr3: Qx */
+ asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+ asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+ /* xr2, xr3: Q + Qx */
+ asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+ asm volatile("xvsrli.b $xr4, $xr2, 4");
+ asm volatile("xvsrli.b $xr5, $xr3, 4");
+ /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+ asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+ asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+ asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+ /* lookup from qmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+ /* xr2, xr3: qmul(Q + Qx) = Dx */
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr0, xr1: P + Dx + Dx = P */
+ asm volatile("xvxor.v $xr0, $xr0, $xr2");
+ asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+ asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+ asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+ asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+ asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+ .data2 = raid6_2data_recov_lasx,
+ .datap = raid6_datap_recov_lasx,
+ .valid = raid6_has_lasx,
+ .name = "lasx",
+ .priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 1f693ea3b980..2abe0076a636 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
endif
+ifeq ($(ARCH),loongarch64)
+ CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
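+        # Probe the toolchain: try assembling a single LSX/LASX vector
+        # load, and define the corresponding CONFIG_CPU_HAS_* macro only
+        # on success.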
+ CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \
+ gcc -c -x assembler - >/dev/null 2>&1 && \
+ rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+ CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \
+ gcc -c -x assembler - >/dev/null 2>&1 && \
+ rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
ifeq ($(IS_X86),yes)
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
CFLAGS += -DCONFIG_ALTIVEC
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+ OBJS += loongarch_simd.o recov_loongarch_simd.o
endif
.c.o:
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index dcfec277e839..89895f38f722 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
return 0;
}
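+
+/*
+ * Architectures whose empty page tables contain "invalid" pointers rather
+ * than zeroes (LoongArch, for one) override these weak hooks to initialize
+ * a freshly allocated page table.
+ */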
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
- pud_populate(&init_mm, pud,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pmd_init(p);
+ pud_populate(&init_mm, pud, p);
}
}
zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
return 0;
}
+void __weak __meminit pud_init(void *addr)
+{
+}
+
static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
- p4d_populate(&init_mm, p4d,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pud_init(p);
+ p4d_populate(&init_mm, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 2e973b36fe07..f70e3d7a602e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#ifndef __HAVE_ARCH_SHADOW_MAP
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
<< KASAN_SHADOW_SCALE_SHIFT);
}
+#endif
static __always_inline bool addr_has_metadata(const void *addr)
{
+#ifdef __HAVE_ARCH_SHADOW_MAP
+ return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
return (kasan_reset_tag(addr) >=
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
}
/**
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 96fd0411f5c5..3872528d0963 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
*/
static unsigned long kfence_init_pool(void)
{
- unsigned long addr = (unsigned long)__kfence_pool;
+ unsigned long addr;
struct page *pages;
int i;
if (!arch_kfence_init_pool())
- return addr;
+ return (unsigned long)__kfence_pool;
+ addr = (unsigned long)__kfence_pool;
pages = virt_to_page(__kfence_pool);
/*