Commit f1dabf0b authored by Rafael J. Wysocki

Merge branches 'acpi-scan', 'acpi-sysfs', 'acpi-wdat' and 'acpi-tables'

* acpi-scan:
  ACPI / scan: Prefer devices without _HID/_CID for _ADR matching
  ACPI: Drop misplaced acpi_dma_deconfigure() call from acpi_bind_one()

* acpi-sysfs:
  ACPI / sysfs: Provide quirk mechanism to prevent GPE flooding

* acpi-wdat:
  ACPI / watchdog: Print out error number when device creation fails

* acpi-tables:
  ACPI / DMAR: Avoid passing NULL to acpi_put_table()
@@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml \
kernel-api.xml filesystems.xml lsm.xml kgdb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
-80211.xml sh.xml regulator.xml w1.xml \
+sh.xml regulator.xml w1.xml \
writing_musb_glue_layer.xml iio.xml
ifeq ($(DOCBOOKS),)
...
@@ -106,6 +106,16 @@
use by PCI
Format: <irq>,<irq>...
acpi_mask_gpe= [HW,ACPI]
Due to the existence of _Lxx/_Exx, some GPEs triggered
by unsupported hardware/firmware features can result in
GPE floodings that cannot be automatically disabled by
the GPE dispatcher.
This facility can be used to prevent such uncontrolled
GPE floodings.
Format: <int>
Support masking of GPEs numbered from 0x00 to 0x7f.
acpi_no_auto_serialize [HW,ACPI]
Disable auto-serialization of AML methods
AML control methods that contain the opcodes to create
...
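As a usage illustration based on the format documented above (not part of the patch itself): booting with acpi_mask_gpe=0x00 masks GPE 00, and the mask can later be lifted at runtime through the corresponding sysfs interrupt file, e.g. echo unmask > /sys/firmware/acpi/interrupts/gpe00, which the sysfs changes further down in this merge implement.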
@@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
#else
const u16 *a = (const u16 *)addr1;
const u16 *b = (const u16 *)addr2;
-return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
#endif
}
...
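The one-character fix above is easy to misread, so here is a small user-space sketch (not kernel code; it uses memcpy instead of the kernel's aligned u16 loads) of the identity behind it: two MAC addresses are equal exactly when every 16-bit half XORs to zero, so the OR of the three XOR terms must be compared against zero; the previous "!= 0" returned the inverted result.

/* Stand-alone model of the fixed comparison; prints "x==x: 1, x==y: 0". */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool mac_equal(const uint8_t *addr1, const uint8_t *addr2)
{
	uint16_t a[3], b[3];

	memcpy(a, addr1, 6);	/* portable stand-in for the u16 * casts */
	memcpy(b, addr2, 6);
	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
}

int main(void)
{
	const uint8_t x[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	const uint8_t y[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x66 };

	printf("x==x: %d, x==y: %d\n", mac_equal(x, x), mac_equal(x, y));
	return 0;
}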
VERSION = 4
PATCHLEVEL = 10
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Roaring Lionus
# *DOCUMENTATION*
...
#ifndef __ASM_ASM_UACCESS_H
#define __ASM_ASM_UACCESS_H
#include <asm/alternative.h>
#include <asm/kernel-pgtable.h>
#include <asm/sysreg.h>
#include <asm/assembler.h>
/*
* User access enabling/disabling macros.
*/
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
.endm
.macro __uaccess_ttbr0_enable, tmp1
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
.macro uaccess_ttbr0_disable, tmp1
alternative_if_not ARM64_HAS_PAN
__uaccess_ttbr0_disable \tmp1
alternative_else_nop_endif
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
alternative_if_not ARM64_HAS_PAN
save_and_disable_irq \tmp2 // avoid preemption
__uaccess_ttbr0_enable \tmp1
restore_irq \tmp2
alternative_else_nop_endif
.endm
#else
.macro uaccess_ttbr0_disable, tmp1
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
.endm
#endif
/*
* These macros are no-ops when UAO is present.
*/
.macro uaccess_disable_not_uao, tmp1
uaccess_ttbr0_disable \tmp1
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(1)
alternative_else_nop_endif
.endm
.macro uaccess_enable_not_uao, tmp1, tmp2
uaccess_ttbr0_enable \tmp1, \tmp2
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
.endm
#endif
@@ -22,8 +22,6 @@
#include <asm/kernel-pgtable.h>
#include <asm/sysreg.h>
-#ifndef __ASSEMBLY__
/*
* User space memory access functions
*/
@@ -424,66 +422,4 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strlen_user(const char __user *str);
extern __must_check long strnlen_user(const char __user *str, long n);
#else /* __ASSEMBLY__ */
#include <asm/assembler.h>
/*
* User access enabling/disabling macros.
*/
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
.endm
.macro __uaccess_ttbr0_enable, tmp1
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
.macro uaccess_ttbr0_disable, tmp1
alternative_if_not ARM64_HAS_PAN
__uaccess_ttbr0_disable \tmp1
alternative_else_nop_endif
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
alternative_if_not ARM64_HAS_PAN
save_and_disable_irq \tmp2 // avoid preemption
__uaccess_ttbr0_enable \tmp1
restore_irq \tmp2
alternative_else_nop_endif
.endm
#else
.macro uaccess_ttbr0_disable, tmp1
.endm
.macro uaccess_ttbr0_enable, tmp1, tmp2
.endm
#endif
/*
* These macros are no-ops when UAO is present.
*/
.macro uaccess_disable_not_uao, tmp1
uaccess_ttbr0_disable \tmp1
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(1)
alternative_else_nop_endif
.endm
.macro uaccess_enable_not_uao, tmp1, tmp2
uaccess_ttbr0_enable \tmp1, \tmp2
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
.endm
#endif /* __ASSEMBLY__ */
#endif /* __ASM_UACCESS_H */
@@ -31,7 +31,7 @@
#include <asm/memory.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
/*
...
@@ -17,7 +17,7 @@
*/
#include <linux/linkage.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
.text
...
@@ -17,7 +17,7 @@
#include <linux/linkage.h>
#include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
/*
* Copy from user space to a kernel buffer (alignment handled by the hardware)
...
@@ -19,7 +19,7 @@
#include <linux/linkage.h>
#include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
/*
* Copy from user space to user space (alignment handled by the hardware)
...
@@ -17,7 +17,7 @@
#include <linux/linkage.h>
#include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
/*
* Copy to user space from a kernel buffer (alignment handled by the hardware)
...
@@ -23,7 +23,7 @@
#include <asm/assembler.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
/*
* flush_icache_range(start,end)
...
@@ -49,7 +49,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
#include <xen/interface/xen.h>
...
@@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
CC_SET(s)
: CC_OUT(s) (negative), ADDR
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
// Let everybody know we have it
#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
/*
* __clear_bit_unlock - Clears a bit in memory
* @nr: Bit to clear
...
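For readers not fluent in x86 inline assembly, the following is a plain C model (no atomics or memory ordering, user-space only; the real helper relies on LOCK ANDB and the sign flag) of what the new clear_bit_unlock_is_negative_byte() computes for a bit inside the first byte: clear bit nr and report whether bit 7 of that same byte is set afterwards. In the page-lock rework later in this merge, nr is PG_locked (bit 0) and bit 7 is PG_waiters.

#include <stdbool.h>
#include <stdio.h>

/* Non-atomic model of the helper's result for nr < 8. */
static bool clear_bit_is_negative_byte_model(int nr, unsigned char *byte)
{
	*byte &= (unsigned char)~(1u << nr);	/* LOCK ANDB in the real code */
	return (*byte & 0x80) != 0;		/* sign bit == bit 7 of the byte */
}

int main(void)
{
	unsigned char flags = 0x81;	/* bit 0 (locked) and bit 7 (waiters) set */

	printf("waiters pending: %d, flags now: 0x%02x\n",
	       clear_bit_is_negative_byte_model(0, &flags), flags);
	return 0;
}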
@@ -1182,6 +1182,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
const char *name = get_name(bank, NULL);
int err = 0;
+if (!dev)
+return -ENODEV;
if (is_shared_bank(bank)) {
nb = node_to_amd_nb(amd_get_nb_id(cpu));
...
@@ -1461,16 +1461,25 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
for (i = 0; i < ctcount; i++) {
unsigned int dlen = COMP_BUF_SIZE;
int ilen = ctemplate[i].inlen;
+void *input_vec;
+input_vec = kmalloc(ilen, GFP_KERNEL);
+if (!input_vec) {
+ret = -ENOMEM;
+goto out;
+}
+memcpy(input_vec, ctemplate[i].input, ilen);
memset(output, 0, dlen);
init_completion(&result.completion);
-sg_init_one(&src, ctemplate[i].input, ilen);
+sg_init_one(&src, input_vec, ilen);
sg_init_one(&dst, output, dlen);
req = acomp_request_alloc(tfm);
if (!req) {
pr_err("alg: acomp: request alloc failed for %s\n",
algo);
+kfree(input_vec);
ret = -ENOMEM;
goto out;
}
@@ -1483,6 +1492,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
if (ret) {
pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n",
i + 1, algo, -ret);
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
@@ -1491,6 +1501,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n",
i + 1, algo, req->dlen);
ret = -EINVAL;
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
@@ -1500,26 +1511,37 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
i + 1, algo);
hexdump(output, req->dlen);
ret = -EINVAL;
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
+kfree(input_vec);
acomp_request_free(req);
}
for (i = 0; i < dtcount; i++) {
unsigned int dlen = COMP_BUF_SIZE;
int ilen = dtemplate[i].inlen;
+void *input_vec;
+input_vec = kmalloc(ilen, GFP_KERNEL);
+if (!input_vec) {
+ret = -ENOMEM;
+goto out;
+}
+memcpy(input_vec, dtemplate[i].input, ilen);
memset(output, 0, dlen);
init_completion(&result.completion);
-sg_init_one(&src, dtemplate[i].input, ilen);
+sg_init_one(&src, input_vec, ilen);
sg_init_one(&dst, output, dlen);
req = acomp_request_alloc(tfm);
if (!req) {
pr_err("alg: acomp: request alloc failed for %s\n",
algo);
+kfree(input_vec);
ret = -ENOMEM;
goto out;
}
@@ -1532,6 +1554,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
if (ret) {
pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n",
i + 1, algo, -ret);
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
@@ -1540,6 +1563,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n",
i + 1, algo, req->dlen);
ret = -EINVAL;
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
@@ -1549,10 +1573,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
i + 1, algo);
hexdump(output, req->dlen);
ret = -EINVAL;
+kfree(input_vec);
acomp_request_free(req);
goto out;
}
+kfree(input_vec);
acomp_request_free(req);
}
...
@@ -114,7 +114,7 @@ void __init acpi_watchdog_init(void)
pdev = platform_device_register_simple("wdat_wdt", PLATFORM_DEVID_NONE,
resources, nresources);
if (IS_ERR(pdev))
-pr_err("Failed to create platform device\n");
+pr_err("Device creation failed: %ld\n", PTR_ERR(pdev));
kfree(resources);
...
@@ -37,6 +37,7 @@ void acpi_amba_init(void);
static inline void acpi_amba_init(void) {}
#endif
int acpi_sysfs_init(void);
+void acpi_gpe_apply_masked_gpes(void);
void acpi_container_init(void);
void acpi_memory_hotplug_init(void);
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
...
@@ -2074,6 +2074,7 @@ int __init acpi_scan_init(void)
}
}
+acpi_gpe_apply_masked_gpes();
acpi_update_all_gpes();
acpi_ec_ecdt_start();
...
@@ -708,6 +708,62 @@ static ssize_t counter_set(struct kobject *kobj,
return result ? result : size;
}
/*
* A Quirk Mechanism for GPE Flooding Prevention:
*
* Quirks may be needed to prevent GPE flooding on a specific GPE. The
* flooding typically cannot be detected and automatically prevented by
* ACPI_GPE_DISPATCH_NONE check because there is a _Lxx/_Exx prepared in
* the AML tables. This normally indicates a feature gap in Linux, thus
* instead of providing endless quirk tables, we provide a boot parameter
* for those who want this quirk. For example, if the users want to prevent
* the GPE flooding for GPE 00, they need to specify the following boot
* parameter:
* acpi_mask_gpe=0x00
* The masking status can be modified by the following runtime controlling
* interface:
* echo unmask > /sys/firmware/acpi/interrupts/gpe00
*/
/*
* Currently, the GPE flooding prevention only supports to mask the GPEs
* numbered from 00 to 7f.
*/
#define ACPI_MASKABLE_GPE_MAX 0x80
static u64 __initdata acpi_masked_gpes;
static int __init acpi_gpe_set_masked_gpes(char *val)
{
u8 gpe;
if (kstrtou8(val, 0, &gpe) || gpe > ACPI_MASKABLE_GPE_MAX)
return -EINVAL;
acpi_masked_gpes |= ((u64)1<<gpe);
return 1;
}
__setup("acpi_mask_gpe=", acpi_gpe_set_masked_gpes);
void __init acpi_gpe_apply_masked_gpes(void)
{
acpi_handle handle;
acpi_status status;
u8 gpe;
for (gpe = 0;
gpe < min_t(u8, ACPI_MASKABLE_GPE_MAX, acpi_current_gpe_count);
gpe++) {
if (acpi_masked_gpes & ((u64)1<<gpe)) {
status = acpi_get_gpe_device(gpe, &handle);
if (ACPI_SUCCESS(status)) {
pr_info("Masking GPE 0x%x.\n", gpe);
(void)acpi_mask_gpe(handle, gpe, TRUE);
}
}
}
}
void acpi_irq_stats_init(void)
{
acpi_status status;
...
@@ -273,7 +273,8 @@ struct mv_cesa_op_ctx {
#define CESA_TDMA_SRC_IN_SRAM BIT(30)
#define CESA_TDMA_END_OF_REQ BIT(29)
#define CESA_TDMA_BREAK_CHAIN BIT(28)
-#define CESA_TDMA_TYPE_MSK GENMASK(27, 0)
+#define CESA_TDMA_SET_STATE BIT(27)
+#define CESA_TDMA_TYPE_MSK GENMASK(26, 0)
#define CESA_TDMA_DUMMY 0
#define CESA_TDMA_DATA 1
#define CESA_TDMA_OP 2
...
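A short sketch of why CESA_TDMA_TYPE_MSK had to shrink along with the new flag: the descriptor type and the per-descriptor flag bits share one 32-bit word, so once bit 27 carries CESA_TDMA_SET_STATE it must be excluded from the type mask, otherwise a type comparison on a descriptor that has the flag set would misclassify it. The snippet below is a stand-alone model using local copies of the macros, not driver code.

#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))
#define GENMASK(h, l)	(((~0u) << (l)) & (~0u >> (31 - (h))))

#define CESA_TDMA_SET_STATE	BIT(27)
#define CESA_TDMA_TYPE_MSK	GENMASK(26, 0)	/* was GENMASK(27, 0) */
#define CESA_TDMA_OP		2

int main(void)
{
	uint32_t flags = CESA_TDMA_OP | CESA_TDMA_SET_STATE;

	/* With the old GENMASK(27, 0) mask this comparison would fail. */
	printf("type is OP: %d\n", (flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP);
	return 0;
}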
@@ -280,13 +280,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req)
sreq->offset = 0;
}
static void mv_cesa_ahash_dma_step(struct ahash_request *req)
{
struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
struct mv_cesa_req *base = &creq->base;
/* We must explicitly set the digest state. */
if (base->chain.first->flags & CESA_TDMA_SET_STATE) {
struct mv_cesa_engine *engine = base->engine;
int i;
/* Set the hash state in the IVDIG regs. */
for (i = 0; i < ARRAY_SIZE(creq->state); i++)
writel_relaxed(creq->state[i], engine->regs +
CESA_IVDIG(i));
}
mv_cesa_dma_step(base);
}
static void mv_cesa_ahash_step(struct crypto_async_request *req)
{
struct ahash_request *ahashreq = ahash_request_cast(req);
struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
-mv_cesa_dma_step(&creq->base);
+mv_cesa_ahash_dma_step(ahashreq);
else
mv_cesa_ahash_std_step(ahashreq);
}
@@ -584,12 +603,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
struct mv_cesa_ahash_dma_iter iter;
struct mv_cesa_op_ctx *op = NULL;
unsigned int frag_len;
+bool set_state = false;
int ret;
u32 type;
basereq->chain.first = NULL;
basereq->chain.last = NULL;
+if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl))
+set_state = true;
if (creq->src_nents) {
ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
DMA_TO_DEVICE);
@@ -683,6 +706,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
if (type != CESA_TDMA_RESULT)
basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
if (set_state) {
/*
* Put the CESA_TDMA_SET_STATE flag on the first tdma desc to
* let the step logic know that the IVDIG registers should be
* explicitly set before launching a TDMA chain.
*/
basereq->chain.first->flags |= CESA_TDMA_SET_STATE;
}
return 0;
err_free_tdma:
...
@@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine,
last->next = dreq->chain.first;
engine->chain.last = dreq->chain.last;
-if (!(last->flags & CESA_TDMA_BREAK_CHAIN))
+/*
* Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
* the last element of the current chain, or if the request
* being queued needs the IV regs to be set before lauching
* the request.
*/
if (!(last->flags & CESA_TDMA_BREAK_CHAIN) &&
!(dreq->chain.first->flags & CESA_TDMA_SET_STATE))
last->next_dma = dreq->chain.first->cur_dma;
}
}
...
@@ -903,8 +903,10 @@ int __init detect_intel_iommu(void)
x86_init.iommu.iommu_init = intel_iommu_init;
#endif
-acpi_put_table(dmar_tbl);
-dmar_tbl = NULL;
+if (dmar_tbl) {
+acpi_put_table(dmar_tbl);
+dmar_tbl = NULL;
+}
up_write(&dmar_global_lock);
return ret ? 1 : -ENODEV;
...
@@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work)
DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR,
&lp->rx_dma_regs->dmasm);
-korina_free_ring(dev);
napi_disable(&lp->napi);
+korina_free_ring(dev);
if (korina_init(dev) < 0) {
printk(KERN_ERR "%s: cannot restart device\n", dev->name);
return;
@@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev)
tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR;
writel(tmp, &lp->rx_dma_regs->dmasm);
-korina_free_ring(dev);
napi_disable(&lp->napi);
cancel_work_sync(&lp->restart_task);
+korina_free_ring(dev);
free_irq(lp->rx_irq, dev);
free_irq(lp->tx_irq, dev);
free_irq(lp->ovr_irq, dev);
...
@@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev)
/* Configure tx cq's and rings */
for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
-u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
+u8 num_tx_rings_p_up = t == TX ?
+priv->num_tx_rings_p_up : priv->tx_ring_num[t];
for (i = 0; i < priv->tx_ring_num[t]; i++) {
/* Configure cq */
...
@@ -326,6 +326,7 @@ enum cfg_version {
static const struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 },
+{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 },
...
@@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
unsigned int mii_address = priv->hw->mii.addr;
unsigned int mii_data = priv->hw->mii.data;
-u32 value = MII_WRITE | MII_BUSY;
+u32 value = MII_BUSY;
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
@@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
& priv->hw->mii.clk_csr_mask;
if (priv->plat->has_gmac4)
value |= MII_GMAC4_WRITE;
+else
+value |= MII_WRITE;
/* Wait until any existing MII operation is complete */
if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
...
@@ -99,6 +99,11 @@ struct ipvl_port {
int count;
};
+struct ipvl_skb_cb {
+bool tx_pkt;
+};
+#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
{
return rcu_dereference(d->rx_handler_data);
...
@@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work)
unsigned int mac_hash;
int ret;
u8 pkt_type;
-bool hlocal, dlocal;
+bool tx_pkt;
__skb_queue_head_init(&list);
@@ -207,8 +207,11 @@ void ipvlan_process_multicast(struct work_struct *work)
spin_unlock_bh(&port->backlog.lock);
while ((skb = __skb_dequeue(&list)) != NULL) {
+struct net_device *dev = skb->dev;
+bool consumed = false;
ethh = eth_hdr(skb);
-hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
mac_hash = ipvlan_mac_hash(ethh->h_dest);
if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
@@ -216,41 +219,45 @@ void ipvlan_process_multicast(struct work_struct *work)
else
pkt_type = PACKET_MULTICAST;
-dlocal = false;
rcu_read_lock();
list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-if (hlocal && (ipvlan->dev == skb->dev)) {
-dlocal = true;
+if (tx_pkt && (ipvlan->dev == skb->dev))
continue;
-}
if (!test_bit(mac_hash, ipvlan->mac_filters))
continue;
+if (!(ipvlan->dev->flags & IFF_UP))
+continue;
ret = NET_RX_DROP;
len = skb->len + ETH_HLEN;
nskb = skb_clone(skb, GFP_ATOMIC);
-if (!nskb)
-goto acct;
+local_bh_disable();
+if (nskb) {
+consumed = true;
nskb->pkt_type = pkt_type;
nskb->dev = ipvlan->dev;
-if (hlocal)
+if (tx_pkt)
ret = dev_forward_skb(ipvlan->dev, nskb);
else
ret = netif_rx(nskb);
-acct:
+}
ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+local_bh_enable();
}
rcu_read_unlock();
-if (dlocal) {
+if (tx_pkt) {
/* If the packet originated here, send it out. */
skb->dev = port->dev;
skb->pkt_type = pkt_type;
dev_queue_xmit(skb);
} else {
-kfree_skb(skb);
+if (consumed)
+consume_skb(skb);
+else
+kfree_skb(skb);
}
+if (dev)
+dev_put(dev);
}
}
@@ -470,15 +477,24 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
}
static void ipvlan_multicast_enqueue(struct ipvl_port *port,
-struct sk_buff *skb)
+struct sk_buff *skb, bool tx_pkt)
{
if (skb->protocol == htons(ETH_P_PAUSE)) {
kfree_skb(skb);
return;
}
+/* Record that the deferred packet is from TX or RX path. By
+ * looking at mac-addresses on packet will lead to erronus decisions.
+ * (This would be true for a loopback-mode on master device or a
+ * hair-pin mode of the switch.)
+ */
+IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
spin_lock(&port->backlog.lock);
if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+if (skb->dev)
+dev_hold(skb->dev);
__skb_queue_tail(&port->backlog, skb);
spin_unlock(&port->backlog.lock);
schedule_work(&port->wq);
@@ -537,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
} else if (is_multicast_ether_addr(eth->h_dest)) {
ipvlan_skb_crossing_ns(skb, NULL);
-ipvlan_multicast_enqueue(ipvlan->port, skb);
+ipvlan_multicast_enqueue(ipvlan->port, skb, true);
return NET_XMIT_SUCCESS;
}
@@ -634,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
*/
if (nskb) {
ipvlan_skb_crossing_ns(nskb, NULL);
-ipvlan_multicast_enqueue(port, nskb);
+ipvlan_multicast_enqueue(port, nskb, false);
}
}
} else {
...
@@ -135,6 +135,7 @@ static int ipvlan_port_create(struct net_device *dev)
static void ipvlan_port_destroy(struct net_device *dev)
{
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
+struct sk_buff *skb;
dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
@@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
}
netdev_rx_handler_unregister(dev);
cancel_work_sync(&port->wq);
-__skb_queue_purge(&port->backlog);
+while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
+if (skb->dev)
+dev_put(skb->dev);
+kfree_skb(skb);
+}
kfree(port);
}
...
@@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}
static int __dax_invalidate_mapping_entry(struct address_space *mapping,
pgoff_t index, bool trunc)
{
int ret = 0;
void *entry;
struct radix_tree_root *page_tree = &mapping->page_tree;
spin_lock_irq(&mapping->tree_lock);
entry = get_unlocked_mapping_entry(mapping, index, NULL);
if (!entry || !radix_tree_exceptional_entry(entry))
goto out;
if (!trunc &&
(radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
goto out;
radix_tree_delete(page_tree, index);
mapping->nrexceptional--;
ret = 1;
out:
put_unlocked_mapping_entry(mapping, index, entry);
spin_unlock_irq(&mapping->tree_lock);
return ret;
}
/*
* Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
* entry to get unlocked before deleting it.
*/
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
-void *entry;
-spin_lock_irq(&mapping->tree_lock);
-entry = get_unlocked_mapping_entry(mapping, index, NULL);
+int ret = __dax_invalidate_mapping_entry(mapping, index, true);
/*
* This gets called from truncate / punch_hole path. As such, the caller
* must hold locks protecting against concurrent modifications of the
@@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
* caller has seen exceptional entry for this index, we better find it
* at that index as well...
*/
-if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) {
-spin_unlock_irq(&mapping->tree_lock);
-return 0;
-}
-radix_tree_delete(&mapping->page_tree, index);
+WARN_ON_ONCE(!ret);
+return ret;
+}
+/*
* Invalidate exceptional DAX entry if easily possible. This handles DAX
* entries for invalidate_inode_pages() so we evict the entry only if we can
* do so without blocking.
*/
int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
{
int ret = 0;
void *entry, **slot;
struct radix_tree_root *page_tree = &mapping->page_tree;
spin_lock_irq(&mapping->tree_lock);
entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
if (!entry || !radix_tree_exceptional_entry(entry) ||
slot_locked(mapping, slot))
goto out;
if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
goto out;
radix_tree_delete(page_tree, index);
mapping->nrexceptional--;
+ret = 1;
+out:
spin_unlock_irq(&mapping->tree_lock);
-dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-return 1;
+if (ret)
+dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+return ret;
+}
+/*
* Invalidate exceptional DAX entry if it is clean.
*/
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index)
{
return __dax_invalidate_mapping_entry(mapping, index, false);
}
/*
@@ -488,15 +539,16 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
* otherwise it will simply fall out of the page cache under memory
* pressure without ever having been dirtied.
*/
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static int dax_load_hole(struct address_space *mapping, void **entry,
struct vm_fault *vmf)
{
struct page *page;
+int ret;
/* Hole page already exists? Return it... */
-if (!radix_tree_exceptional_entry(entry)) {
-vmf->page = entry;
-return VM_FAULT_LOCKED;
+if (!radix_tree_exceptional_entry(*entry)) {
+page = *entry;
+goto out;
}
/* This will replace locked radix tree entry with a hole page */
@@ -504,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
vmf->gfp_mask | __GFP_ZERO);
if (!page)
return VM_FAULT_OOM;
+out:
vmf->page = page;
-return VM_FAULT_LOCKED;
+ret = finish_fault(vmf);
vmf->page = NULL;
*entry = page;
if (!ret) {
/* Grab reference for PTE that is now referencing the page */
get_page(page);
return VM_FAULT_NOPAGE;
}
return ret;
}
static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
@@ -934,6 +995,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
return -EIO;
/*
* Write can allocate block for an area which has a hole page mapped
* into page tables. We have to tear down these mappings so that data
* written by write(2) is visible in mmap.
*/
if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
invalidate_inode_pages2_range(inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);
}
while (pos < end) {
unsigned offset = pos & (PAGE_SIZE - 1);
struct blk_dax_ctl dax = { 0 };
@@ -992,23 +1064,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == WRITE)
flags |= IOMAP_WRITE;
/*
* Yes, even DAX files can have page cache attached to them: A zeroed
* page is inserted into the pagecache when we have to serve a write
* fault on a hole. It should never be dirtied and can simply be
* dropped from the pagecache once we get real data for the page.
*
* XXX: This is racy against mmap, and there's nothing we can do about
* it. We'll eventually need to shift this down even further so that
* we can check if we allocated blocks over a hole first.
*/
if (mapping->nrpages) {
ret = invalidate_inode_pages2_range(mapping,
pos >> PAGE_SHIFT,
(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
}
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
iter, dax_iomap_actor);
@@ -1023,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);
static int dax_fault_return(int error)
{
if (error == 0)
return VM_FAULT_NOPAGE;
if (error == -ENOMEM)
return VM_FAULT_OOM;
return VM_FAULT_SIGBUS;
}
/**
* dax_iomap_fault - handle a page fault on a DAX file
* @vma: The virtual memory area where the fault occurred
@@ -1055,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (pos >= i_size_read(inode))
return VM_FAULT_SIGBUS;
entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
if (IS_ERR(entry)) {
error = PTR_ERR(entry);
goto out;
}
if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
flags |= IOMAP_WRITE;
@@ -1071,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
*/
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
if (error)
-goto unlock_entry;
+return dax_fault_return(error);
if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-error = -EIO; /* fs corruption? */
+vmf_ret = dax_fault_return(-EIO); /* fs corruption? */
+goto finish_iomap;
+}
+entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+if (IS_ERR(entry)) {
+vmf_ret = dax_fault_return(PTR_ERR(entry));
goto finish_iomap;
}
@@ -1096,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
if (error)
-goto finish_iomap;
+goto error_unlock_entry;
__SetPageUptodate(vmf->cow_page);
vmf_ret = finish_fault(vmf);
if (!vmf_ret)
vmf_ret = VM_FAULT_DONE_COW;
-goto finish_iomap;
+goto unlock_entry;
}
switch (iomap.type) {
@@ -1114,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
error = dax_insert_mapping(mapping, iomap.bdev, sector,
PAGE_SIZE, &entry, vma, vmf);
+/* -EBUSY is fine, somebody else faulted on the same PTE */
+if (error == -EBUSY)
+error = 0;
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-vmf_ret = dax_load_hole(mapping, entry, vmf);
-break;
+vmf_ret = dax_load_hole(mapping, &entry, vmf);
+goto unlock_entry;
}
/*FALLTHRU*/
default:
@@ -1128,31 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
break;
}
+error_unlock_entry:
+vmf_ret = dax_fault_return(error) | major;
+unlock_entry:
+put_locked_mapping_entry(mapping, vmf->pgoff, entry);
finish_iomap:
if (ops->iomap_end) {
-if (error || (vmf_ret & VM_FAULT_ERROR)) {
-/* keep previous error */
-ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-&iomap);
-} else {
-error = ops->iomap_end(inode, pos, PAGE_SIZE,
-PAGE_SIZE, flags, &iomap);
-}
-}
-unlock_entry:
-if (vmf_ret != VM_FAULT_LOCKED || error)
-put_locked_mapping_entry(mapping, vmf->pgoff, entry);
-out:
-if (error == -ENOMEM)
-return VM_FAULT_OOM | major;
-/* -EBUSY is fine, somebody else faulted on the same PTE */
-if (error < 0 && error != -EBUSY)
-return VM_FAULT_SIGBUS | major;
-if (vmf_ret) {
-WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-return vmf_ret;
+int copied = PAGE_SIZE;
+if (vmf_ret & VM_FAULT_ERROR)
+copied = 0;
+/*
+* The fault is done by now and there's no way back (other
+* thread may be already happily using PTE we have installed).
+* Just ignore error from ->iomap_end since we cannot do much
+* with it.
+*/
+ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
}
-return VM_FAULT_NOPAGE | major;
+return vmf_ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);
@@ -1276,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
goto fallback;
/*
* grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
* PMD or a HZP entry. If it can't (because a 4k page is already in
* the tree, for instance), it will return -EEXIST and we just fall
* back to 4k entries.
*/
entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
if (IS_ERR(entry))
goto fallback;
/*
* Note that we don't use iomap_apply here. We aren't doing I/O, only
* setting up a mapping, so really we're using iomap_begin() as a way
@@ -1294,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
pos = (loff_t)pgoff << PAGE_SHIFT;
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
if (error)
-goto unlock_entry;
+goto fallback;
if (iomap.offset + iomap.length < pos + PMD_SIZE)
goto finish_iomap;
/*
* grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
* PMD or a HZP entry. If it can't (because a 4k page is already in
* the tree, for instance), it will return -EEXIST and we just fall
* back to 4k entries.
*/
entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
if (IS_ERR(entry))
goto finish_iomap;
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1310,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
-goto finish_iomap;
+goto unlock_entry;
result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
&entry);
break;
@@ -1319,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
break;
}
-unlock_entry:
-put_locked_mapping_entry(mapping, pgoff, entry);
finish_iomap:
if (ops->iomap_end) {
-if (result == VM_FAULT_FALLBACK) {
-ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-&iomap);
-} else {
-error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-iomap_flags, &iomap);
-if (error)
-result = VM_FAULT_FALLBACK;
-}
+int copied = PMD_SIZE;
+if (result == VM_FAULT_FALLBACK)
+copied = 0;
+/*
+* The fault is done by now and there's no way back (other
+* thread may be already happily using PMD we have installed).
+* Just ignore error from ->iomap_end since we cannot do much
+* with it.
+*/
+ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+&iomap);
}
+unlock_entry:
+put_locked_mapping_entry(mapping, pgoff, entry);
fallback:
if (result == VM_FAULT_FALLBACK) {
split_huge_pmd(vma, pmd, address);
...
@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(&ei->truncate_mutex);
goto cleanup;
}
-} else {
-*new = true;
}
+*new = true;
ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
mutex_unlock(&ei->truncate_mutex);
...
@@ -258,7 +258,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
-handle_t *handle = NULL;
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
-down_read(&EXT4_I(inode)->i_mmap_sem);
-handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-EXT4_DATA_TRANS_BLOCKS(sb));
-} else
-down_read(&EXT4_I(inode)->i_mmap_sem);
-if (IS_ERR(handle))
-result = VM_FAULT_SIGBUS;
-else
-result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
-if (write) {
-if (!IS_ERR(handle))
-ext4_journal_stop(handle);
-up_read(&EXT4_I(inode)->i_mmap_sem);
+}
+down_read(&EXT4_I(inode)->i_mmap_sem);
+result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+up_read(&EXT4_I(inode)->i_mmap_sem);
+if (write)
sb_end_pagefault(sb);
-} else
-up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, unsigned int flags)
{
int result;
-handle_t *handle = NULL;
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = flags & FAULT_FLAG_WRITE;
@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
-down_read(&EXT4_I(inode)->i_mmap_sem);
-handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-ext4_chunk_trans_blocks(inode,
-PMD_SIZE / PAGE_SIZE));
-} else
-down_read(&EXT4_I(inode)->i_mmap_sem);
-if (IS_ERR(handle))
-result = VM_FAULT_SIGBUS;
-else {
-result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
-&ext4_iomap_ops);
}
-if (write) {
-if (!IS_ERR(handle))
-ext4_journal_stop(handle);
-up_read(&EXT4_I(inode)->i_mmap_sem);
+down_read(&EXT4_I(inode)->i_mmap_sem);
+result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+&ext4_iomap_ops);
+up_read(&EXT4_I(inode)->i_mmap_sem);
+if (write)
sb_end_pagefault(sb);
-} else
-up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
...
@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, void *entry, bool wake_all);
...
@@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func);
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
void bpf_warn_invalid_xdp_action(u32 act);
-void bpf_warn_invalid_xdp_buffer(void);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
...
@@ -73,13 +73,13 @@
*/
enum pageflags {
PG_locked, /* Page is locked. Don't touch. */
-PG_waiters, /* Page has waiters, check its waitqueue */
PG_error,
PG_referenced,
PG_uptodate,
PG_dirty,
PG_lru,
PG_active,
+PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
PG_slab,
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
PG_arch_1,
...
@@ -110,6 +110,7 @@ struct netns_ipv4 {
int sysctl_tcp_orphan_retries;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
+int sysctl_tcp_tw_reuse;
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
...
@@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
-extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
...
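The two header hunks above move tcp_tw_reuse from a global sysctl into struct netns_ipv4, making it per network namespace. As an illustrative kernel-style fragment only (the converted call sites are not part of the hunks shown here, and the helper name is made up), users of the knob would now read it through the socket's namespace:

/* Hypothetical helper sketch; assumes the usual net/sock headers. */
static bool example_tw_reuse_enabled(const struct sock *sk)
{
	return sock_net(sk)->ipv4.sysctl_tcp_tw_reuse != 0;
}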
@@ -1471,6 +1471,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
bool multi_instance)
{
int cpu, ret = 0;
+bool dynstate;
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
@@ -1480,6 +1481,12 @@ int __cpuhp_setup_state(enum cpuhp_state state,
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
+dynstate = state == CPUHP_AP_ONLINE_DYN;
+if (ret > 0 && dynstate) {
+state = ret;
+ret = 0;
+}
if (ret || !invoke || !startup)
goto out;
@@ -1508,7 +1515,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
*/
-if (!ret && state == CPUHP_AP_ONLINE_DYN)
+if (!ret && dynstate)
return state;
return ret;
}
...
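For context, a sketch of a typical caller (the names are hypothetical, not from this patch): cpuhp_setup_state() with CPUHP_AP_ONLINE_DYN returns the dynamically allocated state as a positive value, and the fix above makes __cpuhp_setup_state() itself use that allocated state, rather than the CPUHP_AP_ONLINE_DYN placeholder, when it invokes the startup callback on already-online CPUs.

#include <linux/cpuhotplug.h>

static enum cpuhp_state my_hp_state;	/* hypothetical example state */

static int my_cpu_online(unsigned int cpu)  { return 0; }
static int my_cpu_offline(unsigned int cpu) { return 0; }

static int __init my_example_init(void)
{
	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				    my_cpu_online, my_cpu_offline);
	if (ret < 0)
		return ret;
	my_hp_state = ret;	/* ret > 0 is the dynamically allocated state */
	return 0;
}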
@@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
}
EXPORT_SYMBOL_GPL(add_page_wait_queue);
#ifndef clear_bit_unlock_is_negative_byte
/*
* PG_waiters is the high bit in the same byte as PG_locked.
*
* On x86 (and on many other architectures), we can clear PG_locked and
* test the sign bit at the same time. But if the architecture does
* not support that special operation, we just do this all by hand
* instead.
*
* The read of PG_waiters has to be after (or concurrently with) PG_locked
* being cleared, but a memory barrier should be unnecessary since it is
* in the same byte as PG_locked.
*/
static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
{
clear_bit_unlock(nr, mem);
/* smp_mb__after_atomic(); */
return test_bit(PG_waiters, mem);
}
#endif
/** /**
* unlock_page - unlock a locked page * unlock_page - unlock a locked page
* @page: the page * @page: the page
...@@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); ...@@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
* mechanism between PageLocked pages and PageWriteback pages is shared. * mechanism between PageLocked pages and PageWriteback pages is shared.
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
* *
* The mb is necessary to enforce ordering between the clear_bit and the read * Note that this depends on PG_waiters being the sign bit in the byte
* of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
* clear the PG_locked bit and test PG_waiters at the same time fairly
* portably (architectures that do LL/SC can test any bit, while x86 can
* test the sign bit).
*/ */
void unlock_page(struct page *page) void unlock_page(struct page *page)
{ {
BUILD_BUG_ON(PG_waiters != 7);
page = compound_head(page); page = compound_head(page);
VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page);
clear_bit_unlock(PG_locked, &page->flags); if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
smp_mb__after_atomic(); wake_up_page_bit(page, PG_locked);
wake_up_page(page, PG_locked);
} }
EXPORT_SYMBOL(unlock_page); EXPORT_SYMBOL(unlock_page);
......
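With the flag moved, unlock_page() can release PG_locked and learn whether anyone is waiting in a single step via clear_bit_unlock_is_negative_byte(); the generic fallback above gets away without an extra barrier only because both bits sit in one byte. As a loose userspace analogy (GCC/Clang atomic builtins, not the kernel helper), one atomic byte-wide AND clears the lock bit and reports the old sign bit at the same time:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_LOCKED_BIT  0
#define PG_WAITERS_BIT 7

static bool clear_lock_and_test_waiters(uint8_t *byte)
{
	/* Atomically clear bit 0 with release ordering; the returned old
	 * value still carries bit 7, the "waiters" sign bit of this byte. */
	uint8_t old = __atomic_fetch_and(byte,
					 (uint8_t)~(1u << PG_LOCKED_BIT),
					 __ATOMIC_RELEASE);
	return old & (1u << PG_WAITERS_BIT);
}

int main(void)
{
	uint8_t flags = (1u << PG_LOCKED_BIT) | (1u << PG_WAITERS_BIT);

	if (clear_lock_and_test_waiters(&flags))
		puts("waiters present: would wake them up now");
	printf("flags after unlock: 0x%02x\n", flags);
	return 0;
}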
...@@ -24,20 +24,12 @@ ...@@ -24,20 +24,12 @@
#include <linux/rmap.h> #include <linux/rmap.h>
#include "internal.h" #include "internal.h"
static void clear_exceptional_entry(struct address_space *mapping, static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
pgoff_t index, void *entry) void *entry)
{ {
struct radix_tree_node *node; struct radix_tree_node *node;
void **slot; void **slot;
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return;
if (dax_mapping(mapping)) {
dax_delete_mapping_entry(mapping, index);
return;
}
spin_lock_irq(&mapping->tree_lock); spin_lock_irq(&mapping->tree_lock);
/* /*
* Regular page slots are stabilized by the page lock even * Regular page slots are stabilized by the page lock even
...@@ -55,6 +47,56 @@ static void clear_exceptional_entry(struct address_space *mapping, ...@@ -55,6 +47,56 @@ static void clear_exceptional_entry(struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock);
} }
/*
* Unconditionally remove exceptional entry. Usually called from truncate path.
*/
static void truncate_exceptional_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return;
if (dax_mapping(mapping)) {
dax_delete_mapping_entry(mapping, index);
return;
}
clear_shadow_entry(mapping, index, entry);
}
/*
* Invalidate exceptional entry if easily possible. This handles exceptional
* entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
* clean entries.
*/
static int invalidate_exceptional_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return 1;
if (dax_mapping(mapping))
return dax_invalidate_mapping_entry(mapping, index);
clear_shadow_entry(mapping, index, entry);
return 1;
}
/*
* Invalidate exceptional entry if clean. This handles exceptional entries for
* invalidate_inode_pages2() so for DAX it evicts only clean entries.
*/
static int invalidate_exceptional_entry2(struct address_space *mapping,
pgoff_t index, void *entry)
{
/* Handled by shmem itself */
if (shmem_mapping(mapping))
return 1;
if (dax_mapping(mapping))
return dax_invalidate_mapping_entry_sync(mapping, index);
clear_shadow_entry(mapping, index, entry);
return 1;
}
/** /**
* do_invalidatepage - invalidate part or all of a page * do_invalidatepage - invalidate part or all of a page
* @page: the page which is affected * @page: the page which is affected
...@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); truncate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
} }
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); truncate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, ...@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); invalidate_exceptional_entry(mapping, index,
page);
continue; continue;
} }
...@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, ...@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
break; break;
if (radix_tree_exceptional_entry(page)) { if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page); if (!invalidate_exceptional_entry2(mapping,
index, page))
ret = -EBUSY;
continue; continue;
} }
......
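The truncate.c change splits the old clear_exceptional_entry() into a shared clear_shadow_entry() core plus three policy wrappers: unconditional removal on the truncate path, best-effort removal for the invalidate_inode_pages() path (for DAX, only unlocked and clean entries), and a sync variant for invalidate_inode_pages2() whose refusal lets the caller report -EBUSY. A compact sketch of that shape with stand-in names, not the mm API:

#include <stdbool.h>
#include <stdio.h>

struct mapping { bool special; };	/* stand-in for the shmem/DAX cases */

static void clear_shadow(long index)	/* shared core, like clear_shadow_entry() */
{
	printf("dropped shadow entry at index %ld\n", index);
}

/* truncate path: always drop, unless the owner handles it itself */
static void truncate_entry(struct mapping *m, long index)
{
	if (m->special)
		return;
	clear_shadow(index);
}

/* invalidate paths: drop only when the policy allows it; report the outcome */
static int invalidate_entry(struct mapping *m, long index,
			    bool only_clean, bool entry_dirty)
{
	if (m->special)
		return 1;			/* owner deals with it */
	if (only_clean && entry_dirty)
		return 0;			/* skipped; caller may report -EBUSY */
	clear_shadow(index);
	return 1;
}

int main(void)
{
	struct mapping m = { .special = false };

	truncate_entry(&m, 3);
	printf("clean entry invalidated: %d\n", invalidate_entry(&m, 4, true, false));
	printf("dirty entry invalidated: %d\n", invalidate_entry(&m, 5, true, true));
	return 0;
}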
...@@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act) ...@@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act)
} }
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
void bpf_warn_invalid_xdp_buffer(void)
{
WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off, int src_reg, int ctx_off,
struct bpf_insn *insn_buf, struct bpf_insn *insn_buf,
......
...@@ -432,13 +432,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -432,13 +432,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &tcp_adv_win_scale_min, .extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max, .extra2 = &tcp_adv_win_scale_max,
}, },
{
.procname = "tcp_tw_reuse",
.data = &sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_frto", .procname = "tcp_frto",
.data = &sysctl_tcp_frto, .data = &sysctl_tcp_frto,
...@@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "tcp_tw_reuse",
.data = &init_net.ipv4.sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
{ {
.procname = "fib_multipath_use_neigh", .procname = "fib_multipath_use_neigh",
......
...@@ -84,7 +84,6 @@ ...@@ -84,7 +84,6 @@
#include <crypto/hash.h> #include <crypto/hash.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly; int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
...@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) ...@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table. and use initial timestamp retrieved from peer table.
*/ */
if (tcptw->tw_ts_recent_stamp && if (tcptw->tw_ts_recent_stamp &&
(!twp || (sysctl_tcp_tw_reuse && (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0) if (tp->write_seq == 0)
...@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 0;
return 0; return 0;
fail: fail:
......
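Taken together, the netns_ipv4, tcp.h, sysctl_net_ipv4.c and tcp_ipv4.c hunks turn tcp_tw_reuse from a single global into a per-network-namespace sysctl: the entry moves from ipv4_table to ipv4_net_table, tcp_sk_init() initializes it to 0, and tcp_twsk_unique() now reads sock_net(sk)->ipv4.sysctl_tcp_tw_reuse. A rough userspace probe of the resulting behaviour, assuming a kernel with this change and CAP_SYS_ADMIN (not part of the patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

static void print_tw_reuse(const char *tag)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "r");
	int val = -1;

	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	printf("%s: tcp_tw_reuse = %d\n", tag, val);
}

int main(void)
{
	print_tw_reuse("initial namespace");

	if (unshare(CLONE_NEWNET)) {		/* needs CAP_SYS_ADMIN */
		perror("unshare");
		return EXIT_FAILURE;
	}
	print_tw_reuse("new namespace");	/* per-netns default, 0 */
	return EXIT_SUCCESS;
}

Run as root, the second line should print the fresh namespace's own default (0) regardless of how the initial namespace has been tuned.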
...@@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts); rcu_assign_pointer(flow->sf_acts, acts);
packet->priority = flow->key.phy.priority; packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark; packet->mark = flow->key.phy.skb_mark;
packet->protocol = flow->key.eth.type;
rcu_read_lock(); rcu_read_lock();
dp = get_dp_rcu(net, ovs_header->dp_ifindex); dp = get_dp_rcu(net, ovs_header->dp_ifindex);
......
...@@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb) ...@@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
* Returns 0 if it encounters a non-vlan or incomplete packet. * Returns 0 if it encounters a non-vlan or incomplete packet.
* Returns 1 after successfully parsing vlan tag. * Returns 1 after successfully parsing vlan tag.
*/ */
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
{ {
struct vlan_head *vh = (struct vlan_head *)skb->data; struct vlan_head *vh = (struct vlan_head *)skb->data;
...@@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) ...@@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
key_vh->tpid = vh->tpid; key_vh->tpid = vh->tpid;
__skb_pull(skb, sizeof(struct vlan_head)); if (unlikely(untag_vlan)) {
int offset = skb->data - skb_mac_header(skb);
u16 tci;
int err;
__skb_push(skb, offset);
err = __skb_vlan_pop(skb, &tci);
__skb_pull(skb, offset);
if (err)
return err;
__vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
} else {
__skb_pull(skb, sizeof(struct vlan_head));
}
return 1; return 1;
} }
...@@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
key->eth.vlan.tpid = skb->vlan_proto; key->eth.vlan.tpid = skb->vlan_proto;
} else { } else {
/* Parse outer vlan tag in the non-accelerated case. */ /* Parse outer vlan tag in the non-accelerated case. */
res = parse_vlan_tag(skb, &key->eth.vlan); res = parse_vlan_tag(skb, &key->eth.vlan, true);
if (res <= 0) if (res <= 0)
return res; return res;
} }
/* Parse inner vlan tag. */ /* Parse inner vlan tag. */
res = parse_vlan_tag(skb, &key->eth.cvlan); res = parse_vlan_tag(skb, &key->eth.cvlan, false);
if (res <= 0) if (res <= 0)
return res; return res;
...@@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, ...@@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
if (err) if (err)
return err; return err;
if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { /* key_extract assumes that skb->protocol is set-up for
/* key_extract assumes that skb->protocol is set-up for * layer 3 packets which is the case for other callers,
* layer 3 packets which is the case for other callers, * in particular packets received from the network stack.
* in particular packets recieved from the network stack. * Here the correct value can be set from the metadata
* Here the correct value can be set from the metadata * extracted above.
* extracted above. * For L2 packet key eth type would be zero. skb protocol
*/ * would be set to correct value later during key-extract.
skb->protocol = key->eth.type; */
} else {
struct ethhdr *eth;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
/* Normally, setting the skb 'protocol' field would be
* handled by a call to eth_type_trans(), but it assumes
* there's a sending device, which we may not have.
*/
if (eth_proto_is_802_3(eth->h_proto))
skb->protocol = eth->h_proto;
else
skb->protocol = htons(ETH_P_802_2);
}
skb->protocol = key->eth.type;
return key_extract(skb, key); return key_extract(skb, key);
} }
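parse_vlan_tag() gains an untag_vlan flag: for the outer tag in the non-accelerated case it now pops the tag out of the packet data with __skb_vlan_pop() and re-adds it as the hardware-accelerated tag, while the inner tag is still simply pulled. What the helper inspects either way is the pair of 16-bit TPID/TCI words at the front of the header; a small userspace parser of that header, as an illustration only (the struct and function names mimic, but are not, the kernel's):

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct vlan_head {		/* same layout idea as the kernel struct */
	uint16_t tpid;		/* 0x8100 or 0x88a8, network byte order */
	uint16_t tci;		/* PCP/DEI/VID, network byte order */
};

static bool parse_vlan_tag(const uint8_t *data, size_t len,
			   struct vlan_head *out)
{
	if (len < sizeof(*out))
		return false;				/* incomplete packet */
	memcpy(out, data, sizeof(*out));
	if (ntohs(out->tpid) != 0x8100 && ntohs(out->tpid) != 0x88a8)
		return false;				/* not a vlan tag */
	return true;
}

int main(void)
{
	/* TPID 0x8100, TCI 0x2005 -> PCP 1, VID 5 */
	const uint8_t frame[] = { 0x81, 0x00, 0x20, 0x05 };
	struct vlan_head vh;

	if (parse_vlan_tag(frame, sizeof(frame), &vh))
		printf("tpid=0x%04x vid=%u\n",
		       ntohs(vh.tpid), ntohs(vh.tci) & 0x0fff);
	return 0;
}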
...@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) ...@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
unsigned long cl; unsigned long cl;
unsigned long fh; unsigned long fh;
int err; int err;
int tp_created = 0; int tp_created;
if ((n->nlmsg_type != RTM_GETTFILTER) && if ((n->nlmsg_type != RTM_GETTFILTER) &&
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM; return -EPERM;
replay: replay:
tp_created = 0;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
if (err < 0) if (err < 0)
return err; return err;
......
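The cls_api.c fix moves the tp_created reset under the replay: label, so each replayed pass starts with a clean "did this attempt create the filter?" marker instead of inheriting it from the previous pass. A toy version of that replay pattern; the names and the injected failure are stand-ins:

#include <errno.h>
#include <stdio.h>

static int attempts;

static int try_attach(int *created)
{
	if (attempts++ == 0) {
		*created = 1;		/* pretend a filter was created */
		return -EAGAIN;		/* ...but the request must be replayed */
	}
	return 0;
}

int main(void)
{
	int created, err;

replay:
	created = 0;			/* reset per attempt, as in the fix */
	err = try_attach(&created);
	if (err == -EAGAIN)
		goto replay;
	printf("err=%d created=%d\n", err, created);
	return 0;
}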
...@@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error) ...@@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error)
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
if (TIPC_SKB_CB(skb)->bytes_read) { if (TIPC_SKB_CB(skb)->bytes_read) {
kfree_skb(skb); kfree_skb(skb);
} else { continue;
if (!tipc_sk_type_connectionless(sk) && }
sk->sk_state != TIPC_DISCONNECTING) { if (!tipc_sk_type_connectionless(sk) &&
tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_state != TIPC_DISCONNECTING) {
tipc_node_remove_conn(net, dnode, tsk->portid); tipc_set_sk_state(sk, TIPC_DISCONNECTING);
} tipc_node_remove_conn(net, dnode, tsk->portid);
tipc_sk_respond(sk, skb, error);
} }
tipc_sk_respond(sk, skb, error);
} }
if (tipc_sk_type_connectionless(sk))
return;
if (sk->sk_state != TIPC_DISCONNECTING) { if (sk->sk_state != TIPC_DISCONNECTING) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
...@@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error) ...@@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error)
tsk->portid, error); tsk->portid, error);
if (skb) if (skb)
tipc_node_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
if (!tipc_sk_type_connectionless(sk)) { tipc_node_remove_conn(net, dnode, tsk->portid);
tipc_node_remove_conn(net, dnode, tsk->portid); tipc_set_sk_state(sk, TIPC_DISCONNECTING);
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
}
} }
} }
......
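The TIPC hunk flattens __tipc_shutdown(): skbs that were already partly read are freed and skipped with continue, connectionless sockets return before any disconnect message is considered, and the connection-oriented teardown that follows no longer needs its own nested checks. The same guard-clause shape in miniature (stand-in types and functions, not the TIPC API):

#include <stdbool.h>
#include <stdio.h>

struct item { bool already_read; };

static void drain(struct item *q, int n, bool connectionless)
{
	for (int i = 0; i < n; i++) {
		if (q[i].already_read) {
			printf("free item %d\n", i);
			continue;		/* was a nested else before */
		}
		printf("respond to item %d\n", i);
	}
	if (connectionless)
		return;				/* no disconnect message needed */
	printf("send disconnect message\n");
}

int main(void)
{
	struct item q[] = { { true }, { false } };

	drain(q, 2, false);
	return 0;
}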