提交 64fce444 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) BPF speculation prevention and BPF_JIT_ALWAYS_ON, from Alexei
    Starovoitov.

 2) Revert the dev_get_random_name() changes, as adjusting the error
    codes seen by userspace definitely breaks stuff.

 3) Fix TX DMA map/unmap on older iwlwifi devices, from Emmanuel
    Grumbach.

 4) Fix wrong AF family when requesting sock diag modules, from Andrii
    Vladyka.

 5) Don't add new ipv6 routes attached to the null_entry, from Wei Wang.

 6) Some SCTP sockopt length fixes from Marcelo Ricardo Leitner.

 7) Don't leak when removing VLAN ID 0, from Cong Wang.

 8) Hey there's a potential leak in ipv6_make_skb() too, from Eric
    Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (27 commits)
  ipv6: sr: fix TLVs not being copied using setsockopt
  ipv6: fix possible mem leaks in ipv6_make_skb()
  mlxsw: spectrum_qdisc: Don't use variable array in mlxsw_sp_tclass_congestion_enable
  mlxsw: pci: Wait after reset before accessing HW
  nfp: always unmask aux interrupts at init
  8021q: fix a memory leak for VLAN 0 device
  of_mdio: avoid MDIO bus removal when a PHY is missing
  caif_usb: use strlcpy() instead of strncpy()
  doc: clarification about setting SO_ZEROCOPY
  net: gianfar_ptp: move set_fipers() to spinlock protecting area
  sctp: make use of pre-calculated len
  sctp: add a ceiling to optlen in some sockopts
  sctp: GFP_ATOMIC is not needed in sctp_setsockopt_events
  bpf: introduce BPF_JIT_ALWAYS_ON config
  bpf: avoid false sharing of map refcount with max_entries
  ipv6: remove null_entry before adding default route
  SolutionEngine771x: add Ether TSU resource
  SolutionEngine771x: fix Ether platform data
  docs-rst: networking: wire up msg_zerocopy
  net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg()
  ...
......@@ -9,6 +9,7 @@ Contents:
batman-adv
kapi
z8530book
msg_zerocopy
.. only:: subproject
......@@ -16,4 +17,3 @@ Contents:
=======
* :ref:`genindex`
......@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
error(1, errno, "setsockopt zerocopy");
Setting the socket option only works when the socket is in its initial
(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
for example, will lead to an EBUSY error. In this case, the option should be set
to the listening socket and it will be inherited by the accepted sockets.
Transmission
------------
......
......@@ -9,6 +9,7 @@
*/
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/sh_eth.h>
#include <mach-se/mach/se.h>
#include <mach-se/mach/mrshpc.h>
#include <asm/machvec.h>
......@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
defined(CONFIG_CPU_SUBTYPE_SH7712)
/* SH771X Ethernet driver */
/* Platform data for the "sh771x-ether" devices: PHY address (PHY_ID) and
 * an MII PHY interface. Both sh_eth0_device and sh_eth1_device point their
 * .platform_data at this one shared instance.
 */
static struct sh_eth_plat_data sh_eth_plat = {
.phy = PHY_ID,
.phy_interface = PHY_INTERFACE_MODE_MII,
};
static struct resource sh_eth0_resources[] = {
[0] = {
.start = SH_ETH0_BASE,
.end = SH_ETH0_BASE + 0x1B8,
.end = SH_ETH0_BASE + 0x1B8 - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
.start = SH_TSU_BASE,
.end = SH_TSU_BASE + 0x200 - 1,
.flags = IORESOURCE_MEM,
},
[2] = {
.start = SH_ETH0_IRQ,
.end = SH_ETH0_IRQ,
.flags = IORESOURCE_IRQ,
......@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
.name = "sh771x-ether",
.id = 0,
.dev = {
.platform_data = PHY_ID,
.platform_data = &sh_eth_plat,
},
.num_resources = ARRAY_SIZE(sh_eth0_resources),
.resource = sh_eth0_resources,
......@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
static struct resource sh_eth1_resources[] = {
[0] = {
.start = SH_ETH1_BASE,
.end = SH_ETH1_BASE + 0x1B8,
.end = SH_ETH1_BASE + 0x1B8 - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
.start = SH_TSU_BASE,
.end = SH_TSU_BASE + 0x200 - 1,
.flags = IORESOURCE_MEM,
},
[2] = {
.start = SH_ETH1_IRQ,
.end = SH_ETH1_IRQ,
.flags = IORESOURCE_IRQ,
......@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
.name = "sh771x-ether",
.id = 1,
.dev = {
.platform_data = PHY_ID,
.platform_data = &sh_eth_plat,
},
.num_resources = ARRAY_SIZE(sh_eth1_resources),
.resource = sh_eth1_resources,
......
......@@ -100,6 +100,7 @@
/* Base address */
#define SH_ETH0_BASE 0xA7000000
#define SH_ETH1_BASE 0xA7000400
#define SH_TSU_BASE 0xA7000800
/* PHY ID */
#if defined(CONFIG_CPU_SUBTYPE_SH7710)
# define PHY_ID 0x00
......
......@@ -319,11 +319,10 @@ static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
now = tmr_cnt_read(etsects);
now += delta;
tmr_cnt_write(etsects, now);
set_fipers(etsects);
spin_unlock_irqrestore(&etsects->lock, flags);
set_fipers(etsects);
return 0;
}
......
......@@ -1643,7 +1643,12 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
return 0;
}
wmb(); /* reset needs to be written before we read control register */
/* Reset needs to be written before we read control register, and
* we must wait for the HW to become responsive once again
*/
wmb();
msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
do {
u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
......
......@@ -59,6 +59,7 @@
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
......
......@@ -46,7 +46,8 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
u32 probability, bool is_ecn)
{
char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
char cwtp_cmd[MLXSW_REG_CWTP_LEN];
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
int err;
......@@ -60,10 +61,10 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
if (err)
return err;
mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
}
static int
......
......@@ -568,6 +568,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
return err;
}
nn_writeb(nn, ctrl_offset, entry->entry);
nfp_net_irq_unmask(nn, entry->entry);
return 0;
}
......@@ -582,6 +583,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
unsigned int vector_idx)
{
nn_writeb(nn, ctrl_offset, 0xff);
nn_pci_flush(nn);
free_irq(nn->irq_entries[vector_idx].vector, nn);
}
......
......@@ -384,6 +384,18 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
}
}
if (changed & IEEE80211_CONF_CHANGE_PS) {
list_for_each_entry(tmp, &wcn->vif_list, list) {
vif = wcn36xx_priv_to_vif(tmp);
if (hw->conf.flags & IEEE80211_CONF_PS) {
if (vif->bss_conf.ps) /* ps allowed ? */
wcn36xx_pmc_enter_bmps_state(wcn, vif);
} else {
wcn36xx_pmc_exit_bmps_state(wcn, vif);
}
}
}
mutex_unlock(&wcn->conf_mutex);
return 0;
......@@ -747,17 +759,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
vif_priv->dtim_period = bss_conf->dtim_period;
}
if (changed & BSS_CHANGED_PS) {
wcn36xx_dbg(WCN36XX_DBG_MAC,
"mac bss PS set %d\n",
bss_conf->ps);
if (bss_conf->ps) {
wcn36xx_pmc_enter_bmps_state(wcn, vif);
} else {
wcn36xx_pmc_exit_bmps_state(wcn, vif);
}
}
if (changed & BSS_CHANGED_BSSID) {
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
bss_conf->bssid);
......
......@@ -45,8 +45,10 @@ int wcn36xx_pmc_exit_bmps_state(struct wcn36xx *wcn,
struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
if (WCN36XX_BMPS != vif_priv->pw_state) {
wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n");
return -EINVAL;
/* Unbalanced call or last BMPS enter failed */
wcn36xx_dbg(WCN36XX_DBG_PMC,
"Not in BMPS mode, no need to exit\n");
return -EALREADY;
}
wcn36xx_smd_exit_bmps(wcn, vif);
vif_priv->pw_state = WCN36XX_FULL_POWER;
......
......@@ -670,11 +670,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
return index & (q->n_window - 1);
}
static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
struct iwl_txq *txq, int idx)
{
return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
idx);
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
if (trans->cfg->use_tfh)
idx = iwl_pcie_get_cmd_index(txq, idx);
return txq->tfds + trans_pcie->tfd_size * idx;
}
static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
......
......@@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
* idx is bounded by n_window
*/
......@@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
lockdep_assert_held(&txq->lock);
iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
iwl_pcie_get_tfd(trans_pcie, txq, idx));
iwl_pcie_get_tfd(trans, txq, idx));
/* free SKB */
if (txq->entries) {
......@@ -364,11 +362,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
struct sk_buff *skb,
struct iwl_cmd_meta *out_meta)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
struct iwl_tfh_tfd *tfd =
iwl_pcie_get_tfd(trans_pcie, txq, idx);
struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
dma_addr_t tb_phys;
bool amsdu;
int i, len, tb1_len, tb2_len, hdr_len;
......@@ -565,8 +561,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
u8 group_id = iwl_cmd_groupid(cmd->id);
const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
struct iwl_tfh_tfd *tfd =
iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
memset(tfd, 0, sizeof(*tfd));
......
......@@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int i, num_tbs;
void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
void *tfd = iwl_pcie_get_tfd(trans, txq, index);
/* Sanity check on number of chunks */
num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
......@@ -2018,7 +2018,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
}
trace_iwlwifi_dev_tx(trans->dev, skb,
iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
trans_pcie->tfd_size,
&dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
hdr_len);
......@@ -2092,7 +2092,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
IEEE80211_CCMP_HDR_LEN : 0;
trace_iwlwifi_dev_tx(trans->dev, skb,
iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
trans_pcie->tfd_size,
&dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
......@@ -2425,7 +2425,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
IWL_FIRST_TB_SIZE);
tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
/* Set up entry for this TFD in Tx byte-count array */
iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
iwl_pcie_tfd_get_num_tbs(trans, tfd));
......
......@@ -231,7 +231,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
rc = of_mdiobus_register_phy(mdio, child, addr);
else
rc = of_mdiobus_register_device(mdio, child, addr);
if (rc)
if (rc == -ENODEV)
dev_err(&mdio->dev,
"MDIO device at address %d is missing.\n",
addr);
else if (rc)
goto unregister;
}
......@@ -255,7 +260,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
if (of_mdiobus_child_is_phy(child)) {
rc = of_mdiobus_register_phy(mdio, child, addr);
if (rc)
if (rc && rc != -ENODEV)
goto unregister;
}
}
......
......@@ -43,7 +43,14 @@ struct bpf_map_ops {
};
struct bpf_map {
atomic_t refcnt;
/* 1st cacheline with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
#endif
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
......@@ -52,15 +59,17 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
bool unpriv_array;
/* 7 bytes hole */
/* 2nd cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
atomic_t refcnt;
atomic_t usercnt;
struct bpf_map *inner_map_meta;
struct work_struct work;
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
#endif
};
/* function argument constraints */
......@@ -221,6 +230,7 @@ struct bpf_prog_aux {
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
......
......@@ -1396,6 +1396,13 @@ config BPF_SYSCALL
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
help
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
......
......@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size;
u32 elem_size;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
......@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
elem_size = round_up(attr->value_size, 8);
max_entries = attr->max_entries;
index_mask = roundup_pow_of_two(max_entries) - 1;
if (unpriv)
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;
array_size = sizeof(*array);
if (percpu)
array_size += (u64) attr->max_entries * sizeof(void *);
array_size += (u64) max_entries * sizeof(void *);
else
array_size += (u64) attr->max_entries * elem_size;
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
......@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
......@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;
return array->value + array->elem_size * index;
return array->value + array->elem_size * (index & array->index_mask);
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
......@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}
if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
......@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;
return this_cpu_ptr(array->pptrs[index]);
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
......@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
......@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]),
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
memcpy(array->value + array->elem_size * index,
memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
......@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
......@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
......@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
......
......@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
* @ctx: is the data we are operating on
......@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#else
/* Placeholder bpf_func for builds with CONFIG_BPF_JIT_ALWAYS_ON, where the
 * interpreter above is compiled out. bpf_prog_select_runtime() installs it
 * in place of an interpreter entry. Both arguments are ignored and the
 * result is always 0.
 */
static unsigned int __bpf_prog_ret0(const void *ctx,
const struct bpf_insn *insn)
{
return 0;
}
#endif
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
......@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
fp->bpf_func = __bpf_prog_ret0;
#endif
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
......@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
*/
if (!bpf_prog_is_dev_bound(fp->aux)) {
fp = bpf_int_jit_compile(fp);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
if (!fp->jited) {
*err = -ENOTSUPP;
return fp;
}
#endif
} else {
*err = bpf_prog_offload_compile(fp);
if (*err)
......
......@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
smap_list_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
* causing the refcnt to hit zero and sock user data (psock)
* to be null and queued for garbage collection.
*/
if (likely(psock)) {
smap_list_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
}
write_unlock_bh(&sock->sk_callback_lock);
}
rcu_read_unlock();
......
......@@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
if (func_id == BPF_FUNC_tail_call) {
if (meta.map_ptr == NULL) {
verbose(env, "verifier bug\n");
return -EINVAL;
}
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
}
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
......@@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON) {
verbose(env, "tail_call obusing map_ptr\n");
return -EINVAL;
}
if (!map_ptr->unpriv_array)
continue;
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
insn_buf[2] = *insn;
cnt = 3;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}
......
......@@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
return NULL;
}
}
/* We don't expect to fail. */
if (*err) {
pr_cont("FAIL to attach err=%d len=%d\n",
pr_cont("FAIL to prog_create err=%d len=%d\n",
*err, fprog.len);
return NULL;
}
......@@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
* checks.
*/
fp = bpf_prog_select_runtime(fp, err);
if (*err) {
pr_cont("FAIL to select_runtime err=%d\n", *err);
return NULL;
}
break;
}
......@@ -6461,8 +6464,8 @@ static __init int test_bpf(void)
pass_cnt++;
continue;
}
return err;
err_cnt++;
continue;
}
pr_cont("jited:%u ", fp->jited);
......
......@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
/* Take it out of our own structures, but be sure to interlock with
* HW accelerating devices or SW vlan input packet processing if
* VLAN is not 0 (leave it there for 802.1p).
*/
if (vlan_id)
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
......
......@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
strncpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name) - 1);
caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
strlcpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
cfcnfg_add_phy_layer(cfg,
dev,
......
......@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
strncpy(layer->name, dev->name,
sizeof(layer->name) - 1);
layer->name[sizeof(layer->name) - 1] = 0;
strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
}
......
......@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
sizeof(l->u.rfm.volume)-1);
l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
strncpy(l->u.utility.name, s->sockaddr.u.util.service,
sizeof(l->u.utility.name)-1);
l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
if (l->u.utility.paramlen > sizeof(l->u.utility.params))
......
......@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
strncpy(utility_name, param->u.utility.name,
UTILITY_NAME_LENGTH - 1);
strlcpy(utility_name, param->u.utility.name,
UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
......
......@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
/* We are guaranteed to never error here with cBPF to eBPF
* transitions, since there's no issue with type compatibility
* checks on program arrays.
*/
fp = bpf_prog_select_runtime(fp, &err);
if (err)
goto out_err_free;
kfree(old_prog);
return fp;
......
......@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
NETLINK_SOCK_DIAG, AF_INET);
NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
......
......@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
#else
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &one,
#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
......
......@@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
/* hdrincl should be READ_ONCE(inet->hdrincl)
* but READ_ONCE() doesn't work with bit fields
* but READ_ONCE() doesn't work with bit fields.
* Doing this indirectly yields the same result.
*/
hdrincl = inet->hdrincl;
hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
......
......@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
sr_phdr->segments[0] = **addr_p;
*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
if (sr_ihdr->hdrlen > hops * 2) {
int tlvs_offset, tlvs_length;
tlvs_offset = (1 + hops * 2) << 3;
tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
memcpy((char *)sr_phdr + tlvs_offset,
(char *)sr_ihdr + tlvs_offset, tlvs_length);
}
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
struct net *net = NULL;
......
......@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
rt6_release(leaf);
/* remove null_entry in the root node */
} else if (fn->fn_flags & RTN_TL_ROOT &&
rcu_access_pointer(fn->leaf) ==
net->ipv6.ip6_null_entry) {
RCU_INIT_POINTER(fn->leaf, NULL);
}
return fn;
......@@ -1270,13 +1275,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
return err;
failure:
/* fn->leaf could be NULL if fn is an intermediate node and we
* failed to add the new route to it in both subtree creation
* failure and fib6_add_rt2node() failure case.
* In both cases, fib6_repair_tree() should be called to fix
* fn->leaf.
/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
* 1. fn is an intermediate node and we failed to add the new
* route to it in both subtree creation failure and fib6_add_rt2node()
* failure case.
* 2. fn is the root node in the table and we fail to add the first
* default route to it.
*/
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
if (fn &&
(!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
(fn->fn_flags & RTN_TL_ROOT &&
!rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
......@@ -1531,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
struct fib6_walker *w;
int iter = 0;
/* Set fn->leaf to null_entry for root node. */
if (fn->fn_flags & RTN_TL_ROOT) {
rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
return fn;
}
for (;;) {
struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
lockdep_is_held(&table->tb6_lock));
......@@ -1685,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
}
read_unlock(&net->ipv6.fib6_walker_lock);
/* If it was last route, expunge its radix tree node */
/* If it was last route, call fib6_repair_tree() to:
* 1. For root node, put back null_entry as how the table was created.
* 2. For other nodes, expunge its radix tree node.
*/
if (!rcu_access_pointer(fn->leaf)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
if (!(fn->fn_flags & RTN_TL_ROOT)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
}
fn = fib6_repair_tree(net, table, fn);
}
......
......@@ -1735,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.opt = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
if (err)
if (err) {
ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
......
......@@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
GFP_ATOMIC);
GFP_USER | __GFP_NOWARN);
if (!event)
return -ENOMEM;
......@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
if (optlen < sizeof(struct sctp_hmacalgo))
return -EINVAL;
optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen);
if (IS_ERR(hmacs))
......@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
/* authkey->sca_keylength is u16, so optlen can't be bigger than
* this.
*/
optlen = min_t(unsigned int, optlen, USHRT_MAX +
sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen);
if (IS_ERR(authkey))
......@@ -3893,6 +3900,9 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
if (optlen < sizeof(*params))
return -EINVAL;
/* srs_number_streams is u16, so optlen can't be bigger than this. */
optlen = min_t(unsigned int, optlen, USHRT_MAX +
sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen);
if (IS_ERR(params))
......@@ -5015,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
return -EFAULT;
return 0;
}
......@@ -5645,6 +5655,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
err = -EFAULT;
goto out;
}
/* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
* but we can't change it anymore.
*/
if (put_user(bytes_copied, optlen))
err = -EFAULT;
out:
......@@ -6081,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
params.assoc_id = 0;
} else if (len >= sizeof(struct sctp_assoc_value)) {
len = sizeof(struct sctp_assoc_value);
if (copy_from_user(&params, optval, sizeof(params)))
if (copy_from_user(&params, optval, len))
return -EFAULT;
} else
return -EINVAL;
......@@ -6251,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
len = sizeof(struct sctp_authkeyid);
if (copy_from_user(&val, optval, len))
return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id);
......@@ -6263,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
else
val.scact_keynumber = ep->active_key_id;
len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &val, len))
......@@ -6289,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
......@@ -6334,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
......
......@@ -2619,6 +2619,15 @@ static int __init sock_init(void)
core_initcall(sock_init); /* early initcall */
/*
 * Initcall (registered with pure_initcall()) that sets bpf_jit_enable
 * to 1 when the kernel is built with CONFIG_BPF_JIT_ALWAYS_ON.
 * Without that option it does nothing. Always returns 0.
 */
static int __init jit_init(void)
{
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
bpf_jit_enable = 1;
#endif
return 0;
}
pure_initcall(jit_init);
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
......
......@@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = {
.result = REJECT,
.matches = {
{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
/* ptr & 0x40 == either 0 or 0x40 */
{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
/* ptr << 2 == unknown, (4n) */
{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
/* Checked s>=0 */
{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* packet pointer + nonnegative (4n+2) */
{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
* upper half of the address space.
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* R5 bitwise operator &= on pointer prohibited */
}
},
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册