提交 ba3e2084 编写于 作者: D David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Conflicts:
	net/ipv6/xfrm6_output.c
	net/openvswitch/flow_netlink.c
	net/openvswitch/vport-gre.c
	net/openvswitch/vport-vxlan.c
	net/openvswitch/vport.c
	net/openvswitch/vport.h

The openvswitch conflicts were overlapping changes.  One was
the egress tunnel info fix in 'net' and the other was the
vport ->send() op simplification in 'net-next'.

The xfrm6_output.c conflicts was also a simplification
overlapping a bug fix.
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -46,6 +46,7 @@ Required properties:
Optional properties:
- dual_emac_res_vlan : Specifies VID to be used to segregate the ports
- mac-address : See ethernet.txt file in the same directory
- phy-handle : See ethernet.txt file in the same directory
Note: "ti,hwmods" field is used to fetch the base address and irq
resources from TI, omap hwmod data base during device registration.
......
SMSC LAN87xx Ethernet PHY
Some boards require special tuning values. Configure them
through an Ethernet OF device node.
Optional properties:
- smsc,disable-energy-detect:
If set, do not enable energy detect mode for the SMSC phy.
default: enable energy detect mode
Examples:
smsc phy with disabled energy detect mode on an am335x based board.
&davinci_mdio {
pinctrl-names = "default", "sleep";
pinctrl-0 = <&davinci_mdio_default>;
pinctrl-1 = <&davinci_mdio_sleep>;
status = "okay";
ethernetphy0: ethernet-phy@0 {
reg = <0>;
smsc,disable-energy-detect;
};
};
......@@ -1247,7 +1247,7 @@ static void
l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
{
struct PStack *st = fi->userdata;
struct sk_buff *skb;
struct sk_buff *skb, *nskb;
struct Layer2 *l2 = &st->l2;
u_char header[MAX_HEADER_LEN];
int i, hdr_space_needed;
......@@ -1262,14 +1262,10 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
return;
hdr_space_needed = l2headersize(l2, 0);
if (hdr_space_needed > skb_headroom(skb)) {
struct sk_buff *orig_skb = skb;
skb = skb_realloc_headroom(skb, hdr_space_needed);
if (!skb) {
dev_kfree_skb(orig_skb);
return;
}
nskb = skb_realloc_headroom(skb, hdr_space_needed);
if (!nskb) {
skb_queue_head(&l2->i_queue, skb);
return;
}
spin_lock_irqsave(&l2->lock, flags);
if (test_bit(FLG_MOD128, &l2->flag))
......@@ -1282,7 +1278,7 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
p1);
dev_kfree_skb(l2->windowar[p1]);
}
l2->windowar[p1] = skb_clone(skb, GFP_ATOMIC);
l2->windowar[p1] = skb;
i = sethdraddr(&st->l2, header, CMD);
......@@ -1295,8 +1291,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
l2->vs = (l2->vs + 1) % 8;
}
spin_unlock_irqrestore(&l2->lock, flags);
memcpy(skb_push(skb, i), header, i);
st->l2.l2l1(st, PH_PULL | INDICATION, skb);
memcpy(skb_push(nskb, i), header, i);
st->l2.l2l1(st, PH_PULL | INDICATION, nskb);
test_and_clear_bit(FLG_ACK_PEND, &st->l2.flag);
if (!test_and_set_bit(FLG_T200_RUN, &st->l2.flag)) {
FsmDelTimer(&st->l2.t203, 13);
......
......@@ -1476,7 +1476,7 @@ static void
l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
{
struct layer2 *l2 = fi->userdata;
struct sk_buff *skb, *nskb, *oskb;
struct sk_buff *skb, *nskb;
u_char header[MAX_L2HEADER_LEN];
u_int i, p1;
......@@ -1486,48 +1486,34 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
skb = skb_dequeue(&l2->i_queue);
if (!skb)
return;
if (test_bit(FLG_MOD128, &l2->flag))
p1 = (l2->vs - l2->va) % 128;
else
p1 = (l2->vs - l2->va) % 8;
p1 = (p1 + l2->sow) % l2->window;
if (l2->windowar[p1]) {
printk(KERN_WARNING "%s: l2 try overwrite ack queue entry %d\n",
mISDNDevName4ch(&l2->ch), p1);
dev_kfree_skb(l2->windowar[p1]);
}
l2->windowar[p1] = skb;
i = sethdraddr(l2, header, CMD);
if (test_bit(FLG_MOD128, &l2->flag)) {
header[i++] = l2->vs << 1;
header[i++] = l2->vr << 1;
} else
header[i++] = (l2->vr << 5) | (l2->vs << 1);
nskb = skb_realloc_headroom(skb, i);
if (!nskb) {
printk(KERN_WARNING "%s: no headroom(%d) copy for IFrame\n",
mISDNDevName4ch(&l2->ch), i);
skb_queue_head(&l2->i_queue, skb);
return;
}
if (test_bit(FLG_MOD128, &l2->flag)) {
p1 = (l2->vs - l2->va) % 128;
l2->vs = (l2->vs + 1) % 128;
} else {
header[i++] = (l2->vr << 5) | (l2->vs << 1);
p1 = (l2->vs - l2->va) % 8;
l2->vs = (l2->vs + 1) % 8;
}
nskb = skb_clone(skb, GFP_ATOMIC);
p1 = skb_headroom(nskb);
if (p1 >= i)
memcpy(skb_push(nskb, i), header, i);
else {
printk(KERN_WARNING
"%s: L2 pull_iqueue skb header(%d/%d) too short\n",
mISDNDevName4ch(&l2->ch), i, p1);
oskb = nskb;
nskb = mI_alloc_skb(oskb->len + i, GFP_ATOMIC);
if (!nskb) {
dev_kfree_skb(oskb);
printk(KERN_WARNING "%s: no skb mem in %s\n",
mISDNDevName4ch(&l2->ch), __func__);
return;
}
memcpy(skb_put(nskb, i), header, i);
memcpy(skb_put(nskb, oskb->len), oskb->data, oskb->len);
dev_kfree_skb(oskb);
p1 = (p1 + l2->sow) % l2->window;
if (l2->windowar[p1]) {
printk(KERN_WARNING "%s: l2 try overwrite ack queue entry %d\n",
mISDNDevName4ch(&l2->ch), p1);
dev_kfree_skb(l2->windowar[p1]);
}
l2->windowar[p1] = skb;
memcpy(skb_push(nskb, i), header, i);
l2down(l2, PH_DATA_REQ, l2_newid(l2), nskb);
test_and_clear_bit(FLG_ACK_PEND, &l2->flag);
if (!test_and_set_bit(FLG_T200_RUN, &l2->flag)) {
......
......@@ -847,21 +847,25 @@ static int emac_probe(struct platform_device *pdev)
if (ndev->irq == -ENXIO) {
netdev_err(ndev, "No irq resource\n");
ret = ndev->irq;
goto out;
goto out_iounmap;
}
db->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(db->clk)) {
ret = PTR_ERR(db->clk);
goto out;
goto out_iounmap;
}
clk_prepare_enable(db->clk);
ret = clk_prepare_enable(db->clk);
if (ret) {
dev_err(&pdev->dev, "Error couldn't enable clock (%d)\n", ret);
goto out_iounmap;
}
ret = sunxi_sram_claim(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Error couldn't map SRAM to device\n");
goto out;
goto out_clk_disable_unprepare;
}
db->phy_node = of_parse_phandle(np, "phy", 0);
......@@ -910,6 +914,10 @@ static int emac_probe(struct platform_device *pdev)
out_release_sram:
sunxi_sram_release(&pdev->dev);
out_clk_disable_unprepare:
clk_disable_unprepare(db->clk);
out_iounmap:
iounmap(db->membase);
out:
dev_err(db->dev, "not found (%d).\n", ret);
......@@ -921,8 +929,12 @@ static int emac_probe(struct platform_device *pdev)
static int emac_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct emac_board_info *db = netdev_priv(ndev);
unregister_netdev(ndev);
sunxi_sram_release(&pdev->dev);
clk_disable_unprepare(db->clk);
iounmap(db->membase);
free_netdev(ndev);
dev_dbg(&pdev->dev, "released and freed device\n");
......
......@@ -1595,7 +1595,7 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
packet->rdesc_count, 1);
/* Make sure ownership is written to the descriptor */
dma_wmb();
wmb();
ring->cur = cur_index + 1;
if (!packet->skb->xmit_more ||
......
......@@ -1811,6 +1811,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
struct netdev_queue *txq;
int processed = 0;
unsigned int tx_packets = 0, tx_bytes = 0;
unsigned int cur;
DBGPR("-->xgbe_tx_poll\n");
......@@ -1818,10 +1819,11 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
if (!ring)
return 0;
cur = ring->cur;
txq = netdev_get_tx_queue(netdev, channel->queue_index);
while ((processed < XGBE_TX_DESC_MAX_PROC) &&
(ring->dirty != ring->cur)) {
(ring->dirty != cur)) {
rdata = XGBE_GET_DESC_DATA(ring, ring->dirty);
rdesc = rdata->rdesc;
......
......@@ -2048,7 +2048,7 @@ static void swphy_poll_timer(unsigned long data)
for (i = 0; i < priv->num_ports; i++) {
struct bcm63xx_enetsw_port *port;
int val, j, up, advertise, lpa, lpa2, speed, duplex, media;
int val, j, up, advertise, lpa, speed, duplex, media;
int external_phy = bcm_enet_port_is_rgmii(i);
u8 override;
......@@ -2091,22 +2091,27 @@ static void swphy_poll_timer(unsigned long data)
lpa = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
MII_LPA);
lpa2 = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
MII_STAT1000);
/* figure out media and duplex from advertise and LPA values */
media = mii_nway_result(lpa & advertise);
duplex = (media & ADVERTISE_FULL) ? 1 : 0;
if (lpa2 & LPA_1000FULL)
duplex = 1;
if (lpa2 & (LPA_1000FULL | LPA_1000HALF))
speed = 1000;
else {
if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
speed = 100;
else
speed = 10;
if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
speed = 100;
else
speed = 10;
if (val & BMSR_ESTATEN) {
advertise = bcmenet_sw_mdio_read(priv, external_phy,
port->phy_id, MII_CTRL1000);
lpa = bcmenet_sw_mdio_read(priv, external_phy,
port->phy_id, MII_STAT1000);
if (advertise & (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
&& lpa & (LPA_1000FULL | LPA_1000HALF)) {
speed = 1000;
duplex = (lpa & LPA_1000FULL);
}
}
dev_info(&priv->pdev->dev,
......
......@@ -3,7 +3,7 @@
#
config NET_VENDOR_CAVIUM
tristate "Cavium ethernet drivers"
bool "Cavium ethernet drivers"
depends on PCI
default y
---help---
......
......@@ -1406,6 +1406,12 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat ==
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) {
data[i++] = veb->tc_stats.tc_tx_packets[j];
data[i++] = veb->tc_stats.tc_tx_bytes[j];
data[i++] = veb->tc_stats.tc_rx_packets[j];
data[i++] = veb->tc_stats.tc_rx_bytes[j];
}
}
for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
......
......@@ -8154,6 +8154,7 @@ static int i40e_sw_init(struct i40e_pf *pf)
if (pf->hw.func_caps.vmdq) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
pf->flags |= I40E_FLAG_VMDQ_ENABLED;
pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
}
#ifdef I40E_FCOE
......
......@@ -759,11 +759,23 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
desc->l4i_chk = 0;
desc->byte_cnt = length;
desc->buf_ptr = dma_map_single(dev->dev.parent, data,
length, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
WARN(1, "dma_map_single failed!\n");
return -ENOMEM;
if (length <= 8 && (uintptr_t)data & 0x7) {
/* Copy unaligned small data fragment to TSO header data area */
memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
data, length);
desc->buf_ptr = txq->tso_hdrs_dma
+ txq->tx_curr_desc * TSO_HEADER_SIZE;
} else {
/* Alignment is okay, map buffer and hand off to hardware */
txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
desc->buf_ptr = dma_map_single(dev->dev.parent, data,
length, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev.parent,
desc->buf_ptr))) {
WARN(1, "dma_map_single failed!\n");
return -ENOMEM;
}
}
cmd_sts = BUFFER_OWNED_BY_DMA;
......@@ -779,7 +791,8 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
}
static inline void
txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length,
u32 *first_cmd_sts, bool first_desc)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
......@@ -788,6 +801,7 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
int ret;
u32 cmd_csum = 0;
u16 l4i_chk = 0;
u32 cmd_sts;
tx_index = txq->tx_curr_desc;
desc = &txq->tx_desc_area[tx_index];
......@@ -803,9 +817,17 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
desc->byte_cnt = hdr_len;
desc->buf_ptr = txq->tso_hdrs_dma +
txq->tx_curr_desc * TSO_HEADER_SIZE;
desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
GEN_CRC;
/* Defer updating the first command descriptor until all
* following descriptors have been written.
*/
if (first_desc)
*first_cmd_sts = cmd_sts;
else
desc->cmd_sts = cmd_sts;
txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
txq->tx_curr_desc = 0;
......@@ -819,6 +841,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
int desc_count = 0;
struct tso_t tso;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
struct tx_desc *first_tx_desc;
u32 first_cmd_sts = 0;
/* Count needed descriptors */
if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
......@@ -826,11 +850,14 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
return -EBUSY;
}
first_tx_desc = &txq->tx_desc_area[txq->tx_curr_desc];
/* Initialize the TSO handler, and prepare the first payload */
tso_start(skb, &tso);
total_len = skb->len - hdr_len;
while (total_len > 0) {
bool first_desc = (desc_count == 0);
char *hdr;
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
......@@ -840,7 +867,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
/* prepare packet headers: MAC + IP + TCP */
hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
txq_put_hdr_tso(skb, txq, data_left);
txq_put_hdr_tso(skb, txq, data_left, &first_cmd_sts,
first_desc);
while (data_left > 0) {
int size;
......@@ -860,6 +888,10 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
__skb_queue_tail(&txq->tx_skb, skb);
skb_tx_timestamp(skb);
/* ensure all other descriptors are written before first cmd_sts */
wmb();
first_tx_desc->cmd_sts = first_cmd_sts;
/* clear TX_END status */
mp->work_tx_end &= ~(1 << txq->index);
......
......@@ -31,6 +31,7 @@
#include <linux/pm_runtime.h>
#include <linux/gpio.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/of_device.h>
#include <linux/if_vlan.h>
......@@ -366,6 +367,7 @@ struct cpsw_priv {
spinlock_t lock;
struct platform_device *pdev;
struct net_device *ndev;
struct device_node *phy_node;
struct napi_struct napi_rx;
struct napi_struct napi_tx;
struct device *dev;
......@@ -1146,7 +1148,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
if (priv->phy_node)
slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
&cpsw_adjust_link, 0, slave->data->phy_if);
else
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
&cpsw_adjust_link, slave->data->phy_if);
if (IS_ERR(slave->phy)) {
dev_err(priv->dev, "phy %s not found on slave %d\n",
......@@ -1934,11 +1940,12 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
slave->port_vlan = data->dual_emac_res_vlan;
}
static int cpsw_probe_dt(struct cpsw_platform_data *data,
static int cpsw_probe_dt(struct cpsw_priv *priv,
struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct device_node *slave_node;
struct cpsw_platform_data *data = &priv->data;
int i = 0, ret;
u32 prop;
......@@ -2029,6 +2036,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
if (strcmp(slave_node->name, "slave"))
continue;
priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) {
dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i);
......@@ -2044,7 +2052,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
}
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
PHY_ID_FMT, mdio->name, phyid);
slave_data->phy_if = of_get_phy_mode(slave_node);
if (slave_data->phy_if < 0) {
dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
......@@ -2245,7 +2252,7 @@ static int cpsw_probe(struct platform_device *pdev)
/* Select default pin state */
pinctrl_pm_select_default_state(&pdev->dev);
if (cpsw_probe_dt(&priv->data, pdev)) {
if (cpsw_probe_dt(priv, pdev)) {
dev_err(&pdev->dev, "cpsw: platform data missing\n");
ret = -ENODEV;
goto clean_runtime_disable_ret;
......
......@@ -594,14 +594,12 @@ static struct rtable *geneve_get_rt(struct sk_buff *skb,
rt = ip_route_output_key(geneve->net, fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
dev->stats.tx_carrier_errors++;
return rt;
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
dev->stats.collisions++;
ip_rt_put(rt);
return ERR_PTR(-EINVAL);
return ERR_PTR(-ELOOP);
}
return rt;
}
......@@ -627,12 +625,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel_info *info = NULL;
struct rtable *rt = NULL;
const struct iphdr *iip; /* interior IP header */
int err = -EINVAL;
struct flowi4 fl4;
__u8 tos, ttl;
__be16 sport;
bool udp_csum;
__be16 df;
int err;
if (geneve->collect_md) {
info = skb_tunnel_info(skb);
......@@ -647,7 +645,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
rt = geneve_get_rt(skb, dev, &fl4, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
dev->stats.tx_carrier_errors++;
err = PTR_ERR(rt);
goto tx_error;
}
......@@ -699,10 +697,37 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
tx_error:
dev_kfree_skb(skb);
err:
dev->stats.tx_errors++;
if (err == -ELOOP)
dev->stats.collisions++;
else if (err == -ENETUNREACH)
dev->stats.tx_carrier_errors++;
else
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
struct geneve_dev *geneve = netdev_priv(dev);
struct rtable *rt;
struct flowi4 fl4;
if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;
rt = geneve_get_rt(skb, dev, &fl4, info);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
info->key.tp_src = udp_flow_src_port(geneve->net, skb,
1, USHRT_MAX, true);
info->key.tp_dst = geneve->dst_port;
return 0;
}
static const struct net_device_ops geneve_netdev_ops = {
.ndo_init = geneve_init,
.ndo_uninit = geneve_uninit,
......@@ -713,6 +738,7 @@ static const struct net_device_ops geneve_netdev_ops = {
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_fill_metadata_dst = geneve_fill_metadata_dst,
};
static void geneve_get_drvinfo(struct net_device *dev,
......
......@@ -137,7 +137,7 @@ static const struct proto_ops macvtap_socket_ops;
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
NETIF_F_TSO6 | NETIF_F_UFO)
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)
static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev)
{
......
......@@ -141,6 +141,11 @@ config MICREL_PHY
---help---
Supports the KSZ9021, VSC8201, KS8001 PHYs.
config DP83848_PHY
tristate "Driver for Texas Instruments DP83848 PHY"
---help---
Supports the DP83848 PHY.
config DP83867_PHY
tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
---help---
......
......@@ -26,6 +26,7 @@ obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o
obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o
obj-$(CONFIG_NATIONAL_PHY) += national.o
obj-$(CONFIG_DP83640_PHY) += dp83640.o
obj-$(CONFIG_DP83848_PHY) += dp83848.o
obj-$(CONFIG_DP83867_PHY) += dp83867.o
obj-$(CONFIG_STE10XP) += ste10Xp.o
obj-$(CONFIG_MICREL_PHY) += micrel.o
......
/*
* Driver for the Texas Instruments DP83848 PHY
*
* Copyright (C) 2015 Texas Instruments Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/phy.h>
#define DP83848_PHY_ID 0x20005c90
/* Registers */
#define DP83848_MICR 0x11
#define DP83848_MISR 0x12
/* MICR Register Fields */
#define DP83848_MICR_INT_OE BIT(0) /* Interrupt Output Enable */
#define DP83848_MICR_INTEN BIT(1) /* Interrupt Enable */
/* MISR Register Fields */
#define DP83848_MISR_RHF_INT_EN BIT(0) /* Receive Error Counter */
#define DP83848_MISR_FHF_INT_EN BIT(1) /* False Carrier Counter */
#define DP83848_MISR_ANC_INT_EN BIT(2) /* Auto-negotiation complete */
#define DP83848_MISR_DUP_INT_EN BIT(3) /* Duplex Status */
#define DP83848_MISR_SPD_INT_EN BIT(4) /* Speed status */
#define DP83848_MISR_LINK_INT_EN BIT(5) /* Link status */
#define DP83848_MISR_ED_INT_EN BIT(6) /* Energy detect */
#define DP83848_MISR_LQM_INT_EN BIT(7) /* Link Quality Monitor */
static int dp83848_ack_interrupt(struct phy_device *phydev)
{
int err = phy_read(phydev, DP83848_MISR);
return err < 0 ? err : 0;
}
static int dp83848_config_intr(struct phy_device *phydev)
{
int err;
if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
err = phy_write(phydev, DP83848_MICR,
DP83848_MICR_INT_OE |
DP83848_MICR_INTEN);
if (err < 0)
return err;
return phy_write(phydev, DP83848_MISR,
DP83848_MISR_ANC_INT_EN |
DP83848_MISR_DUP_INT_EN |
DP83848_MISR_SPD_INT_EN |
DP83848_MISR_LINK_INT_EN);
}
return phy_write(phydev, DP83848_MICR, 0x0);
}
static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
{ DP83848_PHY_ID, 0xfffffff0 },
{ }
};
MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
static struct phy_driver dp83848_driver[] = {
{
.phy_id = DP83848_PHY_ID,
.phy_id_mask = 0xfffffff0,
.name = "TI DP83848",
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.soft_reset = genphy_soft_reset,
.config_init = genphy_config_init,
.suspend = genphy_suspend,
.resume = genphy_resume,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
/* IRQ related */
.ack_interrupt = dp83848_ack_interrupt,
.config_intr = dp83848_config_intr,
.driver = { .owner = THIS_MODULE, },
},
};
module_phy_driver(dp83848_driver);
MODULE_DESCRIPTION("Texas Instruments DP83848 PHY driver");
MODULE_AUTHOR("Andrew F. Davis <afd@ti.com");
MODULE_LICENSE("GPL");
......@@ -514,6 +514,27 @@ static int ksz8873mll_read_status(struct phy_device *phydev)
return 0;
}
static int ksz9031_read_status(struct phy_device *phydev)
{
int err;
int regval;
err = genphy_read_status(phydev);
if (err)
return err;
/* Make sure the PHY is not broken. Read idle error count,
* and reset the PHY if it is maxed out.
*/
regval = phy_read(phydev, MII_STAT1000);
if ((regval & 0xFF) == 0xFF) {
phy_init_hw(phydev);
phydev->link = 0;
}
return 0;
}
static int ksz8873mll_config_aneg(struct phy_device *phydev)
{
return 0;
......@@ -772,7 +793,7 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz9021_type,
.config_init = ksz9031_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
.read_status = ksz9031_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.suspend = genphy_suspend,
......
......@@ -43,16 +43,25 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
static int smsc_phy_config_init(struct phy_device *phydev)
{
int __maybe_unused len;
struct device *dev __maybe_unused = &phydev->dev;
struct device_node *of_node __maybe_unused = dev->of_node;
int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
int enable_energy = 1;
if (rc < 0)
return rc;
/* Enable energy detect mode for this SMSC Transceivers */
rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
rc | MII_LAN83C185_EDPWRDOWN);
if (rc < 0)
return rc;
if (of_find_property(of_node, "smsc,disable-energy-detect", &len))
enable_energy = 0;
if (enable_energy) {
/* Enable energy detect mode for this SMSC Transceivers */
rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
rc | MII_LAN83C185_EDPWRDOWN);
if (rc < 0)
return rc;
}
return smsc_phy_ack_interrupt(phydev);
}
......
......@@ -589,7 +589,7 @@ static int pppoe_release(struct socket *sock)
po = pppox_sk(sk);
if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) {
if (po->pppoe_dev) {
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}
......
......@@ -711,6 +711,10 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */
{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
{QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */
{QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */
{QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */
{QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */
{QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
......
......@@ -2360,6 +2360,46 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
struct ip_tunnel_info *info,
__be16 sport, __be16 dport)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct rtable *rt;
struct flowi4 fl4;
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_tos = RT_TOS(info->key.tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = info->key.u.ipv4.dst;
rt = ip_route_output_key(vxlan->net, &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
info->key.tp_src = sport;
info->key.tp_dst = dport;
return 0;
}
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct ip_tunnel_info *info = skb_tunnel_info(skb);
__be16 sport, dport;
sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
if (ip_tunnel_info_af(info) == AF_INET)
return egress_ipv4_tun_info(dev, skb, info, sport, dport);
return -EINVAL;
}
static const struct net_device_ops vxlan_netdev_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
......@@ -2374,6 +2414,7 @@ static const struct net_device_ops vxlan_netdev_ops = {
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
/* Info for udev, that this is a virtual tunnel endpoint */
......
......@@ -1706,19 +1706,19 @@ static void xennet_destroy_queues(struct netfront_info *info)
}
static int xennet_create_queues(struct netfront_info *info,
unsigned int num_queues)
unsigned int *num_queues)
{
unsigned int i;
int ret;
info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
GFP_KERNEL);
if (!info->queues)
return -ENOMEM;
rtnl_lock();
for (i = 0; i < num_queues; i++) {
for (i = 0; i < *num_queues; i++) {
struct netfront_queue *queue = &info->queues[i];
queue->id = i;
......@@ -1728,7 +1728,7 @@ static int xennet_create_queues(struct netfront_info *info,
if (ret < 0) {
dev_warn(&info->netdev->dev,
"only created %d queues\n", i);
num_queues = i;
*num_queues = i;
break;
}
......@@ -1738,11 +1738,11 @@ static int xennet_create_queues(struct netfront_info *info,
napi_enable(&queue->napi);
}
netif_set_real_num_tx_queues(info->netdev, num_queues);
netif_set_real_num_tx_queues(info->netdev, *num_queues);
rtnl_unlock();
if (num_queues == 0) {
if (*num_queues == 0) {
dev_err(&info->netdev->dev, "no queues\n");
return -EINVAL;
}
......@@ -1788,7 +1788,7 @@ static int talk_to_netback(struct xenbus_device *dev,
if (info->queues)
xennet_destroy_queues(info);
err = xennet_create_queues(info, num_queues);
err = xennet_create_queues(info, &num_queues);
if (err < 0)
goto destroy_ring;
......
......@@ -237,6 +237,10 @@
#define KASAN_ABI_VERSION 3
#endif
#if GCC_VERSION >= 50000
#define CC_HAVE_BUILTIN_OVERFLOW
#endif
#endif /* gcc version >= 40000 specific checks */
#if !defined(__noclone)
......
......@@ -1055,6 +1055,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* This function is used to pass protocol port error state information
* to the switch driver. The switch driver can react to the proto_down
* by doing a phys down on the associated switch port.
* int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
* This function is used to get egress tunnel information for given skb.
* This is useful for retrieving outer tunnel header parameters while
* sampling packet.
*
*/
struct net_device_ops {
......@@ -1230,6 +1234,8 @@ struct net_device_ops {
int (*ndo_get_iflink)(const struct net_device *dev);
int (*ndo_change_proto_down)(struct net_device *dev,
bool proto_down);
int (*ndo_fill_metadata_dst)(struct net_device *dev,
struct sk_buff *skb);
};
/**
......@@ -2210,6 +2216,7 @@ void dev_add_offload(struct packet_offload *po);
void dev_remove_offload(struct packet_offload *po);
int dev_get_iflink(const struct net_device *dev);
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
......
#pragma once
#include <linux/kernel.h>
#ifdef CC_HAVE_BUILTIN_OVERFLOW
#define overflow_usub __builtin_usub_overflow
#else
static inline bool overflow_usub(unsigned int a, unsigned int b,
unsigned int *res)
{
*res = a - b;
return *res > a ? true : false;
}
#endif
......@@ -60,6 +60,38 @@ static inline struct metadata_dst *tun_rx_dst(int md_size)
return tun_dst;
}
static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
{
struct metadata_dst *md_dst = skb_metadata_dst(skb);
int md_size = md_dst->u.tun_info.options_len;
struct metadata_dst *new_md;
if (!md_dst)
return ERR_PTR(-EINVAL);
new_md = metadata_dst_alloc(md_size, GFP_ATOMIC);
if (!new_md)
return ERR_PTR(-ENOMEM);
memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
sizeof(struct ip_tunnel_info) + md_size);
skb_dst_drop(skb);
dst_hold(&new_md->dst);
skb_dst_set(skb, &new_md->dst);
return new_md;
}
static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb)
{
struct metadata_dst *dst;
dst = tun_dst_unclone(skb);
if (IS_ERR(dst))
return NULL;
return &dst->u.tun_info;
}
static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
__be16 flags,
__be64 tunnel_id,
......
......@@ -622,7 +622,8 @@ struct ovs_action_hash {
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
* @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
* table. This allows future packets for the same connection to be identified
* as 'established' or 'related'.
* as 'established' or 'related'. The flow key for the current packet will
* retain the pre-commit connection state.
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
* mask, the corresponding bit in the value is copied to the connection
......
......@@ -99,6 +99,7 @@
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
......@@ -681,6 +682,32 @@ int dev_get_iflink(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_iflink);
/**
* dev_fill_metadata_dst - Retrieve tunnel egress information.
* @dev: targeted interface
* @skb: The packet.
*
* For better visibility of tunnel traffic OVS needs to retrieve
* egress tunnel information for a packet. Following API allows
* user to get this info.
*/
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info;
if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
return -EINVAL;
info = skb_tunnel_info_unclone(skb);
if (!info)
return -ENOMEM;
if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
return -EINVAL;
return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
}
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
......
......@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
static struct rtable *gre_get_rt(struct sk_buff *skb,
struct net_device *dev,
struct flowi4 *fl,
const struct ip_tunnel_key *key)
{
struct net *net = dev_net(dev);
memset(fl, 0, sizeof(*fl));
fl->daddr = key->u.ipv4.dst;
fl->saddr = key->u.ipv4.src;
fl->flowi4_tos = RT_TOS(key->tos);
fl->flowi4_mark = skb->mark;
fl->flowi4_proto = IPPROTO_GRE;
return ip_route_output_key(net, fl);
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
struct net *net = dev_net(dev);
const struct ip_tunnel_key *key;
struct flowi4 fl;
struct rtable *rt;
......@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
memset(&fl, 0, sizeof(fl));
fl.daddr = key->u.ipv4.dst;
fl.saddr = key->u.ipv4.src;
fl.flowi4_tos = RT_TOS(key->tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_GRE;
rt = ip_route_output_key(net, &fl);
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
......@@ -566,6 +575,24 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_dropped++;
}
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
struct rtable *rt;
struct flowi4 fl4;
if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;
rt = gre_get_rt(skb, dev, &fl4, &info->key);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
return 0;
}
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
......@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)
......
......@@ -75,6 +75,7 @@ endif # NF_TABLES
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
help
This option enables the nf_dup_ipv4 core, which duplicates an IPv4
packet to be rerouted to another destination.
......
......@@ -60,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
if (FIB_RES_DEV(res) == dev)
dev_match = true;
#endif
if (dev_match || flags & XT_RPFILTER_LOOSE)
return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
return dev_match;
return dev_match || flags & XT_RPFILTER_LOOSE;
}
static bool rpfilter_is_local(const struct sk_buff *skb)
......
......@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
/* alpha = (1 - g) * alpha + g * F */
alpha -= alpha >> dctcp_shift_g;
alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
if (bytes_ecn) {
/* If dctcp_shift_g == 1, a 32bit value would overflow
* after 8 Mbytes.
......
......@@ -3410,7 +3410,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
NET_INC_STATS_BH(sock_net(sk), mib);
NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
......
......@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
mtu = dst_mtu(skb_dst(skb));
if (skb->len > mtu) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)
xfrm_local_error(skb, mtu);
else
......
......@@ -32,6 +32,7 @@ struct fib6_rule {
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.flags = FIB_LOOKUP_NOREF,
......@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
if (arg.result)
return arg.result;
rt = arg.result;
dst_hold(&net->ipv6.ip6_null_entry->dst);
return &net->ipv6.ip6_null_entry->dst;
if (!rt) {
dst_hold(&net->ipv6.ip6_null_entry->dst);
return &net->ipv6.ip6_null_entry->dst;
}
if (rt->rt6i_flags & RTF_REJECT &&
rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
}
return &rt->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
......
......@@ -286,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
struct rt6_info *rt;
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
if (rt->rt6i_flags & RTF_REJECT &&
rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
}
return &rt->dst;
}
static void __net_init fib6_tables_init(struct net *net)
......
......@@ -28,6 +28,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/overflow-arith.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
......@@ -596,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (np->frag_size)
mtu = np->frag_size;
}
mtu -= hlen + sizeof(struct frag_hdr);
if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
mtu <= 7)
goto fail_toobig;
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
......
......@@ -58,6 +58,7 @@ endif # NF_TABLES
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
help
This option enables the nf_dup_ipv6 core, which duplicates an IPv6
packet to be rerouted to another destination.
......
......@@ -1171,6 +1171,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
{
struct dst_entry *dst;
int flags = 0;
bool any_src;
dst = l3mdev_rt6_dst_by_oif(net, fl6);
if (dst)
......@@ -1178,11 +1179,12 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
fl6->flowi6_iif = LOOPBACK_IFINDEX;
any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
fl6->flowi6_oif)
(fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
if (!ipv6_addr_any(&fl6->saddr))
if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
......
......@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
if (xfrm6_local_dontfrag(skb))
xfrm6_local_rxpmtu(skb, mtu);
......@@ -143,6 +144,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
int mtu;
bool toobig;
#ifdef CONFIG_NETFILTER
if (!x) {
......@@ -151,25 +153,29 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
#endif
if (x->props.mode != XFRM_MODE_TUNNEL)
goto skip_frag;
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
mtu = dst_mtu(skb_dst(skb));
if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
toobig = skb->len > mtu && !skb_is_gso(skb);
if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
if (x->props.mode == XFRM_MODE_TUNNEL &&
((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)))) {
if (toobig || dst_allfrag(skb_dst(skb)))
return ip6_fragment(net, sk, skb,
__xfrm6_output_finish);
}
skip_frag:
return x->outer_mode->afinfo->output_finish(sk, skb);
}
......
......@@ -177,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
return;
case IPPROTO_ICMPV6:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
if (!onlyproto && (nh + offset + 2 < skb->data ||
pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
......@@ -191,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
if (!onlyproto && (nh + offset + 3 < skb->data ||
pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
......
......@@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
for (element = hashbin_get_first(iter->hashbin);
element != NULL;
element = hashbin_get_next(iter->hashbin)) {
if (!off || *off-- == 0) {
if (!off || (*off)-- == 0) {
/* NB: hashbin left locked */
return element;
}
......
......@@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
/* Error is cleare after succecful sending to at least one
/* Error is cleared after successful sending to at least one
* registered KM */
if ((broadcast_flags & BROADCAST_REGISTERED) && err)
err = err2;
......
......@@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
#endif
synchronize_net();
nf_queue_nf_hook_drop(net, &entry->ops);
/* other cpu might still process nfqueue verdict that used reg */
synchronize_net();
kfree(entry);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
......
......@@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_timeout_expired(ext_timeout(n, set))))
n = NULL;
e = kzalloc(set->dsize, GFP_KERNEL);
e = kzalloc(set->dsize, GFP_ATOMIC);
if (!e)
return -ENOMEM;
e->id = d->id;
......
......@@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
int pos, idx, shift;
err = 0;
netlink_table_grab();
netlink_lock_table();
for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
if (len - pos < sizeof(u32))
break;
......@@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
}
if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
err = -EFAULT;
netlink_table_ungrab();
netlink_unlock_table();
break;
}
case NETLINK_CAP_ACK:
......
......@@ -769,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
......@@ -797,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
if (vport) {
int err;
upcall.egress_tun_info = &info;
err = ovs_vport_get_egress_tun_info(vport, skb,
&upcall);
if (err)
upcall.egress_tun_info = NULL;
err = dev_fill_metadata_dst(vport->dev, skb);
if (!err)
upcall.egress_tun_info = skb_tunnel_info(skb);
}
break;
......
......@@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
state = ovs_ct_get_state(ctinfo);
if (!nf_ct_is_confirmed(ct))
state |= OVS_CS_F_NEW;
if (ct->master)
state |= OVS_CS_F_RELATED;
zone = nf_ct_zone(ct);
......@@ -222,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
struct nf_conn *ct;
int err;
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
return -ENOTSUPP;
/* The connection could be invalid, in which case set_label is no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
......@@ -377,7 +376,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
return true;
}
static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
......@@ -408,6 +407,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
}
}
ovs_ct_update_key(skb, key, true);
return 0;
}
......@@ -430,8 +431,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = __ovs_ct_lookup(net, key, info, skb);
if (err)
return err;
ovs_ct_update_key(skb, key, true);
}
return 0;
......@@ -460,8 +459,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
return -EINVAL;
ovs_ct_update_key(skb, key, true);
return 0;
}
......@@ -587,6 +584,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_MARK: {
struct md_mark *mark = nla_data(a);
if (!mark->mask) {
OVS_NLERR(log, "ct_mark mask cannot be 0");
return -EINVAL;
}
info->mark = *mark;
break;
}
......@@ -595,6 +596,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_LABELS: {
struct md_labels *labels = nla_data(a);
if (!labels_nonzero(&labels->mask)) {
OVS_NLERR(log, "ct_labels mask cannot be 0");
return -EINVAL;
}
info->labels = *labels;
break;
}
......@@ -705,11 +710,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
&ct_info->mark))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
labels_nonzero(&ct_info->labels.mask) &&
nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
&ct_info->labels))
return -EMSGSIZE;
......
......@@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a);
static inline bool ovs_ct_state_supported(u32 state)
{
return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED |
OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR |
OVS_CS_F_INVALID | OVS_CS_F_TRACKED));
}
#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
OVS_CS_F_INVALID | OVS_CS_F_TRACKED)
#else
#include <linux/errno.h>
......@@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr)
return false;
}
static inline bool ovs_ct_state_supported(u32 state)
{
return false;
}
static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
const struct sw_flow_key *key,
struct sw_flow_actions **acts, bool log)
......@@ -94,5 +86,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
#define CT_SUPPORTED_MASK 0
#endif /* CONFIG_NF_CONNTRACK */
#endif /* ovs_conntrack.h */
......@@ -489,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
err = ovs_nla_put_egress_tunnel_key(user_skb,
upcall_info->egress_tun_info,
upcall_info->egress_tun_opts);
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
......
......@@ -117,7 +117,6 @@ struct ovs_skb_cb {
*/
struct dp_upcall_info {
struct ip_tunnel_info *egress_tun_info;
const void *egress_tun_opts;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
......
......@@ -764,7 +764,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (tun_opts) {
if (swkey_tun_opts_len) {
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
......@@ -798,14 +798,13 @@ static int ip_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
const struct ip_tunnel_info *egress_tun_info,
const void *egress_tun_opts)
int ovs_nla_put_tunnel_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info)
{
return __ip_tun_to_nlattr(skb, &egress_tun_info->key,
egress_tun_opts,
egress_tun_info->options_len,
ip_tunnel_info_af(egress_tun_info));
return __ip_tun_to_nlattr(skb, &tun_info->key,
ip_tunnel_info_opts(tun_info),
tun_info->options_len,
ip_tunnel_info_af(tun_info));
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
......@@ -866,7 +865,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
if (!is_mask && !ovs_ct_state_supported(ct_state)) {
if (ct_state & ~CT_SUPPORTED_MASK) {
OVS_NLERR(log, "ct_state flags %08x unsupported",
ct_state);
return -EINVAL;
......@@ -1149,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,
} else {
memset(nla_data(nla), val, nla_len(nla));
}
if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
}
}
......@@ -2432,11 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
err = ip_tun_to_nlattr(skb, &tun_info->key,
tun_info->options_len ?
ip_tunnel_info_opts(tun_info) : NULL,
tun_info->options_len,
ip_tunnel_info_af(tun_info));
err = ovs_nla_put_tunnel_info(skb, tun_info);
if (err)
return err;
nla_nest_end(skb, start);
......
......@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_get_match(struct net *, struct sw_flow_match *,
const struct nlattr *key, const struct nlattr *mask,
bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ip_tunnel_info *,
const void *egress_tun_opts);
int ovs_nla_put_tunnel_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
......
......@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
return 0;
}
static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct dp_upcall_info *upcall)
{
struct geneve_port *geneve_port = geneve_vport(vport);
struct net *net = ovs_dp_get_net(vport->dp);
__be16 dport = htons(geneve_port->port_no);
__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
skb, IPPROTO_UDP, sport, dport);
}
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
......@@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
.get_options = geneve_get_options,
.send = dev_queue_xmit,
.owner = THIS_MODULE,
.get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
......
......@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct dp_upcall_info *upcall)
{
return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
skb, IPPROTO_GRE, 0, 0);
}
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.send = dev_queue_xmit,
.get_egress_tun_info = gre_get_egress_tun_info,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
......
......@@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)
free_netdev(dev);
}
static struct rtnl_link_stats64 *
internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
int i;
memset(stats, 0, sizeof(*stats));
stats->rx_errors = dev->stats.rx_errors;
stats->tx_errors = dev->stats.tx_errors;
stats->tx_dropped = dev->stats.tx_dropped;
stats->rx_dropped = dev->stats.rx_dropped;
for_each_possible_cpu(i) {
const struct pcpu_sw_netstats *percpu_stats;
struct pcpu_sw_netstats local_stats;
unsigned int start;
percpu_stats = per_cpu_ptr(dev->tstats, i);
do {
start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
local_stats = *percpu_stats;
} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
stats->tx_bytes += local_stats.tx_bytes;
stats->tx_packets += local_stats.tx_packets;
}
return stats;
}
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_get_stats,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
......@@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->dev->tstats) {
err = -ENOMEM;
goto error_free_netdev;
}
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
......@@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
if (err)
goto error_free_netdev;
goto error_unlock;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
......@@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
return vport;
error_free_netdev:
error_unlock:
rtnl_unlock();
free_percpu(vport->dev->tstats);
error_free_netdev:
free_netdev(vport->dev);
error_free_vport:
ovs_vport_free(vport);
......@@ -198,7 +238,7 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
free_percpu(vport->dev->tstats);
rtnl_unlock();
}
......
......@@ -146,32 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct dp_upcall_info *upcall)
{
struct vxlan_dev *vxlan = netdev_priv(vport->dev);
struct net *net = ovs_dp_get_net(vport->dp);
unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
__be16 dst_port = vxlan_dev_dst_port(vxlan, family);
__be16 src_port;
int port_min;
int port_max;
inet_get_local_port_range(net, &port_min, &port_max);
src_port = udp_flow_src_port(net, skb, 0, 0, true);
return ovs_tunnel_get_egress_info(upcall, net,
skb, IPPROTO_UDP,
src_port, dst_port);
}
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
.send = dev_queue_xmit,
.get_egress_tun_info = vxlan_get_egress_tun_info,
};
static int __init ovs_vxlan_tnl_init(void)
......
......@@ -480,64 +480,6 @@ void ovs_vport_deferred_free(struct vport *vport)
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
struct net *net,
struct sk_buff *skb,
u8 ipproto,
__be16 tp_src,
__be16 tp_dst)
{
struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
const struct ip_tunnel_key *tun_key;
u32 skb_mark = skb->mark;
struct rtable *rt;
struct flowi4 fl;
if (unlikely(!tun_info))
return -EINVAL;
if (ip_tunnel_info_af(tun_info) != AF_INET)
return -EINVAL;
tun_key = &tun_info->key;
/* Route lookup to get srouce IP address.
* The process may need to be changed if the corresponding process
* in vports ops changed.
*/
rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
/* Generate egress_tun_info based on tun_info,
* saddr, tp_src and tp_dst
*/
ip_tunnel_key_init(&egress_tun_info->key,
fl.saddr, tun_key->u.ipv4.dst,
tun_key->tos,
tun_key->ttl,
tp_src, tp_dst,
tun_key->tun_id,
tun_key->tun_flags);
egress_tun_info->options_len = tun_info->options_len;
egress_tun_info->mode = tun_info->mode;
upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
return 0;
}
EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct dp_upcall_info *upcall)
{
/* get_egress_tun_info() is only implemented on tunnel ports. */
if (unlikely(!vport->ops->get_egress_tun_info))
return -EINVAL;
return vport->ops->get_egress_tun_info(vport, skb, upcall);
}
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
......
......@@ -27,7 +27,6 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
#include <net/route.h>
#include "datapath.h"
......@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
struct net *net,
struct sk_buff *,
u8 ipproto,
__be16 tp_src,
__be16 tp_dst);
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct dp_upcall_info *upcall);
/**
* struct vport_portids - array of netlink portids of a vport.
* must be protected by rcu.
......@@ -140,8 +129,6 @@ struct vport_parms {
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
* @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
* a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
......@@ -154,9 +141,6 @@ struct vport_ops {
int (*get_options)(const struct vport *, struct sk_buff *);
netdev_tx_t (*send) (struct sk_buff *skb);
int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
struct dp_upcall_info *upcall);
struct module *owner;
struct list_head list;
};
......
......@@ -94,10 +94,14 @@ __init int net_sysctl_init(void)
goto out;
ret = register_pernet_subsys(&sysctl_pernet_ops);
if (ret)
goto out;
goto out1;
register_sysctl_root(&net_sysctl_root);
out:
return ret;
out1:
unregister_sysctl_table(net_header);
net_header = NULL;
goto out;
}
struct ctl_table_header *register_net_sysctl(struct net *net,
......
......@@ -42,7 +42,8 @@
#include "core.h"
#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
......@@ -908,9 +909,10 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit)
if (!bcl)
return -ENOPROTOOPT;
if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
if (limit < BCLINK_WIN_MIN)
limit = BCLINK_WIN_MIN;
if (limit > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bclink_lock(net);
tipc_link_set_queue_limits(bcl, limit);
tipc_bclink_unlock(net);
......
......@@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
{
struct sk_buff *head = *headbuf;
struct sk_buff *frag = *buf;
struct sk_buff *tail;
struct sk_buff *tail = NULL;
struct tipc_msg *msg;
u32 fragid;
int delta;
......@@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
goto err;
head = *headbuf = frag;
skb_frag_list_init(head);
TIPC_SKB_CB(head)->tail = NULL;
*buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
if (skb_is_nonlinear(head)) {
skb_walk_frags(head, tail) {
TIPC_SKB_CB(head)->tail = tail;
}
} else {
skb_frag_list_init(head);
}
return 0;
}
......
......@@ -52,6 +52,8 @@
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
#define UDP_MIN_HEADROOM 28
static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
[TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
[TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
......@@ -156,6 +158,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct sk_buff *clone;
struct rtable *rt;
if (skb_headroom(skb) < UDP_MIN_HEADROOM)
pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
clone = skb_clone(skb, GFP_ATOMIC);
skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
......
......@@ -1948,13 +1948,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err = misc_register(&vsock_device);
if (err) {
pr_err("Failed to register misc device\n");
return -ENOENT;
goto err_reset_transport;
}
err = proto_register(&vsock_proto, 1); /* we want our slab */
if (err) {
pr_err("Cannot register vsock protocol\n");
goto err_misc_deregister;
goto err_deregister_misc;
}
err = sock_register(&vsock_family_ops);
......@@ -1969,8 +1969,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err_unregister_proto:
proto_unregister(&vsock_proto);
err_misc_deregister:
err_deregister_misc:
misc_deregister(&vsock_device);
err_reset_transport:
transport = NULL;
err_busy:
mutex_unlock(&vsock_register_mutex);
......
......@@ -40,13 +40,11 @@
static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
static void vmci_transport_peer_attach_cb(u32 sub_id,
const struct vmci_event_data *ed,
void *client_data);
static void vmci_transport_peer_detach_cb(u32 sub_id,
const struct vmci_event_data *ed,
void *client_data);
static void vmci_transport_recv_pkt_work(struct work_struct *work);
static void vmci_transport_cleanup(struct work_struct *work);
static int vmci_transport_recv_listen(struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_server(
......@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
struct vmci_transport_packet pkt;
};
static LIST_HEAD(vmci_transport_cleanup_list);
static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
VMCI_INVALID_ID };
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
......@@ -791,44 +793,6 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
return err;
}
static void vmci_transport_peer_attach_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
struct sock *sk = client_data;
const struct vmci_event_payload_qp *e_payload;
struct vsock_sock *vsk;
e_payload = vmci_event_data_const_payload(e_data);
vsk = vsock_sk(sk);
/* We don't ask for delayed CBs when we subscribe to this event (we
* pass 0 as flags to vmci_event_subscribe()). VMCI makes no
* guarantees in that case about what context we might be running in,
* so it could be BH or process, blockable or non-blockable. So we
* need to account for all possible contexts here.
*/
local_bh_disable();
bh_lock_sock(sk);
/* XXX This is lame, we should provide a way to lookup sockets by
* qp_handle.
*/
if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
e_payload->handle)) {
/* XXX This doesn't do anything, but in the future we may want
* to set a flag here to verify the attach really did occur and
* we weren't just sent a datagram claiming it was.
*/
goto out;
}
out:
bh_unlock_sock(sk);
local_bh_enable();
}
static void vmci_transport_handle_detach(struct sock *sk)
{
struct vsock_sock *vsk;
......@@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
struct sock *sk = client_data;
struct vmci_transport *trans = client_data;
const struct vmci_event_payload_qp *e_payload;
struct vsock_sock *vsk;
e_payload = vmci_event_data_const_payload(e_data);
vsk = vsock_sk(sk);
if (vmci_handle_is_invalid(e_payload->handle))
return;
/* Same rules for locking as for peer_attach_cb(). */
local_bh_disable();
bh_lock_sock(sk);
/* XXX This is lame, we should provide a way to lookup sockets by
* qp_handle.
*/
if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
e_payload->handle))
vmci_transport_handle_detach(sk);
if (vmci_handle_is_invalid(e_payload->handle) ||
vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
return;
bh_unlock_sock(sk);
local_bh_enable();
/* We don't ask for delayed CBs when we subscribe to this event (we
* pass 0 as flags to vmci_event_subscribe()). VMCI makes no
* guarantees in that case about what context we might be running in,
* so it could be BH or process, blockable or non-blockable. So we
* need to account for all possible contexts here.
*/
spin_lock_bh(&trans->lock);
if (!trans->sk)
goto out;
/* Apart from here, trans->lock is only grabbed as part of sk destruct,
* where trans->sk isn't locked.
*/
bh_lock_sock(trans->sk);
vmci_transport_handle_detach(trans->sk);
bh_unlock_sock(trans->sk);
out:
spin_unlock_bh(&trans->lock);
}
static void vmci_transport_qp_resumed_cb(u32 sub_id,
......@@ -1181,7 +1155,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
*/
err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
vmci_transport_peer_detach_cb,
pending, &detach_sub_id);
vmci_trans(vpending), &detach_sub_id);
if (err < VMCI_SUCCESS) {
vmci_transport_send_reset(pending, pkt);
err = vmci_transport_error_to_vsock_error(err);
......@@ -1321,7 +1295,6 @@ vmci_transport_recv_connecting_client(struct sock *sk,
|| vmci_trans(vsk)->qpair
|| vmci_trans(vsk)->produce_size != 0
|| vmci_trans(vsk)->consume_size != 0
|| vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
|| vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
skerr = EPROTO;
err = -EINVAL;
......@@ -1389,7 +1362,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
struct vsock_sock *vsk;
struct vmci_handle handle;
struct vmci_qp *qpair;
u32 attach_sub_id;
u32 detach_sub_id;
bool is_local;
u32 flags;
......@@ -1399,7 +1371,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
vsk = vsock_sk(sk);
handle = VMCI_INVALID_HANDLE;
attach_sub_id = VMCI_INVALID_ID;
detach_sub_id = VMCI_INVALID_ID;
/* If we have gotten here then we should be past the point where old
......@@ -1444,23 +1415,15 @@ static int vmci_transport_recv_connecting_client_negotiate(
goto destroy;
}
/* Subscribe to attach and detach events first.
/* Subscribe to detach events first.
*
* XXX We attach once for each queue pair created for now so it is easy
* to find the socket (it's provided), but later we should only
* subscribe once and add a way to lookup sockets by queue pair handle.
*/
err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
vmci_transport_peer_attach_cb,
sk, &attach_sub_id);
if (err < VMCI_SUCCESS) {
err = vmci_transport_error_to_vsock_error(err);
goto destroy;
}
err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
vmci_transport_peer_detach_cb,
sk, &detach_sub_id);
vmci_trans(vsk), &detach_sub_id);
if (err < VMCI_SUCCESS) {
err = vmci_transport_error_to_vsock_error(err);
goto destroy;
......@@ -1496,7 +1459,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
pkt->u.size;
vmci_trans(vsk)->attach_sub_id = attach_sub_id;
vmci_trans(vsk)->detach_sub_id = detach_sub_id;
vmci_trans(vsk)->notify_ops->process_negotiate(sk);
......@@ -1504,9 +1466,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
return 0;
destroy:
if (attach_sub_id != VMCI_INVALID_ID)
vmci_event_unsubscribe(attach_sub_id);
if (detach_sub_id != VMCI_INVALID_ID)
vmci_event_unsubscribe(detach_sub_id);
......@@ -1607,9 +1566,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
vmci_trans(vsk)->qpair = NULL;
vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
VMCI_INVALID_ID;
vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
vmci_trans(vsk)->notify_ops = NULL;
INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
vmci_trans(vsk)->sk = &vsk->sk;
spin_lock_init(&vmci_trans(vsk)->lock);
if (psk) {
vmci_trans(vsk)->queue_pair_size =
vmci_trans(psk)->queue_pair_size;
......@@ -1629,29 +1590,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
return 0;
}
static void vmci_transport_destruct(struct vsock_sock *vsk)
static void vmci_transport_free_resources(struct list_head *transport_list)
{
if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
}
while (!list_empty(transport_list)) {
struct vmci_transport *transport =
list_first_entry(transport_list, struct vmci_transport,
elem);
list_del(&transport->elem);
if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
}
if (transport->detach_sub_id != VMCI_INVALID_ID) {
vmci_event_unsubscribe(transport->detach_sub_id);
transport->detach_sub_id = VMCI_INVALID_ID;
}
if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
vmci_qpair_detach(&vmci_trans(vsk)->qpair);
vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
vmci_trans(vsk)->produce_size = 0;
vmci_trans(vsk)->consume_size = 0;
if (!vmci_handle_is_invalid(transport->qp_handle)) {
vmci_qpair_detach(&transport->qpair);
transport->qp_handle = VMCI_INVALID_HANDLE;
transport->produce_size = 0;
transport->consume_size = 0;
}
kfree(transport);
}
}
static void vmci_transport_cleanup(struct work_struct *work)
{
LIST_HEAD(pending);
spin_lock_bh(&vmci_transport_cleanup_lock);
list_replace_init(&vmci_transport_cleanup_list, &pending);
spin_unlock_bh(&vmci_transport_cleanup_lock);
vmci_transport_free_resources(&pending);
}
static void vmci_transport_destruct(struct vsock_sock *vsk)
{
/* Ensure that the detach callback doesn't use the sk/vsk
* we are about to destruct.
*/
spin_lock_bh(&vmci_trans(vsk)->lock);
vmci_trans(vsk)->sk = NULL;
spin_unlock_bh(&vmci_trans(vsk)->lock);
if (vmci_trans(vsk)->notify_ops)
vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
kfree(vsk->trans);
spin_lock_bh(&vmci_transport_cleanup_lock);
list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
spin_unlock_bh(&vmci_transport_cleanup_lock);
schedule_work(&vmci_transport_cleanup_work);
vsk->trans = NULL;
}
......@@ -2146,6 +2135,9 @@ module_init(vmci_transport_init);
static void __exit vmci_transport_exit(void)
{
cancel_work_sync(&vmci_transport_cleanup_work);
vmci_transport_free_resources(&vmci_transport_cleanup_list);
if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
if (vmci_datagram_destroy_handle(
vmci_transport_stream_handle) != VMCI_SUCCESS)
......@@ -2164,6 +2156,7 @@ module_exit(vmci_transport_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);
......@@ -119,10 +119,12 @@ struct vmci_transport {
u64 queue_pair_size;
u64 queue_pair_min_size;
u64 queue_pair_max_size;
u32 attach_sub_id;
u32 detach_sub_id;
union vmci_transport_notify notify;
struct vmci_transport_notify_ops *notify_ops;
struct list_head elem;
struct sock *sk;
spinlock_t lock; /* protects sk. */
};
int vmci_transport_register(void);
......
......@@ -1928,8 +1928,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
if (!lt && !rp && !re)
if (!lt && !rp && !re && !et && !rt)
return err;
/* pedantic mode - thou shalt sayeth replaceth */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册