提交 e4655e4a 编写于 作者: D David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
40GbE Intel Wired LAN Driver Updates 2017-10-13

This series contains updates to mqprio and i40e.

Amritha introduces a new hardware offload mode in tc/mqprio where the TCs,
the queue configurations and bandwidth rate limits are offloaded to the
hardware. The existing mqprio framework is extended to configure the queue
counts and layout and also added support for rate limiting. This is
achieved through new netlink attributes for the 'mode' option which takes
values such as 'dcb' (default) and 'channel' and a 'shaper' option for
QoS attributes such as bandwidth rate limits in hw mode 1.  Legacy devices
can fall back to the existing setup supporting hw mode 1 without these
additional options where only the TCs are offloaded and then the 'mode'
and 'shaper' options default to DCB support.  The i40e driver enables the
new mqprio hardware offload mechanism factoring the TCs, queue
configuration and bandwidth rates by creating HW channel VSIs.
In this new mode, the priority to traffic class mapping and the user
specified queue ranges are used to configure the traffic class when the
'mode' option is set to 'channel'. This is achieved by creating HW
channels (VSIs). A new channel is created for each traffic class
configuration offloaded via the mqprio framework, except for the first TC (TC0),
which is for the main VSI. TC0 for the main VSI is also reconfigured as
per user provided queue parameters. Finally, bandwidth rate limits are set
on these traffic classes through the shaper attribute by sending these
rates in addition to the number of TCs and the queue configurations.

Colin Ian King makes an array of constant values "constant".

Alan fixes an issue where, on some firmware versions, we were failing to
actually fill out the phy_types, which caused ethtool to not report any
link types.  Also hardens against a potentially malicious VF by not
letting the VF reset itself after requesting a change in the number of
queues (via ethtool); instead, the PF resets the VF to institute the
requested changes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <linux/net_tstamp.h> #include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h> #include <linux/ptp_clock_kernel.h>
#include <net/pkt_cls.h>
#include "i40e_type.h" #include "i40e_type.h"
#include "i40e_prototype.h" #include "i40e_prototype.h"
#include "i40e_client.h" #include "i40e_client.h"
...@@ -87,6 +88,7 @@ ...@@ -87,6 +88,7 @@
#define I40E_AQ_LEN 256 #define I40E_AQ_LEN 256
#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */
#define I40E_MAX_USER_PRIORITY 8 #define I40E_MAX_USER_PRIORITY 8
#define I40E_MAX_QUEUES_PER_CH 64
#define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) #define I40E_DEFAULT_TRAFFIC_CLASS BIT(0)
#define I40E_DEFAULT_MSG_ENABLE 4 #define I40E_DEFAULT_MSG_ENABLE 4
#define I40E_QUEUE_WAIT_RETRY_LIMIT 10 #define I40E_QUEUE_WAIT_RETRY_LIMIT 10
...@@ -126,6 +128,10 @@ ...@@ -126,6 +128,10 @@
/* default to trying for four seconds */ /* default to trying for four seconds */
#define I40E_TRY_LINK_TIMEOUT (4 * HZ) #define I40E_TRY_LINK_TIMEOUT (4 * HZ)
/* BW rate limiting */
#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */
#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */
/* driver state flags */ /* driver state flags */
enum i40e_state_t { enum i40e_state_t {
__I40E_TESTING, __I40E_TESTING,
...@@ -157,6 +163,8 @@ enum i40e_state_t { ...@@ -157,6 +163,8 @@ enum i40e_state_t {
__I40E_STATE_SIZE__, __I40E_STATE_SIZE__,
}; };
#define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED)
/* VSI state flags */ /* VSI state flags */
enum i40e_vsi_state_t { enum i40e_vsi_state_t {
__I40E_VSI_DOWN, __I40E_VSI_DOWN,
...@@ -338,6 +346,25 @@ struct i40e_flex_pit { ...@@ -338,6 +346,25 @@ struct i40e_flex_pit {
u8 pit_index; u8 pit_index;
}; };
/* Bookkeeping for one queue channel that belongs to a parent VSI.
 * NOTE(review): per the series description, one HW channel (VSI) is created
 * for every mqprio traffic class except TC0 - confirm against the code that
 * fills this struct in, which is not visible here.
 */
struct i40e_channel {
/* list linkage; presumably chained on the parent VSI's ch_list - TODO confirm */
struct list_head list;
bool initialized;
u8 type;
u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */
u16 stat_counter_idx;
u16 base_queue;
u16 num_queue_pairs; /* Requested by user */
u16 seid;
u8 enabled_tc;
struct i40e_aqc_vsi_properties_data info;
/* NOTE(review): units are presumably Mbps, matching the 50Mbps-per-credit
 * I40E_BW_CREDIT_DIVISOR - confirm against i40e_set_bw_limit()
 */
u64 max_tx_rate;
/* VSI that this channel belongs to */
struct i40e_vsi *parent_vsi;
};
/* struct that defines the Ethernet device */ /* struct that defines the Ethernet device */
struct i40e_pf { struct i40e_pf {
struct pci_dev *pdev; struct pci_dev *pdev;
...@@ -454,6 +481,7 @@ struct i40e_pf { ...@@ -454,6 +481,7 @@ struct i40e_pf {
#define I40E_FLAG_CLIENT_RESET BIT(26) #define I40E_FLAG_CLIENT_RESET BIT(26)
#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27) #define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27)
#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28) #define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28)
#define I40E_FLAG_TC_MQPRIO BIT(29)
struct i40e_client_instance *cinst; struct i40e_client_instance *cinst;
bool stat_offsets_loaded; bool stat_offsets_loaded;
...@@ -534,6 +562,8 @@ struct i40e_pf { ...@@ -534,6 +562,8 @@ struct i40e_pf {
u32 ioremap_len; u32 ioremap_len;
u32 fd_inv; u32 fd_inv;
u16 phy_led_val; u16 phy_led_val;
u16 override_q_count;
}; };
/** /**
...@@ -677,6 +707,7 @@ struct i40e_vsi { ...@@ -677,6 +707,7 @@ struct i40e_vsi {
enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */
s16 vf_id; /* Virtual function ID for SRIOV VSIs */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */
struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
struct i40e_tc_configuration tc_config; struct i40e_tc_configuration tc_config;
struct i40e_aqc_vsi_properties_data info; struct i40e_aqc_vsi_properties_data info;
...@@ -698,6 +729,16 @@ struct i40e_vsi { ...@@ -698,6 +729,16 @@ struct i40e_vsi {
bool current_isup; /* Sync 'link up' logging */ bool current_isup; /* Sync 'link up' logging */
enum i40e_aq_link_speed current_speed; /* Sync link speed logging */ enum i40e_aq_link_speed current_speed; /* Sync link speed logging */
/* channel specific fields */
u16 cnt_q_avail; /* num of queues available for channel usage */
u16 orig_rss_size;
u16 current_rss_size;
bool reconfig_rss;
u16 next_base_queue; /* next queue to be used for channel setup */
struct list_head ch_list;
void *priv; /* client driver data reference. */ void *priv; /* client driver data reference. */
/* VSI specific handlers */ /* VSI specific handlers */
...@@ -1002,4 +1043,7 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) ...@@ -1002,4 +1043,7 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{ {
return !!vsi->xdp_prog; return !!vsi->xdp_prog;
} }
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
#endif /* _I40E_H_ */ #endif /* _I40E_H_ */
...@@ -1611,8 +1611,13 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, ...@@ -1611,8 +1611,13 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
if (report_init) { if (report_init) {
if (hw->mac.type == I40E_MAC_XL710 && if (hw->mac.type == I40E_MAC_XL710 &&
hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
status = i40e_aq_get_link_info(hw, true, NULL, NULL); status = i40e_aq_get_link_info(hw, true, NULL, NULL);
} else {
hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
hw->phy.phy_types |=
((u64)abilities->phy_type_ext << 32);
}
} }
return status; return status;
......
...@@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, ...@@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
*/ */
if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
i40e_do_reset_safe(pf, i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
BIT_ULL(__I40E_PF_RESET_REQUESTED));
} }
vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0); vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
......
...@@ -36,7 +36,9 @@ ...@@ -36,7 +36,9 @@
static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw, static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
u32 reg, u32 mask) u32 reg, u32 mask)
{ {
const u32 patterns[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; static const u32 patterns[] = {
0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
};
u32 pat, val, orig_val; u32 pat, val, orig_val;
int i; int i;
......
...@@ -2652,7 +2652,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, ...@@ -2652,7 +2652,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
switch (cmd->cmd) { switch (cmd->cmd) {
case ETHTOOL_GRXRINGS: case ETHTOOL_GRXRINGS:
cmd->data = vsi->num_queue_pairs; cmd->data = vsi->rss_size;
ret = 0; ret = 0;
break; break;
case ETHTOOL_GRXFH: case ETHTOOL_GRXFH:
...@@ -3897,6 +3897,12 @@ static int i40e_set_channels(struct net_device *dev, ...@@ -3897,6 +3897,12 @@ static int i40e_set_channels(struct net_device *dev,
if (vsi->type != I40E_VSI_MAIN) if (vsi->type != I40E_VSI_MAIN)
return -EINVAL; return -EINVAL;
/* We do not support setting channels via ethtool when TCs are
* configured through mqprio
*/
if (pf->flags & I40E_FLAG_TC_MQPRIO)
return -EINVAL;
/* verify they are not requesting separate vectors */ /* verify they are not requesting separate vectors */
if (!count || ch->rx_count || ch->tx_count) if (!count || ch->rx_count || ch->tx_count)
return -EINVAL; return -EINVAL;
......
...@@ -426,6 +426,8 @@ struct i40e_ring { ...@@ -426,6 +426,8 @@ struct i40e_ring {
* i40e_clean_rx_ring_irq() is called * i40e_clean_rx_ring_irq() is called
* for this ring. * for this ring.
*/ */
struct i40e_channel *ch;
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring) static inline bool ring_uses_build_skb(struct i40e_ring *ring)
......
...@@ -1425,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) ...@@ -1425,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (num_vfs) { if (num_vfs) {
if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
i40e_do_reset_safe(pf, i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
BIT_ULL(__I40E_PF_RESET_REQUESTED));
} }
return i40e_pci_sriov_enable(pdev, num_vfs); return i40e_pci_sriov_enable(pdev, num_vfs);
} }
...@@ -1434,7 +1433,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) ...@@ -1434,7 +1433,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!pci_vfs_assigned(pf->pdev)) { if (!pci_vfs_assigned(pf->pdev)) {
i40e_free_vfs(pf); i40e_free_vfs(pf);
pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
} else { } else {
dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
return -EINVAL; return -EINVAL;
...@@ -2046,8 +2045,9 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ...@@ -2046,8 +2045,9 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
* @msglen: msg length * @msglen: msg length
* *
* VFs get a default number of queues but can use this message to request a * VFs get a default number of queues but can use this message to request a
* different number. Will respond with either the number requested or the * different number. If the request is successful, PF will reset the VF and
* maximum we can support. * return 0. If unsuccessful, PF will send message informing VF of number of
* available queues and return result of sending VF a message.
**/ **/
static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen)
{ {
...@@ -2078,7 +2078,11 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) ...@@ -2078,7 +2078,11 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen)
pf->queues_left); pf->queues_left);
vfres->num_queue_pairs = pf->queues_left + cur_pairs; vfres->num_queue_pairs = pf->queues_left + cur_pairs;
} else { } else {
/* successful request */
vf->num_req_queues = req_pairs; vf->num_req_queues = req_pairs;
i40e_vc_notify_vf_reset(vf);
i40e_reset_vf(vf, false);
return 0;
} }
return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0,
...@@ -3118,8 +3122,6 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, ...@@ -3118,8 +3122,6 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
return ret; return ret;
} }
#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */
#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* device can accumulate 4 credits max */
/** /**
* i40e_ndo_set_vf_bw * i40e_ndo_set_vf_bw
* @netdev: network interface device structure * @netdev: network interface device structure
...@@ -3135,7 +3137,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, ...@@ -3135,7 +3137,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
struct i40e_pf *pf = np->vsi->back; struct i40e_pf *pf = np->vsi->back;
struct i40e_vsi *vsi; struct i40e_vsi *vsi;
struct i40e_vf *vf; struct i40e_vf *vf;
int speed = 0;
int ret = 0; int ret = 0;
/* validate the request */ /* validate the request */
...@@ -3160,48 +3161,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, ...@@ -3160,48 +3161,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
goto error; goto error;
} }
switch (pf->hw.phy.link_info.link_speed) { ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
case I40E_LINK_SPEED_40GB: if (ret)
speed = 40000;
break;
case I40E_LINK_SPEED_25GB:
speed = 25000;
break;
case I40E_LINK_SPEED_20GB:
speed = 20000;
break;
case I40E_LINK_SPEED_10GB:
speed = 10000;
break;
case I40E_LINK_SPEED_1GB:
speed = 1000;
break;
default:
break;
}
if (max_tx_rate > speed) {
dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n",
max_tx_rate, vf->vf_id);
ret = -EINVAL;
goto error; goto error;
}
if ((max_tx_rate < 50) && (max_tx_rate > 0)) {
dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n");
max_tx_rate = 50;
}
/* Tx rate credits are in values of 50Mbps, 0 is disabled*/
ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
max_tx_rate / I40E_BW_CREDIT_DIVISOR,
I40E_MAX_BW_INACTIVE_ACCUM, NULL);
if (ret) {
dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
ret);
ret = -EIO;
goto error;
}
vf->tx_rate = max_tx_rate; vf->tx_rate = max_tx_rate;
error: error:
return ret; return ret;
......
...@@ -407,6 +407,7 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num) ...@@ -407,6 +407,7 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
vfres.num_queue_pairs = num; vfres.num_queue_pairs = num;
adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES;
adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES,
(u8 *)&vfres, sizeof(vfres)); (u8 *)&vfres, sizeof(vfres));
} }
...@@ -1098,15 +1099,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, ...@@ -1098,15 +1099,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
case VIRTCHNL_OP_REQUEST_QUEUES: { case VIRTCHNL_OP_REQUEST_QUEUES: {
struct virtchnl_vf_res_request *vfres = struct virtchnl_vf_res_request *vfres =
(struct virtchnl_vf_res_request *)msg; (struct virtchnl_vf_res_request *)msg;
if (vfres->num_queue_pairs == adapter->num_req_queues) { if (vfres->num_queue_pairs != adapter->num_req_queues) {
adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
i40evf_schedule_reset(adapter);
} else {
dev_info(&adapter->pdev->dev, dev_info(&adapter->pdev->dev,
"Requested %d queues, PF can support %d\n", "Requested %d queues, PF can support %d\n",
adapter->num_req_queues, adapter->num_req_queues,
vfres->num_queue_pairs); vfres->num_queue_pairs);
adapter->num_req_queues = 0; adapter->num_req_queues = 0;
adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
} }
} }
break; break;
......
...@@ -333,8 +333,8 @@ struct virtchnl_vsi_queue_config_info { ...@@ -333,8 +333,8 @@ struct virtchnl_vsi_queue_config_info {
* additional queues must be negotiated. This is a best effort request as it * additional queues must be negotiated. This is a best effort request as it
* is possible the PF does not have enough queues left to support the request. * is possible the PF does not have enough queues left to support the request.
* If the PF cannot support the number requested it will respond with the * If the PF cannot support the number requested it will respond with the
* maximum number it is able to support; otherwise it will respond with the * maximum number it is able to support. If the request is successful, PF will
* number requested. * then reset the VF to institute required changes.
*/ */
/* VF resource request */ /* VF resource request */
......
...@@ -546,6 +546,15 @@ struct tc_cls_bpf_offload { ...@@ -546,6 +546,15 @@ struct tc_cls_bpf_offload {
u32 gen_flags; u32 gen_flags;
}; };
/* Argument handed to a driver's ndo_setup_tc() for TC_SETUP_MQPRIO.
 * It wraps the basic mqprio options together with the optional mode,
 * shaper and per-TC rate settings parsed by the mqprio qdisc.
 */
struct tc_mqprio_qopt_offload {
/* struct tc_mqprio_qopt must always be the first element */
struct tc_mqprio_qopt qopt;
/* TC_MQPRIO_MODE_* value; filled in when TC_MQPRIO_F_MODE is set in flags */
u16 mode;
/* TC_MQPRIO_SHAPER_* value; filled in when TC_MQPRIO_F_SHAPER is set in flags */
u16 shaper;
/* TC_MQPRIO_F_* bits telling which of the optional fields are populated */
u32 flags;
/* per-TC rates, populated for the first qopt.num_tc entries when the
 * matching TC_MQPRIO_F_MIN_RATE / TC_MQPRIO_F_MAX_RATE flag is set
 */
u64 min_rate[TC_QOPT_MAX_QUEUE];
u64 max_rate[TC_QOPT_MAX_QUEUE];
};
/* This structure holds cookie structure that is passed from user /* This structure holds cookie structure that is passed from user
* to the kernel for actions and classifiers * to the kernel for actions and classifiers
......
...@@ -625,6 +625,22 @@ enum { ...@@ -625,6 +625,22 @@ enum {
#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) #define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
/* Values carried by the TCA_MQPRIO_MODE attribute. */
enum {
	TC_MQPRIO_MODE_DCB,
	TC_MQPRIO_MODE_CHANNEL,
	__TC_MQPRIO_MODE_MAX
};

/* Highest valid mode. This is an enumerator rather than a macro so that it
 * is evaluated here, before the legacy macro below shadows the
 * __TC_MQPRIO_MODE_MAX sentinel; a macro written in terms of the sentinel
 * would be expanded through the legacy macro and end up one too small.
 */
enum { TC_MQPRIO_MODE_MAX = __TC_MQPRIO_MODE_MAX - 1 };

/* Legacy, self-referential spelling. It was evidently meant to be
 * TC_MQPRIO_MODE_MAX; it is kept unchanged so that anything already using
 * this name keeps seeing the same value.
 */
#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)

/* Values carried by the TCA_MQPRIO_SHAPER attribute. */
enum {
	TC_MQPRIO_SHAPER_DCB,
	TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
	__TC_MQPRIO_SHAPER_MAX
};

/* Highest valid shaper; an enumerator for the same reason as
 * TC_MQPRIO_MODE_MAX above.
 */
enum { TC_MQPRIO_SHAPER_MAX = __TC_MQPRIO_SHAPER_MAX - 1 };

/* Legacy, self-referential spelling, kept unchanged for compatibility. */
#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
struct tc_mqprio_qopt { struct tc_mqprio_qopt {
__u8 num_tc; __u8 num_tc;
__u8 prio_tc_map[TC_QOPT_BITMASK + 1]; __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
...@@ -633,6 +649,22 @@ struct tc_mqprio_qopt { ...@@ -633,6 +649,22 @@ struct tc_mqprio_qopt {
__u16 offset[TC_QOPT_MAX_QUEUE]; __u16 offset[TC_QOPT_MAX_QUEUE];
}; };
/* Flag bits recording which optional mqprio attributes were supplied;
 * the qdisc sets each bit when it parses the corresponding attribute.
 */
#define TC_MQPRIO_F_MODE 0x1
#define TC_MQPRIO_F_SHAPER 0x2
#define TC_MQPRIO_F_MIN_RATE 0x4
#define TC_MQPRIO_F_MAX_RATE 0x8
/* Netlink attribute types for the optional mqprio parameters.
 * The *_RATE64 types are used both for the outer nest and for each
 * u64 rate entry inside it.
 */
enum {
TCA_MQPRIO_UNSPEC,
TCA_MQPRIO_MODE,
TCA_MQPRIO_SHAPER,
TCA_MQPRIO_MIN_RATE64,
TCA_MQPRIO_MAX_RATE64,
__TCA_MQPRIO_MAX,
};
/* highest valid attribute type; used to size policy and attribute tables */
#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
/* SFB */ /* SFB */
enum { enum {
......
...@@ -18,10 +18,16 @@ ...@@ -18,10 +18,16 @@
#include <net/netlink.h> #include <net/netlink.h>
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <net/sch_generic.h> #include <net/sch_generic.h>
#include <net/pkt_cls.h>
struct mqprio_sched { struct mqprio_sched {
struct Qdisc **qdiscs; struct Qdisc **qdiscs;
u16 mode;
u16 shaper;
int hw_offload; int hw_offload;
u32 flags;
u64 min_rate[TC_QOPT_MAX_QUEUE];
u64 max_rate[TC_QOPT_MAX_QUEUE];
}; };
static void mqprio_destroy(struct Qdisc *sch) static void mqprio_destroy(struct Qdisc *sch)
...@@ -39,9 +45,17 @@ static void mqprio_destroy(struct Qdisc *sch) ...@@ -39,9 +45,17 @@ static void mqprio_destroy(struct Qdisc *sch)
} }
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
struct tc_mqprio_qopt mqprio = {}; struct tc_mqprio_qopt_offload mqprio = { { 0 } };
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio); switch (priv->mode) {
case TC_MQPRIO_MODE_DCB:
case TC_MQPRIO_MODE_CHANNEL:
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
&mqprio);
break;
default:
return;
}
} else { } else {
netdev_set_num_tc(dev, 0); netdev_set_num_tc(dev, 0);
} }
...@@ -97,6 +111,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) ...@@ -97,6 +111,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return 0; return 0;
} }
/* Netlink policy for the optional attributes that follow
 * struct tc_mqprio_qopt inside the qdisc options: mode and shaper are
 * given a length of sizeof(u16), while the min/max rate attributes are
 * nests whose entries are read one by one in mqprio_init().
 */
static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
[TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
[TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
[TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
[TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
};
/* Parse the attributes that trail a fixed-size header inside @nla.
 *
 * @tb:      table to fill, must hold @maxtype + 1 entries
 * @maxtype: highest attribute type accepted
 * @nla:     attribute whose payload is "header of @len bytes, then attrs"
 * @policy:  validation policy passed through to nla_parse()
 * @len:     size of the leading header to step over (aligned internally)
 *
 * Returns the result of nla_parse() when at least one attribute header
 * fits after the fixed header; otherwise clears @tb and returns 0.
 */
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int hdr_len = NLA_ALIGN(len);
	int trailing = nla_len(nla) - hdr_len;

	/* Nothing usable after the fixed header: report "no attributes". */
	if (trailing < nla_attr_size(0)) {
		memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
		return 0;
	}

	return nla_parse(tb, maxtype, nla_data(nla) + hdr_len, trailing,
			 policy, NULL);
}
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
{ {
struct net_device *dev = qdisc_dev(sch); struct net_device *dev = qdisc_dev(sch);
...@@ -105,6 +139,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -105,6 +139,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *qdisc; struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP; int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL; struct tc_mqprio_qopt *qopt = NULL;
struct nlattr *tb[TCA_MQPRIO_MAX + 1];
struct nlattr *attr;
int rem;
int len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
...@@ -122,6 +160,58 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -122,6 +160,58 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (mqprio_parse_opt(dev, qopt)) if (mqprio_parse_opt(dev, qopt))
return -EINVAL; return -EINVAL;
if (len > 0) {
err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
sizeof(*qopt));
if (err < 0)
return err;
if (!qopt->hw)
return -EINVAL;
if (tb[TCA_MQPRIO_MODE]) {
priv->flags |= TC_MQPRIO_F_MODE;
priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
}
if (tb[TCA_MQPRIO_SHAPER]) {
priv->flags |= TC_MQPRIO_F_SHAPER;
priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
}
if (tb[TCA_MQPRIO_MIN_RATE64]) {
if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
return -EINVAL;
i = 0;
nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
rem) {
if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
return -EINVAL;
if (i >= qopt->num_tc)
break;
priv->min_rate[i] = *(u64 *)nla_data(attr);
i++;
}
priv->flags |= TC_MQPRIO_F_MIN_RATE;
}
if (tb[TCA_MQPRIO_MAX_RATE64]) {
if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
return -EINVAL;
i = 0;
nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
rem) {
if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
return -EINVAL;
if (i >= qopt->num_tc)
break;
priv->max_rate[i] = *(u64 *)nla_data(attr);
i++;
}
priv->flags |= TC_MQPRIO_F_MAX_RATE;
}
}
/* pre-allocate qdisc, attachment can't fail */ /* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL); GFP_KERNEL);
...@@ -146,14 +236,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -146,14 +236,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping * supplied and verified mapping
*/ */
if (qopt->hw) { if (qopt->hw) {
struct tc_mqprio_qopt mqprio = *qopt; struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, switch (priv->mode) {
case TC_MQPRIO_MODE_DCB:
if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
return -EINVAL;
break;
case TC_MQPRIO_MODE_CHANNEL:
mqprio.flags = priv->flags;
if (priv->flags & TC_MQPRIO_F_MODE)
mqprio.mode = priv->mode;
if (priv->flags & TC_MQPRIO_F_SHAPER)
mqprio.shaper = priv->shaper;
if (priv->flags & TC_MQPRIO_F_MIN_RATE)
for (i = 0; i < mqprio.qopt.num_tc; i++)
mqprio.min_rate[i] = priv->min_rate[i];
if (priv->flags & TC_MQPRIO_F_MAX_RATE)
for (i = 0; i < mqprio.qopt.num_tc; i++)
mqprio.max_rate[i] = priv->max_rate[i];
break;
default:
return -EINVAL;
}
err = dev->netdev_ops->ndo_setup_tc(dev,
TC_SETUP_MQPRIO,
&mqprio); &mqprio);
if (err) if (err)
return err; return err;
priv->hw_offload = mqprio.hw; priv->hw_offload = mqprio.qopt.hw;
} else { } else {
netdev_set_num_tc(dev, qopt->num_tc); netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++) for (i = 0; i < qopt->num_tc; i++)
...@@ -223,11 +335,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, ...@@ -223,11 +335,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0; return 0;
} }
/* Append the per-TC rate limits stored in @priv to @skb.
 *
 * For each of TC_MQPRIO_F_MIN_RATE and TC_MQPRIO_F_MAX_RATE that is set in
 * priv->flags, a nest of the matching TCA_MQPRIO_*_RATE64 type is opened and
 * one attribute of that same type is added per traffic class, for
 * opt->num_tc classes.
 *
 * Returns 0 on success. If opening a nest or adding an entry fails, the
 * nest being built is cancelled and -1 is returned.
 */
static int dump_rates(struct mqprio_sched *priv,
		      struct tc_mqprio_qopt *opt, struct sk_buff *skb)
{
	struct nlattr *nest;
	int tc;

	if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
		nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
		if (!nest)
			goto cancel;

		tc = 0;
		while (tc < opt->num_tc) {
			if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
				    sizeof(priv->min_rate[tc]),
				    &priv->min_rate[tc]))
				goto cancel;
			tc++;
		}
		nla_nest_end(skb, nest);
	}

	if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
		nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
		if (!nest)
			goto cancel;

		tc = 0;
		while (tc < opt->num_tc) {
			if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
				    sizeof(priv->max_rate[tc]),
				    &priv->max_rate[tc]))
				goto cancel;
			tc++;
		}
		nla_nest_end(skb, nest);
	}

	return 0;

cancel:
	/* nest is NULL here when nla_nest_start() itself failed */
	nla_nest_cancel(skb, nest);
	return -1;
}
static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{ {
struct net_device *dev = qdisc_dev(sch); struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch); struct mqprio_sched *priv = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb); struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 }; struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc; struct Qdisc *qdisc;
unsigned int i; unsigned int i;
...@@ -258,12 +410,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) ...@@ -258,12 +410,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[i] = dev->tc_to_txq[i].offset; opt.offset[i] = dev->tc_to_txq[i].offset;
} }
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
goto nla_put_failure;
if ((priv->flags & TC_MQPRIO_F_MODE) &&
nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
goto nla_put_failure;
if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
goto nla_put_failure;
if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
priv->flags & TC_MQPRIO_F_MAX_RATE) &&
(dump_rates(priv, &opt, skb) != 0))
goto nla_put_failure; goto nla_put_failure;
return skb->len; return nla_nest_end(skb, nla);
nla_put_failure: nla_put_failure:
nlmsg_trim(skb, b); nlmsg_trim(skb, nla);
return -1; return -1;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册