提交 6d5db6c3 编写于 作者: D David S. Miller

Merge branch 'nfp-abm-track-all-Qdiscs'

Jakub Kicinski says:

====================
nfp: abm: track all Qdiscs

Our Qdisc offload so far has been very simplistic.  We held
and array of marking thresholds and statistics sized to the
number of PF queues.  This was sufficient since the only
configuration we supported was single layer of RED Qdiscs
(on top of MQ or not, but MQ isn't really about queuing).

As we move to add more Qdiscs it's time to actually try to
track the full Qdisc hierarchy.  This allows us to make sure
our offloaded configuration reflects the SW path better.
We add graft notifications to MQ and RED (PRIO already sends
them) to allow drivers offloading those to learn how Qdiscs
are linked.  MQ graft gives us the obvious advantage of being
able to track when Qdiscs are shared or moved.  It seems
unlikely HW would offload RED's child Qdiscs but since the
behaviour would change based on linked child we should
stop offloading REDs with modified child.  RED will also
handle the child differently during reconfig when limit
parameter is set - so we have to inform the drivers about
the limit, and have them reset the child state when
appropriate.

The NFP driver will now allocate a structure to track each
Qdisc and link it to its children.  We will also maintain
a shadow copy of threshold settings - to save device writes
and make it easier to apply defaults when config is
re-evaluated.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -50,56 +50,37 @@ nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
return 0;
}
static int
nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
unsigned int stride, unsigned int offset, bool is_u64,
u64 *res)
{
u64 val, sum = 0;
unsigned int i;
int err;
for (i = 0; i < alink->vnic->max_rx_rings; i++) {
err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
is_u64, &val);
if (err)
return err;
sum += val;
}
*res = sum;
return 0;
}
int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val)
{
struct nfp_cpp *cpp = alink->abm->app->cpp;
struct nfp_cpp *cpp = abm->app->cpp;
u64 sym_offset;
int err;
sym_offset = (alink->queue_base + i) * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
err = __nfp_rtsym_writel(cpp, alink->abm->q_lvls, 4, 0,
sym_offset, val);
__clear_bit(id, abm->threshold_undef);
if (abm->thresholds[id] == val)
return 0;
sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, val);
if (err) {
nfp_err(cpp, "RED offload setting level failed on vNIC %d queue %d\n",
alink->id, i);
nfp_err(cpp,
"RED offload setting level failed on subqueue %d\n",
id);
return err;
}
abm->thresholds[id] = val;
return 0;
}
int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
u32 val)
{
int i, err;
unsigned int threshold;
for (i = 0; i < alink->vnic->max_rx_rings; i++) {
err = nfp_abm_ctrl_set_q_lvl(alink, i, val);
if (err)
return err;
}
threshold = alink->queue_base + queue;
return 0;
return __nfp_abm_ctrl_set_q_lvl(alink->abm, threshold, val);
}
u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i)
......@@ -153,42 +134,6 @@ int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
i, true, &stats->overlimits);
}
int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
struct nfp_alink_stats *stats)
{
u64 pkts = 0, bytes = 0;
int i, err;
for (i = 0; i < alink->vnic->max_rx_rings; i++) {
pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
}
stats->tx_pkts = pkts;
stats->tx_bytes = bytes;
err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
false, &stats->backlog_bytes);
if (err)
return err;
err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
false, &stats->backlog_pkts);
if (err)
return err;
err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
true, &stats->drops);
if (err)
return err;
return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
true, &stats->overlimits);
}
int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
struct nfp_alink_xstats *xstats)
{
......@@ -205,22 +150,6 @@ int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
i, true, &xstats->ecn_marked);
}
int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
struct nfp_alink_xstats *xstats)
{
int err;
err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
true, &xstats->pdrop);
if (err)
return err;
return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
true, &xstats->ecn_marked);
}
int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
{
return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
......
......@@ -2,10 +2,12 @@
/* Copyright (C) 2018 Netronome Systems, Inc. */
#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/etherdevice.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include "../nfpcore/nfp.h"
......@@ -36,6 +38,8 @@ nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
return -EOPNOTSUPP;
switch (type) {
case TC_SETUP_ROOT_QDISC:
return nfp_abm_setup_root(netdev, repr->app_priv, type_data);
case TC_SETUP_QDISC_MQ:
return nfp_abm_setup_tc_mq(netdev, repr->app_priv, type_data);
case TC_SETUP_QDISC_RED:
......@@ -307,31 +311,23 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
alink->abm = abm;
alink->vnic = nn;
alink->id = id;
alink->parent = TC_H_ROOT;
alink->total_queues = alink->vnic->max_rx_rings;
alink->qdiscs = kvcalloc(alink->total_queues, sizeof(*alink->qdiscs),
GFP_KERNEL);
if (!alink->qdiscs) {
err = -ENOMEM;
goto err_free_alink;
}
/* This is a multi-host app, make sure MAC/PHY is up, but don't
* make the MAC/PHY state follow the state of any of the ports.
*/
err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
if (err < 0)
goto err_free_qdiscs;
goto err_free_alink;
netif_keep_dst(nn->dp.netdev);
nfp_abm_vnic_set_mac(app->pf, abm, nn, id);
nfp_abm_ctrl_read_params(alink);
INIT_RADIX_TREE(&alink->qdiscs, GFP_KERNEL);
return 0;
err_free_qdiscs:
kvfree(alink->qdiscs);
err_free_alink:
kfree(alink);
return err;
......@@ -342,7 +338,7 @@ static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
struct nfp_abm_link *alink = nn->app_priv;
nfp_abm_kill_reprs(alink->abm, alink);
kvfree(alink->qdiscs);
WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n");
kfree(alink);
}
......@@ -398,6 +394,7 @@ static int nfp_abm_init(struct nfp_app *app)
struct nfp_pf *pf = app->pf;
struct nfp_reprs *reprs;
struct nfp_abm *abm;
unsigned int i;
int err;
if (!pf->eth_tbl) {
......@@ -424,15 +421,28 @@ static int nfp_abm_init(struct nfp_app *app)
if (err)
goto err_free_abm;
err = -ENOMEM;
abm->num_thresholds = NFP_NET_MAX_RX_RINGS;
abm->threshold_undef = bitmap_zalloc(abm->num_thresholds, GFP_KERNEL);
if (!abm->threshold_undef)
goto err_free_abm;
abm->thresholds = kvcalloc(abm->num_thresholds,
sizeof(*abm->thresholds), GFP_KERNEL);
if (!abm->thresholds)
goto err_free_thresh_umap;
for (i = 0; i < NFP_NET_MAX_RX_RINGS; i++)
__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
/* We start in legacy mode, make sure advanced queuing is disabled */
err = nfp_abm_ctrl_qm_disable(abm);
if (err)
goto err_free_abm;
goto err_free_thresh;
err = -ENOMEM;
reprs = nfp_reprs_alloc(pf->max_data_vnics);
if (!reprs)
goto err_free_abm;
goto err_free_thresh;
RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs);
reprs = nfp_reprs_alloc(pf->max_data_vnics);
......@@ -444,6 +454,10 @@ static int nfp_abm_init(struct nfp_app *app)
err_free_phys:
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
err_free_thresh:
kvfree(abm->thresholds);
err_free_thresh_umap:
bitmap_free(abm->threshold_undef);
err_free_abm:
kfree(abm);
app->priv = NULL;
......@@ -457,6 +471,8 @@ static void nfp_abm_clean(struct nfp_app *app)
nfp_abm_eswitch_clean_up(abm);
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
bitmap_free(abm->threshold_undef);
kvfree(abm->thresholds);
kfree(abm);
app->priv = NULL;
}
......
......@@ -5,9 +5,15 @@
#define __NFP_ABM_H__ 1
#include <linux/bits.h>
#include <linux/radix-tree.h>
#include <net/devlink.h>
#include <net/pkt_cls.h>
/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz;
* 2.5ms / 400Hz seems more than sufficient for stats resolution.
*/
#define NFP_ABM_STATS_REFRESH_IVAL (2500 * 1000) /* ns */
#define NFP_ABM_LVL_INFINITY S32_MAX
struct nfp_app;
......@@ -20,6 +26,11 @@ struct nfp_net;
* struct nfp_abm - ABM NIC app structure
* @app: back pointer to nfp_app
* @pf_id: ID of our PF link
*
* @thresholds: current threshold configuration
* @threshold_undef: bitmap of thresholds which have not been set
* @num_thresholds: number of @thresholds and bits in @threshold_undef
*
* @eswitch_mode: devlink eswitch mode, advanced functions only visible
* in switchdev mode
* @q_lvls: queue level control area
......@@ -28,6 +39,11 @@ struct nfp_net;
struct nfp_abm {
struct nfp_app *app;
unsigned int pf_id;
u32 *thresholds;
unsigned long *threshold_undef;
size_t num_thresholds;
enum devlink_eswitch_mode eswitch_mode;
const struct nfp_rtsym *q_lvls;
const struct nfp_rtsym *qm_stats;
......@@ -61,16 +77,67 @@ struct nfp_alink_xstats {
u64 pdrop;
};
enum nfp_qdisc_type {
NFP_QDISC_NONE = 0,
NFP_QDISC_MQ,
NFP_QDISC_RED,
};
#define NFP_QDISC_UNTRACKED ((struct nfp_qdisc *)1UL)
/**
* struct nfp_red_qdisc - representation of single RED Qdisc
* @handle: handle of currently offloaded RED Qdisc
* @stats: statistics from last refresh
* @xstats: base of extended statistics
* struct nfp_qdisc - tracked TC Qdisc
* @netdev: netdev on which Qdisc was created
* @type: Qdisc type
* @handle: handle of this Qdisc
* @parent_handle: handle of the parent (unreliable if Qdisc was grafted)
* @use_cnt: number of attachment points in the hierarchy
* @num_children: current size of the @children array
* @children: pointers to children
*
* @params_ok: parameters of this Qdisc are OK for offload
* @offload_mark: offload refresh state - selected for offload
* @offloaded: Qdisc is currently offloaded to the HW
*
* @mq: MQ Qdisc specific parameters and state
* @mq.stats: current stats of the MQ Qdisc
* @mq.prev_stats: previously reported @mq.stats
*
* @red: RED Qdisc specific parameters and state
* @red.threshold: ECN marking threshold
* @red.stats: current stats of the RED Qdisc
* @red.prev_stats: previously reported @red.stats
* @red.xstats: extended stats for RED - current
* @red.prev_xstats: extended stats for RED - previously reported
*/
struct nfp_red_qdisc {
struct nfp_qdisc {
struct net_device *netdev;
enum nfp_qdisc_type type;
u32 handle;
struct nfp_alink_stats stats;
struct nfp_alink_xstats xstats;
u32 parent_handle;
unsigned int use_cnt;
unsigned int num_children;
struct nfp_qdisc **children;
bool params_ok;
bool offload_mark;
bool offloaded;
union {
/* NFP_QDISC_MQ */
struct {
struct nfp_alink_stats stats;
struct nfp_alink_stats prev_stats;
} mq;
/* TC_SETUP_QDISC_RED */
struct {
u32 threshold;
struct nfp_alink_stats stats;
struct nfp_alink_stats prev_stats;
struct nfp_alink_xstats xstats;
struct nfp_alink_xstats prev_xstats;
} red;
};
};
/**
......@@ -80,9 +147,11 @@ struct nfp_red_qdisc {
* @id: id of the data vNIC
* @queue_base: id of base to host queue within PCIe (not QC idx)
* @total_queues: number of PF queues
* @parent: handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
* @num_qdiscs: number of currently used qdiscs
* @qdiscs: array of qdiscs
*
* @last_stats_update: ktime of last stats update
*
* @root_qdisc: pointer to the current root of the Qdisc hierarchy
* @qdiscs: all qdiscs recorded by major part of the handle
*/
struct nfp_abm_link {
struct nfp_abm *abm;
......@@ -90,11 +159,16 @@ struct nfp_abm_link {
unsigned int id;
unsigned int queue_base;
unsigned int total_queues;
u32 parent;
unsigned int num_qdiscs;
struct nfp_red_qdisc *qdiscs;
u64 last_stats_update;
struct nfp_qdisc *root_qdisc;
struct radix_tree_root qdiscs;
};
void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink);
int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
struct tc_root_qopt_offload *opt);
int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
struct tc_red_qopt_offload *opt);
int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
......@@ -102,15 +176,11 @@ int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i,
int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val);
int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
u32 val);
int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
struct nfp_alink_stats *stats);
int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
struct nfp_alink_stats *stats);
int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
struct nfp_alink_xstats *xstats);
int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
struct nfp_alink_xstats *xstats);
u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
......
......@@ -845,6 +845,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_PRIO,
TC_SETUP_QDISC_MQ,
TC_SETUP_QDISC_ETF,
TC_SETUP_ROOT_QDISC,
};
/* These structures hold the attributes of bpf state that are being passed
......
......@@ -821,12 +821,21 @@ enum tc_mq_command {
TC_MQ_CREATE,
TC_MQ_DESTROY,
TC_MQ_STATS,
TC_MQ_GRAFT,
};
struct tc_mq_opt_offload_graft_params {
unsigned long queue;
u32 child_handle;
};
struct tc_mq_qopt_offload {
enum tc_mq_command command;
u32 handle;
struct tc_qopt_offload_stats stats;
union {
struct tc_qopt_offload_stats stats;
struct tc_mq_opt_offload_graft_params graft_params;
};
};
enum tc_red_command {
......@@ -834,12 +843,14 @@ enum tc_red_command {
TC_RED_DESTROY,
TC_RED_STATS,
TC_RED_XSTATS,
TC_RED_GRAFT,
};
struct tc_red_qopt_offload_params {
u32 min;
u32 max;
u32 probability;
u32 limit;
bool is_ecn;
bool is_harddrop;
struct gnet_stats_queue *qstats;
......@@ -853,6 +864,7 @@ struct tc_red_qopt_offload {
struct tc_red_qopt_offload_params set;
struct tc_qopt_offload_stats stats;
struct red_stats *xstats;
u32 child_handle;
};
};
......@@ -889,4 +901,14 @@ struct tc_prio_qopt_offload {
};
};
enum tc_root_command {
TC_ROOT_GRAFT,
};
struct tc_root_qopt_offload {
enum tc_root_command command;
u32 handle;
bool ingress;
};
#endif
......@@ -860,6 +860,21 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);
static void qdisc_offload_graft_root(struct net_device *dev,
struct Qdisc *new, struct Qdisc *old,
struct netlink_ext_ack *extack)
{
struct tc_root_qopt_offload graft_offload = {
.command = TC_ROOT_GRAFT,
.handle = new ? new->handle : 0,
.ingress = (new && new->flags & TCQ_F_INGRESS) ||
(old && old->flags & TCQ_F_INGRESS),
};
qdisc_offload_graft_helper(dev, NULL, new, old,
TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 portid, u32 seq, u16 flags, int event)
{
......@@ -1026,6 +1041,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
qdisc_offload_graft_root(dev, new, old, extack);
if (new && new->ops->attach)
goto skip;
......
......@@ -193,6 +193,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
struct tc_mq_qopt_offload graft_offload;
struct net_device *dev = qdisc_dev(sch);
if (dev->flags & IFF_UP)
......@@ -203,6 +204,14 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
graft_offload.handle = sch->handle;
graft_offload.graft_params.queue = cl - 1;
graft_offload.graft_params.child_handle = new ? new->handle : 0;
graft_offload.command = TC_MQ_GRAFT;
qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
TC_SETUP_QDISC_MQ, &graft_offload, extack);
return 0;
}
......
......@@ -166,6 +166,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.set.min = q->parms.qth_min >> q->parms.Wlog;
opt.set.max = q->parms.qth_max >> q->parms.Wlog;
opt.set.probability = q->parms.max_P;
opt.set.limit = q->limit;
opt.set.is_ecn = red_use_ecn(q);
opt.set.is_harddrop = red_use_harddrop(q);
opt.set.qstats = &sch->qstats;
......@@ -367,6 +368,21 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl,
return 0;
}
static void red_graft_offload(struct Qdisc *sch,
struct Qdisc *new, struct Qdisc *old,
struct netlink_ext_ack *extack)
{
struct tc_red_qopt_offload graft_offload = {
.handle = sch->handle,
.parent = sch->parent,
.child_handle = new->handle,
.command = TC_RED_GRAFT,
};
qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
TC_SETUP_QDISC_RED, &graft_offload, extack);
}
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
......@@ -376,6 +392,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->qdisc);
red_graft_offload(sch, new, *old, extack);
return 0;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册