提交 72991b56 编写于 作者: D David S. Miller

Merge branch 'cls-hw-offload-rtnl'

Vlad Buslov says:

====================
Refactor cls hardware offload API to support rtnl-independent drivers

Currently, all cls API hardware offloads driver callbacks require caller
to hold rtnl lock when calling them. This patch set introduces new API
that allows drivers to register callbacks that are not dependent on rtnl
lock and unlocked classifiers to offload filters without obtaining rtnl
lock first, which is intended to allow offloading tc rules in parallel.

Recently, new rtnl registration flag RTNL_FLAG_DOIT_UNLOCKED was added.
TC rule update handlers (RTM_NEWTFILTER, RTM_DELTFILTER, etc.) are
already registered with this flag and only take rtnl lock when qdisc or
classifier requires it. Classifiers can indicate that their ops
callbacks don't require caller to hold rtnl lock by setting the
TCF_PROTO_OPS_DOIT_UNLOCKED flag. Unlocked implementation of flower
classifier is now upstreamed. However, this implementation still obtains
rtnl lock before calling hardware offloads API.

Implement following cls API changes:

- Introduce new "unlocked_driver_cb" flag to struct flow_block_offload
  to allow registering and unregistering block hardware offload
  callbacks that do not require caller to hold rtnl lock. A driver that
  doesn't require users of its tc offload callbacks to hold rtnl lock
  sets the flag to true on block bind/unbind. Internally tcf_block is
  extended with additional lockeddevcnt counter that is used to count
  number of devices that require rtnl lock that block is bound to. When
  this counter is zero, tc_setup_cb_*() functions execute callbacks
  without obtaining rtnl lock.

- Extend cls API single hardware rule update tc_setup_cb_call() function
  with tc_setup_cb_add(), tc_setup_cb_replace(), tc_setup_cb_destroy()
  and tc_setup_cb_reoffload() functions. These new APIs are needed to
  move management of block offload counter, filter in hardware counter
  and flag from classifier implementations to cls API, which is now
  responsible for managing them in concurrency-safe manner. Access to
  cb_list from callback execution code is synchronized by obtaining new
  'cb_lock' rw_semaphore in read mode, which allows executing callbacks
  in parallel, but excludes any modifications of data from
  register/unregister code. tcf_block offloads counter type is changed
  to atomic integer to allow updating the counter concurrently.

- Extend classifier ops with new ops->hw_add() and ops->hw_del()
  callbacks which are used to notify unlocked classifiers when a filter is
  successfully added to or deleted from hardware without releasing cb_lock.
  This is necessary to update classifier state atomically with callback
  list traversal and updating of all relevant counters and allows
  unlocked classifiers to synchronize with concurrent reoffload without
  requiring any changes to driver callback API implementations.

New tc flow_action infrastructure is also modified to allow its user to
execute without rtnl lock protection. Function tc_setup_flow_action() is
modified to conditionally obtain rtnl lock before accessing action
state. Action data that is accessed by reference is either copied or
reference counted to prevent concurrent action overwrite from
deallocating it. New function tc_cleanup_flow_action() is introduced to
cleanup/release all such data obtained by tc_setup_flow_action().

Flower classifier (only unlocked classifier at the moment) is modified
to use new cls hardware offloads API and no longer obtains rtnl lock
before calling it.
====================
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -3470,10 +3470,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct flow_block_offload *f = type_data;
switch (type) {
#ifdef CONFIG_MLX5_ESWITCH
case TC_SETUP_BLOCK:
f->unlocked_driver_cb = true;
return flow_block_cb_setup_simple(type_data,
&mlx5e_block_cb_list,
mlx5e_setup_tc_block_cb,
......
......@@ -763,6 +763,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
return -EOPNOTSUPP;
f->unlocked_driver_cb = true;
f->driver_block_list = &mlx5e_block_cb_list;
switch (f->command) {
......@@ -1245,9 +1246,11 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct flow_block_offload *f = type_data;
switch (type) {
case TC_SETUP_BLOCK:
f->unlocked_driver_cb = true;
return flow_block_cb_setup_simple(type_data,
&mlx5e_rep_block_cb_list,
mlx5e_rep_setup_tc_cb,
......
......@@ -284,6 +284,7 @@ struct flow_block_offload {
enum flow_block_command command;
enum flow_block_binder_type binder_type;
bool block_shared;
bool unlocked_driver_cb;
struct net *net;
struct flow_block *block;
struct list_head cb_list;
......
......@@ -504,9 +504,26 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts);
const struct tcf_exts *exts, bool rtnl_held);
void tc_cleanup_flow_action(struct flow_action *flow_action);
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
void *type_data, bool err_stop);
void *type_data, bool err_stop, bool rtnl_held);
int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
enum tc_setup_type type, void *type_data, bool err_stop,
u32 *flags, unsigned int *in_hw_count, bool rtnl_held);
int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
enum tc_setup_type type, void *type_data, bool err_stop,
u32 *old_flags, unsigned int *old_in_hw_count,
u32 *new_flags, unsigned int *new_in_hw_count,
bool rtnl_held);
int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
enum tc_setup_type type, void *type_data, bool err_stop,
u32 *flags, unsigned int *in_hw_count, bool rtnl_held);
int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
bool add, flow_setup_cb_t *cb,
enum tc_setup_type type, void *type_data,
void *cb_priv, u32 *flags, unsigned int *in_hw_count);
unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
struct tc_cls_u32_knode {
......
......@@ -13,6 +13,8 @@
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
......@@ -310,6 +312,10 @@ struct tcf_proto_ops {
int (*reoffload)(struct tcf_proto *tp, bool add,
flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack);
void (*hw_add)(struct tcf_proto *tp,
void *type_data);
void (*hw_del)(struct tcf_proto *tp,
void *type_data);
void (*bind_class)(void *, u32, unsigned long);
void * (*tmplt_create)(struct net *net,
struct tcf_chain *chain,
......@@ -396,11 +402,13 @@ struct tcf_block {
refcount_t refcnt;
struct net *net;
struct Qdisc *q;
struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
struct flow_block flow_block;
struct list_head owner_list;
bool keep_dst;
unsigned int offloadcnt; /* Number of oddloaded filters */
atomic_t offloadcnt; /* Number of oddloaded filters */
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
struct {
struct tcf_chain *chain;
struct list_head filter_chain_list;
......@@ -436,37 +444,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
#define tcf_proto_dereference(p, tp) \
rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
{
if (*flags & TCA_CLS_FLAGS_IN_HW)
return;
*flags |= TCA_CLS_FLAGS_IN_HW;
block->offloadcnt++;
}
static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
{
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return;
*flags &= ~TCA_CLS_FLAGS_IN_HW;
block->offloadcnt--;
}
static inline void
tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt,
u32 *flags, bool add)
{
if (add) {
if (!*cnt)
tcf_block_offload_inc(block, flags);
(*cnt)++;
} else {
(*cnt)--;
if (!*cnt)
tcf_block_offload_dec(block, flags);
}
}
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
struct qdisc_skb_cb *qcb;
......
......@@ -59,4 +59,21 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
return NULL;
#endif
}
/* Duplicate the tunnel info attached to action @a.
 *
 * The copy covers the ip_tunnel_info structure plus the options_len bytes
 * of options that follow it. Returns NULL when the action carries no tunnel
 * info, when kmemdup() fails, or when CONFIG_NET_CLS_ACT is not set. The
 * returned memory comes from kmemdup().
 */
static inline struct ip_tunnel_info *
tcf_tunnel_info_copy(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
	struct ip_tunnel_info *src = tcf_tunnel_info(a);

	if (!src)
		return NULL;

	/* Options are stored right behind the structure; copy them as well. */
	return kmemdup(src, sizeof(*src) + src->options_len, GFP_KERNEL);
#else
	return NULL;
#endif
}
#endif /* __NET_TC_TUNNEL_KEY_H */
......@@ -568,9 +568,11 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
bo.block = &block->flow_block;
down_write(&block->cb_lock);
cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
tcf_block_setup(block, &bo);
up_write(&block->cb_lock);
}
static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
......@@ -627,7 +629,7 @@ static void tc_indr_block_call(struct tcf_block *block,
static bool tcf_block_offload_in_use(struct tcf_block *block)
{
return block->offloadcnt;
return atomic_read(&block->offloadcnt);
}
static int tcf_block_offload_cmd(struct tcf_block *block,
......@@ -661,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev;
int err;
down_write(&block->cb_lock);
if (!dev->netdev_ops->ndo_setup_tc)
goto no_offload_dev_inc;
......@@ -669,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
*/
if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
return -EOPNOTSUPP;
err = -EOPNOTSUPP;
goto err_unlock;
}
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
if (err == -EOPNOTSUPP)
goto no_offload_dev_inc;
if (err)
return err;
goto err_unlock;
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
up_write(&block->cb_lock);
return 0;
no_offload_dev_inc:
if (tcf_block_offload_in_use(block))
return -EOPNOTSUPP;
if (tcf_block_offload_in_use(block)) {
err = -EOPNOTSUPP;
goto err_unlock;
}
err = 0;
block->nooffloaddevcnt++;
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
return 0;
err_unlock:
up_write(&block->cb_lock);
return err;
}
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
......@@ -695,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev;
int err;
down_write(&block->cb_lock);
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (!dev->netdev_ops->ndo_setup_tc)
......@@ -702,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (err == -EOPNOTSUPP)
goto no_offload_dev_dec;
up_write(&block->cb_lock);
return;
no_offload_dev_dec:
WARN_ON(block->nooffloaddevcnt-- == 0);
up_write(&block->cb_lock);
}
static int
......@@ -820,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
init_rwsem(&block->cb_lock);
flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->owner_list);
......@@ -1355,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
struct tcf_proto *tp, *tp_prev;
int err;
lockdep_assert_held(&block->cb_lock);
for (chain = __tcf_get_next_chain(block, NULL);
chain;
chain_prev = chain,
......@@ -1393,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block,
struct flow_block_cb *block_cb, *next;
int err, i = 0;
lockdep_assert_held(&block->cb_lock);
list_for_each_entry(block_cb, &bo->cb_list, list) {
err = tcf_block_playback_offloads(block, block_cb->cb,
block_cb->cb_priv, true,
......@@ -1400,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block,
bo->extack);
if (err)
goto err_unroll;
if (!bo->unlocked_driver_cb)
block->lockeddevcnt++;
i++;
}
......@@ -1415,6 +1435,8 @@ static int tcf_block_bind(struct tcf_block *block,
block_cb->cb_priv, false,
tcf_block_offload_in_use(block),
NULL);
if (!bo->unlocked_driver_cb)
block->lockeddevcnt--;
}
flow_block_cb_free(block_cb);
}
......@@ -1427,6 +1449,8 @@ static void tcf_block_unbind(struct tcf_block *block,
{
struct flow_block_cb *block_cb, *next;
lockdep_assert_held(&block->cb_lock);
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
tcf_block_playback_offloads(block, block_cb->cb,
block_cb->cb_priv, false,
......@@ -1434,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block,
NULL);
list_del(&block_cb->list);
flow_block_cb_free(block_cb);
if (!bo->unlocked_driver_cb)
block->lockeddevcnt--;
}
}
......@@ -2980,17 +3006,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
void *type_data, bool err_stop)
/* Mark a filter as present in hardware and account for it on the block.
 * Does nothing when TCA_CLS_FLAGS_IN_HW is already set in @flags, so the
 * block counter is bumped at most once per filter.
 */
static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
{
	if (!(*flags & TCA_CLS_FLAGS_IN_HW)) {
		*flags |= TCA_CLS_FLAGS_IN_HW;
		atomic_inc(&block->offloadcnt);
	}
}
/* Clear the in-hardware mark of a filter and drop it from the block counter.
 * Does nothing when TCA_CLS_FLAGS_IN_HW is not set in @flags.
 */
static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
{
	if (*flags & TCA_CLS_FLAGS_IN_HW) {
		*flags &= ~TCA_CLS_FLAGS_IN_HW;
		atomic_dec(&block->offloadcnt);
	}
}
/* Adjust a filter's in-hardware counter by @diff and keep the block offload
 * counter / TCA_CLS_FLAGS_IN_HW flag in sync with it.
 *
 * When adding, the filter is marked as offloaded if its counter was zero
 * before the update. When removing, the mark is cleared once the counter
 * reaches zero. Caller must hold block->cb_lock; tp->lock is taken here
 * around the counter and flag update.
 */
static void tc_cls_offload_cnt_update(struct tcf_block *block,
				      struct tcf_proto *tp, u32 *cnt,
				      u32 *flags, u32 diff, bool add)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	if (!add) {
		*cnt -= diff;
		if (*cnt == 0)
			tcf_block_offload_dec(block, flags);
	} else {
		if (*cnt == 0)
			tcf_block_offload_inc(block, flags);
		*cnt += diff;
	}
	spin_unlock(&tp->lock);
}
/* Forget that a filter is offloaded: clear its in-hardware flag (dropping
 * the block offload counter if the flag was set) and zero its per-filter
 * counter. Caller must hold block->cb_lock; tp->lock is taken here.
 */
static void
tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
			 u32 *cnt, u32 *flags)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	tcf_block_offload_dec(block, flags);
	*cnt = 0;
	spin_unlock(&tp->lock);
}
static int
__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
void *type_data, bool err_stop)
{
struct flow_block_cb *block_cb;
int ok_count = 0;
int err;
/* Make sure all netdevs sharing this block are offload-capable. */
if (block->nooffloaddevcnt && err_stop)
return -EOPNOTSUPP;
list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
......@@ -3002,17 +3072,235 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
}
return ok_count;
}
/* Invoke the block's offload callbacks for one request.
 *
 * Takes block->cb_lock for reading around the callback run. If the block is
 * bound to devices that need rtnl (lockeddevcnt != 0) and the caller does
 * not already hold it (@rtnl_held == false), rtnl is taken here as well.
 *
 * Returns whatever __tc_setup_cb_call() returns.
 */
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop, bool rtnl_held)
{
	bool need_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ret;

	for (;;) {
		if (need_rtnl)
			rtnl_lock();
		down_read(&block->cb_lock);

		/* Block bind code obtains cb_lock while holding rtnl, so the
		 * same order must be kept here. If lockeddevcnt turned
		 * non-zero after the unlocked peek above, drop cb_lock and
		 * start over with rtnl taken first.
		 */
		if (rtnl_held || need_rtnl || !block->lockeddevcnt)
			break;

		up_read(&block->cb_lock);
		need_rtnl = true;
	}

	ret = __tc_setup_cb_call(block, type, type_data, err_stop);

	up_read(&block->cb_lock);
	if (need_rtnl)
		rtnl_unlock();
	return ret;
}
EXPORT_SYMBOL(tc_setup_cb_call);
/* Non-destructive filter add.
 *
 * Runs the block callbacks for a new filter. When at least one callback
 * accepted it, the filter's in-hardware counter is raised by that number and
 * the block offload counter is incremented if the filter was not in hardware
 * before. On failure nothing is decremented: a previously offloaded filter is
 * considered intact.
 *
 * The classifier's optional ->hw_add() hook runs under cb_lock whenever the
 * callback run did not fail.
 *
 * Returns 0 on success or the negative value reported by the callback run
 * (-EOPNOTSUPP if @err_stop is set and some bound device cannot offload).
 */
int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
		    enum tc_setup_type type, void *type_data, bool err_stop,
		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool need_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ret;

	for (;;) {
		if (need_rtnl)
			rtnl_lock();
		down_read(&block->cb_lock);

		/* Block bind code obtains cb_lock while holding rtnl, so the
		 * same order must be kept here. If lockeddevcnt turned
		 * non-zero after the unlocked peek above, drop cb_lock and
		 * start over with rtnl taken first.
		 */
		if (rtnl_held || need_rtnl || !block->lockeddevcnt)
			break;

		up_read(&block->cb_lock);
		need_rtnl = true;
	}

	/* Every netdev sharing this block has to be offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	ret = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ret < 0)
		goto out_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ret > 0)
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
					  ret, true);

out_unlock:
	up_read(&block->cb_lock);
	if (need_rtnl)
		rtnl_unlock();
	return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(tc_setup_cb_add);
/* Destructive filter replace.
 *
 * The old filter's offload state is reset (and ->hw_del() invoked, if the
 * classifier provides it) before the callbacks run, so on failure the old
 * filter is considered destroyed and the block offload counter has already
 * been decremented. When at least one callback accepted the new filter, its
 * in-hardware counter is raised and the block offload counter incremented.
 *
 * Returns 0 on success or the negative value reported by the callback run
 * (-EOPNOTSUPP, with no state touched, if @err_stop is set and some bound
 * device cannot offload).
 */
int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *old_flags, unsigned int *old_in_hw_count,
			u32 *new_flags, unsigned int *new_in_hw_count,
			bool rtnl_held)
{
	bool need_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ret;

	for (;;) {
		if (need_rtnl)
			rtnl_lock();
		down_read(&block->cb_lock);

		/* Block bind code obtains cb_lock while holding rtnl, so the
		 * same order must be kept here. If lockeddevcnt turned
		 * non-zero after the unlocked peek above, drop cb_lock and
		 * start over with rtnl taken first.
		 */
		if (rtnl_held || need_rtnl || !block->lockeddevcnt)
			break;

		up_read(&block->cb_lock);
		need_rtnl = true;
	}

	/* Every netdev sharing this block has to be offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	/* Drop the old filter's accounting before offloading the new one. */
	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	ret = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ret < 0)
		goto out_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ret > 0)
		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
					  new_flags, ret, true);

out_unlock:
	up_read(&block->cb_lock);
	if (need_rtnl)
		rtnl_unlock();
	return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(tc_setup_cb_replace);
/* Destroy a filter in hardware.
 *
 * Runs the block callbacks, then unconditionally resets the filter's
 * offload state (decrementing the block offload counter if the filter was
 * marked as offloaded) and calls the classifier's optional ->hw_del() hook.
 *
 * Returns 0, or the negative value reported by the callback run.
 */
int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool need_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ret;

	for (;;) {
		if (need_rtnl)
			rtnl_lock();
		down_read(&block->cb_lock);

		/* Block bind code obtains cb_lock while holding rtnl, so the
		 * same order must be kept here. If lockeddevcnt turned
		 * non-zero after the unlocked peek above, drop cb_lock and
		 * start over with rtnl taken first.
		 */
		if (rtnl_held || need_rtnl || !block->lockeddevcnt)
			break;

		up_read(&block->cb_lock);
		need_rtnl = true;
	}

	ret = __tc_setup_cb_call(block, type, type_data, err_stop);

	/* Accounting is dropped regardless of what the callbacks reported. */
	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	up_read(&block->cb_lock);
	if (need_rtnl)
		rtnl_unlock();
	return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(tc_setup_cb_destroy);
/* Replay one filter to a single driver callback @cb.
 *
 * On callback success the filter's in-hardware counter is moved by one in
 * the direction given by @add. A callback error is only propagated when a
 * filter is being added and its flags request skip_sw; otherwise the error
 * is swallowed and 0 is returned.
 */
int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
			  bool add, flow_setup_cb_t *cb,
			  enum tc_setup_type type, void *type_data,
			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
{
	int cb_err = cb(type, type_data, cb_priv);

	if (!cb_err) {
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
					  add);
		return 0;
	}

	if (add && tc_skip_sw(*flags))
		return cb_err;

	return 0;
}
EXPORT_SYMBOL(tc_setup_cb_reoffload);
/* Release per-entry resources held by the entries of @flow_action:
 * the device reference of redirect/mirred entries (when a device is set)
 * and the tunnel info of tunnel-encap entries. Other entry types hold
 * nothing that is released here.
 */
void tc_cleanup_flow_action(struct flow_action *flow_action)
{
	struct flow_action_entry *act_entry;
	int idx;

	flow_action_for_each(idx, act_entry, flow_action) {
		switch (act_entry->id) {
		case FLOW_ACTION_TUNNEL_ENCAP:
			kfree(act_entry->tunnel);
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
		case FLOW_ACTION_REDIRECT_INGRESS:
		case FLOW_ACTION_MIRRED_INGRESS:
			if (act_entry->dev)
				dev_put(act_entry->dev);
			break;
		default:
			break;
		}
	}
}
EXPORT_SYMBOL(tc_cleanup_flow_action);
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts)
const struct tcf_exts *exts, bool rtnl_held)
{
const struct tc_action *act;
int i, j, k;
int i, j, k, err = 0;
if (!exts)
return 0;
if (!rtnl_held)
rtnl_lock();
j = 0;
tcf_exts_for_each_action(i, act, exts) {
struct flow_action_entry *entry;
......@@ -3030,15 +3318,23 @@ int tc_setup_flow_action(struct flow_action *flow_action,
} else if (is_tcf_mirred_egress_redirect(act)) {
entry->id = FLOW_ACTION_REDIRECT;
entry->dev = tcf_mirred_dev(act);
if (entry->dev)
dev_hold(entry->dev);
} else if (is_tcf_mirred_egress_mirror(act)) {
entry->id = FLOW_ACTION_MIRRED;
entry->dev = tcf_mirred_dev(act);
if (entry->dev)
dev_hold(entry->dev);
} else if (is_tcf_mirred_ingress_redirect(act)) {
entry->id = FLOW_ACTION_REDIRECT_INGRESS;
entry->dev = tcf_mirred_dev(act);
if (entry->dev)
dev_hold(entry->dev);
} else if (is_tcf_mirred_ingress_mirror(act)) {
entry->id = FLOW_ACTION_MIRRED_INGRESS;
entry->dev = tcf_mirred_dev(act);
if (entry->dev)
dev_hold(entry->dev);
} else if (is_tcf_vlan(act)) {
switch (tcf_vlan_action(act)) {
case TCA_VLAN_ACT_PUSH:
......@@ -3057,11 +3353,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->vlan.prio = tcf_vlan_push_prio(act);
break;
default:
err = -EOPNOTSUPP;
goto err_out;
}
} else if (is_tcf_tunnel_set(act)) {
entry->id = FLOW_ACTION_TUNNEL_ENCAP;
entry->tunnel = tcf_tunnel_info(act);
entry->tunnel = tcf_tunnel_info_copy(act);
if (!entry->tunnel) {
err = -ENOMEM;
goto err_out;
}
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
} else if (is_tcf_pedit(act)) {
......@@ -3074,6 +3375,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->id = FLOW_ACTION_ADD;
break;
default:
err = -EOPNOTSUPP;
goto err_out;
}
entry->mangle.htype = tcf_pedit_htype(act, k);
......@@ -3132,15 +3434,22 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->id = FLOW_ACTION_PTYPE;
entry->ptype = tcf_skbedit_ptype(act);
} else {
err = -EOPNOTSUPP;
goto err_out;
}
if (!is_tcf_pedit(act))
j++;
}
return 0;
err_out:
return -EOPNOTSUPP;
if (!rtnl_held)
rtnl_unlock();
if (err)
tc_cleanup_flow_action(flow_action);
return err;
}
EXPORT_SYMBOL(tc_setup_flow_action);
......
......@@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
cls_bpf.exts_integrated = obj->exts_integrated;
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
skip_sw, &oldprog->gen_flags,
&oldprog->in_hw_count,
&prog->gen_flags, &prog->in_hw_count,
true);
else
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
skip_sw, &prog->gen_flags,
&prog->in_hw_count, true);
err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
if (prog) {
if (err < 0) {
cls_bpf_offload_cmd(tp, oldprog, prog, extack);
return err;
} else if (err > 0) {
prog->in_hw_count = err;
tcf_block_offload_inc(block, &prog->gen_flags);
}
if (prog && err) {
cls_bpf_offload_cmd(tp, oldprog, prog, extack);
return err;
}
if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
......@@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false);
tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true);
}
static int cls_bpf_init(struct tcf_proto *tp)
......@@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
if (err) {
if (add && tc_skip_sw(prog->gen_flags))
return err;
continue;
}
tc_cls_offload_cnt_update(block, &prog->in_hw_count,
&prog->gen_flags, add);
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF,
&cls_bpf, cb_priv, &prog->gen_flags,
&prog->in_hw_count);
if (err)
return err;
}
return 0;
......
......@@ -412,41 +412,27 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
if (!rtnl_held)
rtnl_lock();
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = FLOW_CLS_DESTROY;
cls_flower.cookie = (unsigned long) f;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
spin_lock(&tp->lock);
list_del_init(&f->hw_list);
tcf_block_offload_dec(block, &f->flags);
spin_unlock(&tp->lock);
tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
&f->flags, &f->in_hw_count, rtnl_held);
if (!rtnl_held)
rtnl_unlock();
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct cls_fl_filter *f, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
bool skip_sw = tc_skip_sw(f->flags);
int err = 0;
if (!rtnl_held)
rtnl_lock();
cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
if (!cls_flower.rule) {
err = -ENOMEM;
goto errout;
}
if (!cls_flower.rule)
return -ENOMEM;
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = FLOW_CLS_REPLACE;
......@@ -456,43 +442,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
rtnl_held);
if (err) {
kfree(cls_flower.rule);
if (skip_sw)
if (skip_sw) {
NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
else
err = 0;
goto errout;
return err;
}
return 0;
}
err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
tc_cleanup_flow_action(&cls_flower.rule->action);
kfree(cls_flower.rule);
if (err < 0) {
fl_hw_destroy_filter(tp, f, true, NULL);
goto errout;
} else if (err > 0) {
f->in_hw_count = err;
err = 0;
spin_lock(&tp->lock);
tcf_block_offload_inc(block, &f->flags);
spin_unlock(&tp->lock);
}
if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
err = -EINVAL;
goto errout;
if (err) {
fl_hw_destroy_filter(tp, f, rtnl_held, NULL);
return err;
}
spin_lock(&tp->lock);
list_add(&f->hw_list, &head->hw_filters);
spin_unlock(&tp->lock);
errout:
if (!rtnl_held)
rtnl_unlock();
if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
return -EINVAL;
return err;
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
......@@ -501,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
if (!rtnl_held)
rtnl_lock();
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = FLOW_CLS_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.classid = f->res.classid;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
rtnl_held);
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
cls_flower.stats.lastused);
if (!rtnl_held)
rtnl_unlock();
}
static void __fl_put(struct cls_fl_filter *f)
......@@ -1831,7 +1800,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.rule->match.mask = &f->mask->key;
cls_flower.rule->match.key = &f->mkey;
err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
true);
if (err) {
kfree(cls_flower.rule);
if (tc_skip_sw(f->flags)) {
......@@ -1844,21 +1814,17 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.classid = f->res.classid;
err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
err = tc_setup_cb_reoffload(block, tp, add, cb,
TC_SETUP_CLSFLOWER, &cls_flower,
cb_priv, &f->flags,
&f->in_hw_count);
tc_cleanup_flow_action(&cls_flower.rule->action);
kfree(cls_flower.rule);
if (err) {
if (add && tc_skip_sw(f->flags)) {
__fl_put(f);
return err;
}
goto next_flow;
__fl_put(f);
return err;
}
spin_lock(&tp->lock);
tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags,
add);
spin_unlock(&tp->lock);
next_flow:
__fl_put(f);
}
......@@ -1866,6 +1832,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
return 0;
}
/* ->hw_add() hook of the flower classifier: link the filter identified by
 * the offload request's cookie into the head's hw_filters list, under
 * tp->lock.
 */
static void fl_hw_add(struct tcf_proto *tp, void *type_data)
{
	struct flow_cls_offload *offload = type_data;
	struct cls_fl_filter *filter = (struct cls_fl_filter *)offload->cookie;
	struct cls_fl_head *head = fl_head_dereference(tp);

	spin_lock(&tp->lock);
	list_add(&filter->hw_list, &head->hw_filters);
	spin_unlock(&tp->lock);
}
/* ->hw_del() hook of the flower classifier: unlink the filter identified by
 * the offload request's cookie from the hardware filter list, under
 * tp->lock. A filter whose hw_list node is already empty is left untouched.
 */
static void fl_hw_del(struct tcf_proto *tp, void *type_data)
{
	struct flow_cls_offload *offload = type_data;
	struct cls_fl_filter *filter = (struct cls_fl_filter *)offload->cookie;

	spin_lock(&tp->lock);
	if (!list_empty(&filter->hw_list))
		list_del_init(&filter->hw_list);
	spin_unlock(&tp->lock);
}
static int fl_hw_create_tmplt(struct tcf_chain *chain,
struct fl_flow_tmplt *tmplt)
{
......@@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
kfree(cls_flower.rule);
return 0;
......@@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
cls_flower.command = FLOW_CLS_TMPLT_DESTROY;
cls_flower.cookie = (unsigned long) tmplt;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
}
static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
......@@ -2526,6 +2516,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.delete = fl_delete,
.walk = fl_walk,
.reoffload = fl_reoffload,
.hw_add = fl_hw_add,
.hw_del = fl_hw_del,
.dump = fl_dump,
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
......
......@@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
tcf_block_offload_dec(block, &head->flags);
tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false,
&head->flags, &head->in_hw_count, true);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
......@@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
......@@ -109,15 +109,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return err;
}
err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
skip_sw, &head->flags, &head->in_hw_count, true);
kfree(cls_mall.rule);
if (err < 0) {
if (err) {
mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
} else if (err > 0) {
head->in_hw_count = err;
tcf_block_offload_inc(block, &head->flags);
}
if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
......@@ -302,7 +300,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
if (add && tc_skip_sw(head->flags)) {
......@@ -312,16 +310,13 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
return 0;
}
err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
&cls_mall, cb_priv, &head->flags,
&head->in_hw_count);
kfree(cls_mall.rule);
if (err) {
if (add && tc_skip_sw(head->flags))
return err;
return 0;
}
tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
if (err)
return err;
return 0;
}
......@@ -337,7 +332,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_STATS;
cls_mall.cookie = cookie;
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
cls_mall.stats.pkts, cls_mall.stats.lastused);
......
......@@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
......@@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
if (err < 0) {
u32_clear_hw_hnode(tp, h, NULL);
return err;
......@@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
tcf_block_offload_dec(block, &n->flags);
tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
&n->flags, &n->in_hw_count, true);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
......@@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
if (n->ht_down)
cls_u32.knode.link_handle = ht->handle;
err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
&n->flags, &n->in_hw_count, true);
if (err) {
u32_remove_hw_knode(tp, n, NULL);
return err;
} else if (err > 0) {
n->in_hw_count = err;
tcf_block_offload_inc(block, &n->flags);
}
if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
......@@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.knode.link_handle = ht->handle;
}
err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
if (err) {
if (add && tc_skip_sw(n->flags))
return err;
return 0;
}
tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
&cls_u32, cb_priv, &n->flags,
&n->in_hw_count);
if (err)
return err;
return 0;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册