1. 05 1月, 2022 6 次提交
    • V
      net: dsa: move dsa_switch_tree :: ports and lags to first cache line · b035c88c
      Vladimir Oltean 提交于
      dst->ports is accessed most notably by dsa_master_find_slave(), which is
      invoked in the RX path.
      
      dst->lags is accessed by dsa_lag_dev(), which is invoked in the RX path
      of tag_dsa.c.
      
      dst->tag_ops, dst->default_proto and dst->pd don't need to be in the
      first cache line, so they are moved out by this change.
      
      Before:
      
      pahole -C dsa_switch_tree net/dsa/slave.o
      struct dsa_switch_tree {
              struct list_head           list;                 /*     0    16 */
              struct raw_notifier_head   nh;                   /*    16     8 */
              unsigned int               index;                /*    24     4 */
              struct kref                refcount;             /*    28     4 */
              bool                       setup;                /*    32     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              const struct dsa_device_ops  * tag_ops;          /*    40     8 */
              enum dsa_tag_protocol      default_proto;        /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_platform_data * pd;                   /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct list_head           ports;                /*    64    16 */
              struct list_head           rtable;               /*    80    16 */
              struct net_device * *      lags;                 /*    96     8 */
              unsigned int               lags_len;             /*   104     4 */
              unsigned int               last_switch;          /*   108     4 */
      
              /* size: 112, cachelines: 2, members: 13 */
              /* sum members: 101, holes: 2, sum holes: 11 */
              /* last cacheline: 48 bytes */
      };
      
      After:
      
      pahole -C dsa_switch_tree net/dsa/slave.o
      struct dsa_switch_tree {
              struct list_head           list;                 /*     0    16 */
              struct list_head           ports;                /*    16    16 */
              struct raw_notifier_head   nh;                   /*    32     8 */
              unsigned int               index;                /*    40     4 */
              struct kref                refcount;             /*    44     4 */
              struct net_device * *      lags;                 /*    48     8 */
              bool                       setup;                /*    56     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              /* --- cacheline 1 boundary (64 bytes) --- */
              const struct dsa_device_ops  * tag_ops;          /*    64     8 */
              enum dsa_tag_protocol      default_proto;        /*    72     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_platform_data * pd;                   /*    80     8 */
              struct list_head           rtable;               /*    88    16 */
              unsigned int               lags_len;             /*   104     4 */
              unsigned int               last_switch;          /*   108     4 */
      
              /* size: 112, cachelines: 2, members: 13 */
              /* sum members: 101, holes: 2, sum holes: 11 */
              /* last cacheline: 48 bytes */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      b035c88c
    • V
      net: dsa: make dsa_switch :: num_ports an unsigned int · 258030ac
      Vladimir Oltean 提交于
      Currently, num_ports is declared as size_t, which is defined as
      __kernel_ulong_t, therefore it occupies 8 bytes of memory.
      
      Even switches with port numbers in the range of tens are exotic, so
      there is no need for this amount of storage.
      
      Additionally, because the max_num_bridges member right above it is also
      4 bytes, it means the compiler needs to add padding between the last 2
      fields. By reducing the size, we don't need that padding and can reduce
      the struct size.
      
      Before:
      
      pahole -C dsa_switch net/dsa/slave.o
      struct dsa_switch {
              struct device *            dev;                  /*     0     8 */
              struct dsa_switch_tree *   dst;                  /*     8     8 */
              unsigned int               index;                /*    16     4 */
              u32                        setup:1;              /*    20: 0  4 */
              u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
              u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
              u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
              u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
              u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
              u32                        vlan_filtering:1;     /*    20: 6  4 */
              u32                        pcs_poll:1;           /*    20: 7  4 */
              u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */
      
              /* XXX 23 bits hole, try to pack */
      
              struct notifier_block      nb;                   /*    24    24 */
      
              /* XXX last struct has 4 bytes of padding */
      
              void *                     priv;                 /*    48     8 */
              void *                     tagger_data;          /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_chip_data *     cd;                   /*    64     8 */
              const struct dsa_switch_ops  * ops;              /*    72     8 */
              u32                        phys_mii_mask;        /*    80     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct mii_bus *           slave_mii_bus;        /*    88     8 */
              unsigned int               ageing_time_min;      /*    96     4 */
              unsigned int               ageing_time_max;      /*   100     4 */
              struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
              struct devlink *           devlink;              /*   112     8 */
              unsigned int               num_tx_queues;        /*   120     4 */
              unsigned int               num_lag_ids;          /*   124     4 */
              /* --- cacheline 2 boundary (128 bytes) --- */
              unsigned int               max_num_bridges;      /*   128     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              size_t                     num_ports;            /*   136     8 */
      
              /* size: 144, cachelines: 3, members: 27 */
              /* sum members: 132, holes: 2, sum holes: 8 */
              /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
              /* paddings: 1, sum paddings: 4 */
              /* last cacheline: 16 bytes */
      };
      
      After:
      
      pahole -C dsa_switch net/dsa/slave.o
      struct dsa_switch {
              struct device *            dev;                  /*     0     8 */
              struct dsa_switch_tree *   dst;                  /*     8     8 */
              unsigned int               index;                /*    16     4 */
              u32                        setup:1;              /*    20: 0  4 */
              u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
              u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
              u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
              u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
              u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
              u32                        vlan_filtering:1;     /*    20: 6  4 */
              u32                        pcs_poll:1;           /*    20: 7  4 */
              u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */
      
              /* XXX 23 bits hole, try to pack */
      
              struct notifier_block      nb;                   /*    24    24 */
      
              /* XXX last struct has 4 bytes of padding */
      
              void *                     priv;                 /*    48     8 */
              void *                     tagger_data;          /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_chip_data *     cd;                   /*    64     8 */
              const struct dsa_switch_ops  * ops;              /*    72     8 */
              u32                        phys_mii_mask;        /*    80     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct mii_bus *           slave_mii_bus;        /*    88     8 */
              unsigned int               ageing_time_min;      /*    96     4 */
              unsigned int               ageing_time_max;      /*   100     4 */
              struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
              struct devlink *           devlink;              /*   112     8 */
              unsigned int               num_tx_queues;        /*   120     4 */
              unsigned int               num_lag_ids;          /*   124     4 */
              /* --- cacheline 2 boundary (128 bytes) --- */
              unsigned int               max_num_bridges;      /*   128     4 */
              unsigned int               num_ports;            /*   132     4 */
      
              /* size: 136, cachelines: 3, members: 27 */
              /* sum members: 128, holes: 1, sum holes: 4 */
              /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
              /* paddings: 1, sum paddings: 4 */
              /* last cacheline: 8 bytes */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      258030ac
    • V
      net: dsa: merge all bools of struct dsa_switch into a single u32 · 7787ff77
      Vladimir Oltean 提交于
      struct dsa_switch has 9 boolean properties, many of which are in fact
      set by drivers for custom behavior (vlan_filtering_is_global,
      needs_standalone_vlan_filtering, etc etc). The binary layout of the
      structure could be improved. For example, the "bool setup" at the
      beginning introduces a gratuitous 7 byte hole in the first cache line.
      
      The change merges all boolean properties into bitfields of an u32, and
      places that u32 in the first cache line of the structure, since many
      bools are accessed from the data path (untag_bridge_pvid, vlan_filtering,
      vlan_filtering_is_global).
      
      We place this u32 after the existing ds->index, which is also 4 bytes in
      size. As a positive side effect, ds->tagger_data now fits into the first
      cache line too, because 4 bytes are saved.
      
      Before:
      
      pahole -C dsa_switch net/dsa/slave.o
      struct dsa_switch {
              bool                       setup;                /*     0     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct device *            dev;                  /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              unsigned int               index;                /*    24     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct notifier_block      nb;                   /*    32    24 */
      
              /* XXX last struct has 4 bytes of padding */
      
              void *                     priv;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              void *                     tagger_data;          /*    64     8 */
              struct dsa_chip_data *     cd;                   /*    72     8 */
              const struct dsa_switch_ops  * ops;              /*    80     8 */
              u32                        phys_mii_mask;        /*    88     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct mii_bus *           slave_mii_bus;        /*    96     8 */
              unsigned int               ageing_time_min;      /*   104     4 */
              unsigned int               ageing_time_max;      /*   108     4 */
              struct dsa_8021q_context * tag_8021q_ctx;        /*   112     8 */
              struct devlink *           devlink;              /*   120     8 */
              /* --- cacheline 2 boundary (128 bytes) --- */
              unsigned int               num_tx_queues;        /*   128     4 */
              bool                       vlan_filtering_is_global; /*   132     1 */
              bool                       needs_standalone_vlan_filtering; /*   133     1 */
              bool                       configure_vlan_while_not_filtering; /*   134     1 */
              bool                       untag_bridge_pvid;    /*   135     1 */
              bool                       assisted_learning_on_cpu_port; /*   136     1 */
              bool                       vlan_filtering;       /*   137     1 */
              bool                       pcs_poll;             /*   138     1 */
              bool                       mtu_enforcement_ingress; /*   139     1 */
              unsigned int               num_lag_ids;          /*   140     4 */
              unsigned int               max_num_bridges;      /*   144     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              size_t                     num_ports;            /*   152     8 */
      
              /* size: 160, cachelines: 3, members: 27 */
              /* sum members: 141, holes: 4, sum holes: 19 */
              /* paddings: 1, sum paddings: 4 */
              /* last cacheline: 32 bytes */
      };
      
      After:
      
      pahole -C dsa_switch net/dsa/slave.o
      struct dsa_switch {
              struct device *            dev;                  /*     0     8 */
              struct dsa_switch_tree *   dst;                  /*     8     8 */
              unsigned int               index;                /*    16     4 */
              u32                        setup:1;              /*    20: 0  4 */
              u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
              u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
              u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
              u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
              u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
              u32                        vlan_filtering:1;     /*    20: 6  4 */
              u32                        pcs_poll:1;           /*    20: 7  4 */
              u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */
      
              /* XXX 23 bits hole, try to pack */
      
              struct notifier_block      nb;                   /*    24    24 */
      
              /* XXX last struct has 4 bytes of padding */
      
              void *                     priv;                 /*    48     8 */
              void *                     tagger_data;          /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_chip_data *     cd;                   /*    64     8 */
              const struct dsa_switch_ops  * ops;              /*    72     8 */
              u32                        phys_mii_mask;        /*    80     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct mii_bus *           slave_mii_bus;        /*    88     8 */
              unsigned int               ageing_time_min;      /*    96     4 */
              unsigned int               ageing_time_max;      /*   100     4 */
              struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
              struct devlink *           devlink;              /*   112     8 */
              unsigned int               num_tx_queues;        /*   120     4 */
              unsigned int               num_lag_ids;          /*   124     4 */
              /* --- cacheline 2 boundary (128 bytes) --- */
              unsigned int               max_num_bridges;      /*   128     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              size_t                     num_ports;            /*   136     8 */
      
              /* size: 144, cachelines: 3, members: 27 */
              /* sum members: 132, holes: 2, sum holes: 8 */
              /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
              /* paddings: 1, sum paddings: 4 */
              /* last cacheline: 16 bytes */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      7787ff77
    • V
      net: dsa: move dsa_port :: type near dsa_port :: index · 06251258
      Vladimir Oltean 提交于
      Both dsa_port :: type and dsa_port :: index introduce a 4 octet hole
      after them, so we can group them together and the holes would be
      eliminated, turning 16 octets of storage into just 8. This makes the
      cpu_dp pointer fit in the first cache line, which is good, because
      dsa_slave_to_master(), called by dsa_enqueue_skb(), uses it.
      
      Before:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    32     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_switch *        ds;                   /*    40     8 */
              unsigned int               index;                /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              const char  *              name;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_port *          cpu_dp;               /*    64     8 */
              u8                         mac[6];               /*    72     6 */
              u8                         stp_state;            /*    78     1 */
              u8                         vlan_filtering:1;     /*    79: 0  1 */
              u8                         learning:1;           /*    79: 1  1 */
              u8                         lag_tx_enabled:1;     /*    79: 2  1 */
              u8                         devlink_port_setup:1; /*    79: 3  1 */
              u8                         setup:1;              /*    79: 4  1 */
      
              /* XXX 3 bits hole, try to pack */
      
              struct device_node *       dn;                   /*    80     8 */
              unsigned int               ageing_time;          /*    88     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    96     8 */
              struct devlink_port        devlink_port;         /*   104   288 */
              /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
              struct phylink *           pl;                   /*   392     8 */
              struct phylink_config      pl_config;            /*   400    40 */
              struct net_device *        lag_dev;              /*   440     8 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct net_device *        hsr_dev;              /*   448     8 */
              struct list_head           list;                 /*   456    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   472     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   480     8 */
              struct mutex               addr_lists_lock;      /*   488    32 */
              /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */
              struct list_head           fdbs;                 /*   520    16 */
              struct list_head           mdbs;                 /*   536    16 */
      
              /* size: 552, cachelines: 9, members: 30 */
              /* sum members: 539, holes: 3, sum holes: 12 */
              /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
              /* last cacheline: 40 bytes */
      };
      
      After:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              struct dsa_switch *        ds;                   /*    32     8 */
              unsigned int               index;                /*    40     4 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    44     4 */
              const char  *              name;                 /*    48     8 */
              struct dsa_port *          cpu_dp;               /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              u8                         mac[6];               /*    64     6 */
              u8                         stp_state;            /*    70     1 */
              u8                         vlan_filtering:1;     /*    71: 0  1 */
              u8                         learning:1;           /*    71: 1  1 */
              u8                         lag_tx_enabled:1;     /*    71: 2  1 */
              u8                         devlink_port_setup:1; /*    71: 3  1 */
              u8                         setup:1;              /*    71: 4  1 */
      
              /* XXX 3 bits hole, try to pack */
      
              struct device_node *       dn;                   /*    72     8 */
              unsigned int               ageing_time;          /*    80     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    88     8 */
              struct devlink_port        devlink_port;         /*    96   288 */
              /* --- cacheline 6 boundary (384 bytes) --- */
              struct phylink *           pl;                   /*   384     8 */
              struct phylink_config      pl_config;            /*   392    40 */
              struct net_device *        lag_dev;              /*   432     8 */
              struct net_device *        hsr_dev;              /*   440     8 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct list_head           list;                 /*   448    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   464     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   472     8 */
              struct mutex               addr_lists_lock;      /*   480    32 */
              /* --- cacheline 8 boundary (512 bytes) --- */
              struct list_head           fdbs;                 /*   512    16 */
              struct list_head           mdbs;                 /*   528    16 */
      
              /* size: 544, cachelines: 9, members: 30 */
              /* sum members: 539, holes: 1, sum holes: 4 */
              /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
              /* last cacheline: 32 bytes */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      06251258
    • V
      net: dsa: merge all bools of struct dsa_port into a single u8 · bde82f38
      Vladimir Oltean 提交于
      struct dsa_port has 5 bool members which create quite a number of 7 byte
      holes in the structure layout. By merging them all into bitfields of an
      u8, and placing that u8 in the 1-byte hole after dp->mac and dp->stp_state,
      we can reduce the structure size from 576 bytes to 552 bytes on arm64.
      
      Before:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    32     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_switch *        ds;                   /*    40     8 */
              unsigned int               index;                /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              const char  *              name;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_port *          cpu_dp;               /*    64     8 */
              u8                         mac[6];               /*    72     6 */
              u8                         stp_state;            /*    78     1 */
      
              /* XXX 1 byte hole, try to pack */
      
              struct device_node *       dn;                   /*    80     8 */
              unsigned int               ageing_time;          /*    88     4 */
              bool                       vlan_filtering;       /*    92     1 */
              bool                       learning;             /*    93     1 */
      
              /* XXX 2 bytes hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    96     8 */
              struct devlink_port        devlink_port;         /*   104   288 */
              /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
              bool                       devlink_port_setup;   /*   392     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct phylink *           pl;                   /*   400     8 */
              struct phylink_config      pl_config;            /*   408    40 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct net_device *        lag_dev;              /*   448     8 */
              bool                       lag_tx_enabled;       /*   456     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct net_device *        hsr_dev;              /*   464     8 */
              struct list_head           list;                 /*   472    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
              struct mutex               addr_lists_lock;      /*   504    32 */
              /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
              struct list_head           fdbs;                 /*   536    16 */
              struct list_head           mdbs;                 /*   552    16 */
              bool                       setup;                /*   568     1 */
      
              /* size: 576, cachelines: 9, members: 30 */
              /* sum members: 544, holes: 6, sum holes: 25 */
              /* padding: 7 */
      };
      
      After:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    32     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_switch *        ds;                   /*    40     8 */
              unsigned int               index;                /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              const char  *              name;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_port *          cpu_dp;               /*    64     8 */
              u8                         mac[6];               /*    72     6 */
              u8                         stp_state;            /*    78     1 */
              u8                         vlan_filtering:1;     /*    79: 0  1 */
              u8                         learning:1;           /*    79: 1  1 */
              u8                         lag_tx_enabled:1;     /*    79: 2  1 */
              u8                         devlink_port_setup:1; /*    79: 3  1 */
              u8                         setup:1;              /*    79: 4  1 */
      
              /* XXX 3 bits hole, try to pack */
      
              struct device_node *       dn;                   /*    80     8 */
              unsigned int               ageing_time;          /*    88     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    96     8 */
              struct devlink_port        devlink_port;         /*   104   288 */
              /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
              struct phylink *           pl;                   /*   392     8 */
              struct phylink_config      pl_config;            /*   400    40 */
              struct net_device *        lag_dev;              /*   440     8 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct net_device *        hsr_dev;              /*   448     8 */
              struct list_head           list;                 /*   456    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   472     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   480     8 */
              struct mutex               addr_lists_lock;      /*   488    32 */
              /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */
              struct list_head           fdbs;                 /*   520    16 */
              struct list_head           mdbs;                 /*   536    16 */
      
              /* size: 552, cachelines: 9, members: 30 */
              /* sum members: 539, holes: 3, sum holes: 12 */
              /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
              /* last cacheline: 40 bytes */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      bde82f38
    • V
      net: dsa: move dsa_port :: stp_state near dsa_port :: mac · b08db33d
      Vladimir Oltean 提交于
      The MAC address of a port is 6 octets in size, and this creates a 2
      octet hole after it. There are some other u8 members of struct dsa_port
      that we can put in that hole. One such member is the stp_state.
      
      Before:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    32     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_switch *        ds;                   /*    40     8 */
              unsigned int               index;                /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              const char  *              name;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_port *          cpu_dp;               /*    64     8 */
              u8                         mac[6];               /*    72     6 */
      
              /* XXX 2 bytes hole, try to pack */
      
              struct device_node *       dn;                   /*    80     8 */
              unsigned int               ageing_time;          /*    88     4 */
              bool                       vlan_filtering;       /*    92     1 */
              bool                       learning;             /*    93     1 */
              u8                         stp_state;            /*    94     1 */
      
              /* XXX 1 byte hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    96     8 */
              struct devlink_port        devlink_port;         /*   104   288 */
              /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
              bool                       devlink_port_setup;   /*   392     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct phylink *           pl;                   /*   400     8 */
              struct phylink_config      pl_config;            /*   408    40 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct net_device *        lag_dev;              /*   448     8 */
              bool                       lag_tx_enabled;       /*   456     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct net_device *        hsr_dev;              /*   464     8 */
              struct list_head           list;                 /*   472    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
              struct mutex               addr_lists_lock;      /*   504    32 */
              /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
              struct list_head           fdbs;                 /*   536    16 */
              struct list_head           mdbs;                 /*   552    16 */
              bool                       setup;                /*   568     1 */
      
              /* size: 576, cachelines: 9, members: 30 */
              /* sum members: 544, holes: 6, sum holes: 25 */
              /* padding: 7 */
      };
      
      After:
      
      pahole -C dsa_port net/dsa/slave.o
      struct dsa_port {
              union {
                      struct net_device * master;              /*     0     8 */
                      struct net_device * slave;               /*     0     8 */
              };                                               /*     0     8 */
              const struct dsa_device_ops  * tag_ops;          /*     8     8 */
              struct dsa_switch_tree *   dst;                  /*    16     8 */
              struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
              enum {
                      DSA_PORT_TYPE_UNUSED = 0,
                      DSA_PORT_TYPE_CPU    = 1,
                      DSA_PORT_TYPE_DSA    = 2,
                      DSA_PORT_TYPE_USER   = 3,
              } type;                                          /*    32     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              struct dsa_switch *        ds;                   /*    40     8 */
              unsigned int               index;                /*    48     4 */
      
              /* XXX 4 bytes hole, try to pack */
      
              const char  *              name;                 /*    56     8 */
              /* --- cacheline 1 boundary (64 bytes) --- */
              struct dsa_port *          cpu_dp;               /*    64     8 */
              u8                         mac[6];               /*    72     6 */
              u8                         stp_state;            /*    78     1 */
      
              /* XXX 1 byte hole, try to pack */
      
              struct device_node *       dn;                   /*    80     8 */
              unsigned int               ageing_time;          /*    88     4 */
              bool                       vlan_filtering;       /*    92     1 */
              bool                       learning;             /*    93     1 */
      
              /* XXX 2 bytes hole, try to pack */
      
              struct dsa_bridge *        bridge;               /*    96     8 */
              struct devlink_port        devlink_port;         /*   104   288 */
              /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
              bool                       devlink_port_setup;   /*   392     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct phylink *           pl;                   /*   400     8 */
              struct phylink_config      pl_config;            /*   408    40 */
              /* --- cacheline 7 boundary (448 bytes) --- */
              struct net_device *        lag_dev;              /*   448     8 */
              bool                       lag_tx_enabled;       /*   456     1 */
      
              /* XXX 7 bytes hole, try to pack */
      
              struct net_device *        hsr_dev;              /*   464     8 */
              struct list_head           list;                 /*   472    16 */
              const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
              const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
              struct mutex               addr_lists_lock;      /*   504    32 */
              /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
              struct list_head           fdbs;                 /*   536    16 */
              struct list_head           mdbs;                 /*   552    16 */
              bool                       setup;                /*   568     1 */
      
              /* size: 576, cachelines: 9, members: 30 */
              /* sum members: 544, holes: 6, sum holes: 25 */
              /* padding: 7 */
      };
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      b08db33d
  2. 14 12月, 2021 1 次提交
    • V
      net: dsa: make tagging protocols connect to individual switches from a tree · 7f297314
      Vladimir Oltean 提交于
      On the NXP Bluebox 3 board which uses a multi-switch setup with sja1105,
      the mechanism through which the tagger connects to the switch tree is
      broken, due to improper DSA code design. At the time when tag_ops->connect()
      is called in dsa_port_parse_cpu(), DSA hasn't finished "touching" all
      the ports, so it doesn't know how large the tree is and how many ports
      it has. It has just seen the first CPU port by this time. As a result,
      this function will call the tagger's ->connect method too early, and the
      tagger will connect only to the first switch from the tree.
      
      This could be perhaps addressed a bit more simply by just moving the
      tag_ops->connect(dst) call a bit later (for example in dsa_tree_setup),
      but there is already a design inconsistency at present: on the switch
      side, the notification is on a per-switch basis, but on the tagger side,
      it is on a per-tree basis. Furthermore, the persistent storage itself is
      per switch (ds->tagger_data). And the tagger connect and disconnect
      procedures (at least the ones that exist currently) could see a fair bit
      of simplification if they didn't have to iterate through the switches of
      a tree.
      
      To fix the issue, this change transforms tag_ops->connect(dst) into
      tag_ops->connect(ds) and moves it somewhere where we already iterate
      over all switches of a tree. That is in dsa_switch_setup_tag_protocol(),
      which is a good placement because we already have there the connection
      call to the switch side of things.
      
      As for the dsa_tree_bind_tag_proto() method (called from the code path
      that changes the tag protocol), things are a bit more complicated
      because we receive the tree as argument, yet when we unwind on errors,
      it would be nice to not call tag_ops->disconnect(ds) where we didn't
      previously call tag_ops->connect(ds). We didn't have this problem before
      because the tag_ops connection operations passed the entire dst before,
      and this is more fine grained now. To solve the error rewind case using
      the new API, we have to create yet one more cross-chip notifier for
      disconnection, and stay connected with the old tag protocol to all the
      switches in the tree until we've succeeded to connect with the new one
      as well. So if something fails half way, the whole tree is still
      connected to the old tagger. But there may still be leaks if the tagger
      fails to connect to the 2nd out of 3 switches in a tree: somebody needs
      to tell the tagger to disconnect from the first switch. Nothing comes
      for free, and this was previously handled privately by the tagging
      protocol driver before, but now we need to emit a disconnect cross-chip
      notifier for that, because DSA has to take care of the unwind path. We
      assume that the tagging protocol has connected to a switch if it has set
      ds->tagger_data to something, otherwise we avoid calling its
      disconnection method in the error rewind path.
      
      The rest of the changes are in the tagging protocol drivers, and have to
      do with the replacement of dst with ds. The iteration is removed and the
      error unwind path is simplified, as mentioned above.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      7f297314
  3. 12 12月, 2021 2 次提交
    • V
      net: dsa: remove dp->priv · 4f3cb343
      Vladimir Oltean 提交于
      All current in-tree uses of dp->priv have been replaced with
      ds->tagger_data, which provides for a safer API especially when the
      connection isn't the regular 1:1 link between one switch driver and one
      tagging protocol driver, but could be either one switch to many taggers,
      or many switches to one tagger.
      
      Therefore, we can remove this unused pointer.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      4f3cb343
    • V
      net: dsa: introduce tagger-owned storage for private and shared data · dc452a47
      Vladimir Oltean 提交于
      Ansuel is working on register access over Ethernet for the qca8k switch
      family. This requires the qca8k tagging protocol driver to receive
      frames which aren't intended for the network stack, but instead for the
      qca8k switch driver itself.
      
      The dp->priv is currently the prevailing method for passing data back
      and forth between the tagging protocol driver and the switch driver.
      However, this method is riddled with caveats.
      
      The DSA design allows in principle for any switch driver to return any
      protocol it desires in ->get_tag_protocol(). The dsa_loop driver can be
      modified to do just that. But in the current design, the memory behind
      dp->priv has to be allocated by the switch driver, so if the tagging
      protocol is paired to an unexpected switch driver, we may end up in NULL
      pointer dereferences inside the kernel, or worse (a switch driver may
      allocate dp->priv according to the expectations of a different tagger).
      
      The latter possibility is even more plausible considering that DSA
      switches can dynamically change tagging protocols in certain cases
      (dsa <-> edsa, ocelot <-> ocelot-8021q), and the current design lends
      itself to mistakes that are all too easy to make.
      
      This patch proposes that the tagging protocol driver should manage its
      own memory, instead of relying on the switch driver to do so.
      After analyzing the different in-tree needs, it can be observed that the
      required tagger storage is per switch, therefore a ds->tagger_data
      pointer is introduced. In principle, per-port storage could also be
      introduced, although there is no need for it at the moment. Future
      changes will replace the current usage of dp->priv with ds->tagger_data.
      
      We define a "binding" event between the DSA switch tree and the tagging
      protocol. During this binding event, the tagging protocol's ->connect()
      method is called first, and this may allocate some memory for each
      switch of the tree. Then a cross-chip notifier is emitted for the
      switches within that tree, and they are given the opportunity to fix up
      the tagger's memory (for example, they might set up some function
      pointers that represent virtual methods for consuming packets).
      Because the memory is owned by the tagger, there exists a ->disconnect()
      method for the tagger (which is the place to free the resources), but
      there doesn't exist a ->disconnect() method for the switch driver.
      This is part of the design. The switch driver should make minimal use of
      the public part of the tagger data, and only after type-checking it
      using the supplied "proto" argument.
      
      In the code there are in fact two binding events, one is the initial
      event in dsa_switch_setup_tag_protocol(). At this stage, the cross chip
      notifier chains aren't initialized, so we call each switch's connect()
      method by hand. Then there is dsa_tree_bind_tag_proto() during
      dsa_tree_change_tag_proto(), and here we have an old protocol and a new
      one. We first connect to the new one before disconnecting from the old
      one, to simplify error handling a bit and to ensure we remain in a valid
      state at all times.
      Co-developed-by: NAnsuel Smith <ansuelsmth@gmail.com>
      Signed-off-by: NAnsuel Smith <ansuelsmth@gmail.com>
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      dc452a47
  4. 09 12月, 2021 7 次提交
    • V
      net: dsa: eliminate dsa_switch_ops :: port_bridge_tx_fwd_{,un}offload · 857fdd74
      Vladimir Oltean 提交于
      We don't really need new switch API for these, and with new switches
      which intend to add support for this feature, it will become cumbersome
      to maintain.
      
      The change consists in restructuring the two drivers that implement this
      offload (sja1105 and mv88e6xxx) such that the offload is enabled and
      disabled from the ->port_bridge_{join,leave} methods instead of the old
      ->port_bridge_tx_fwd_{,un}offload.
      
      The only non-trivial change is that mv88e6xxx_map_virtual_bridge_to_pvt()
      has been moved to avoid a forward declaration, and the
      mv88e6xxx_reg_lock() calls from inside it have been removed, since
      locking is now done from mv88e6xxx_port_bridge_{join,leave}.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      857fdd74
    • V
      net: dsa: add a "tx_fwd_offload" argument to ->port_bridge_join · b079922b
      Vladimir Oltean 提交于
      This is a preparation patch for the removal of the DSA switch methods
      ->port_bridge_tx_fwd_offload() and ->port_bridge_tx_fwd_unoffload().
      The plan is for the switch to report whether it offloads TX forwarding
      directly as a response to the ->port_bridge_join() method.
      
      This change deals with the noisy portion of converting all existing
      function prototypes to take this new boolean pointer argument.
      The bool is placed in the cross-chip notifier structure for bridge join,
      and a reference to it is provided to drivers. In the next change, DSA
      will then actually look at this value instead of calling
      ->port_bridge_tx_fwd_offload().
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      b079922b
    • V
      net: dsa: keep the bridge_dev and bridge_num as part of the same structure · d3eed0e5
      Vladimir Oltean 提交于
      The main desire behind this is to provide coherent bridge information to
      the fast path without locking.
      
      For example, right now we set dp->bridge_dev and dp->bridge_num from
      separate code paths, it is theoretically possible for a packet
      transmission to read these two port properties consecutively and find a
      bridge number which does not correspond with the bridge device.
      
      Another desire is to start passing more complex bridge information to
      dsa_switch_ops functions. For example, with FDB isolation, it is
      expected that drivers will need to be passed the bridge which requested
      an FDB/MDB entry to be offloaded, and along with that bridge_dev, the
      associated bridge_num should be passed too, in case the driver might
      want to implement an isolation scheme based on that number.
      
      We already pass the {bridge_dev, bridge_num} pair to the TX forwarding
      offload switch API, however we'd like to remove that and squash it into
      the basic bridge join/leave API. So that means we need to pass this
      pair to the bridge join/leave API.
      
      During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we
      call the driver's .port_bridge_leave with what used to be our
      dp->bridge_dev, but provided as an argument.
      
      When bridge_dev and bridge_num get folded into a single structure, we
      need to preserve this behavior in dsa_port_bridge_leave: we need a copy
      of what used to be in dp->bridge.
      
      Switch drivers check bridge membership by comparing dp->bridge_dev with
      the provided bridge_dev, but now, if we provide the struct dsa_bridge as
      a pointer, they cannot keep comparing dp->bridge to the provided
      pointer, since this only points to an on-stack copy. To make this
      obvious and prevent driver writers from forgetting and doing stupid
      things, in this new API, the struct dsa_bridge is provided as a full
      structure (not very large, contains an int and a pointer) instead of a
      pointer. An explicit comparison function needs to be used to determine
      bridge membership: dsa_port_offloads_bridge().
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      d3eed0e5
    • V
      net: dsa: export bridging offload helpers to drivers · 6a43cba3
      Vladimir Oltean 提交于
      Move the static inline helpers from net/dsa/dsa_priv.h to
      include/net/dsa.h, so that drivers can call functions such as
      dsa_port_offloads_bridge_dev(), which will be necessary after the
      transition to a more complex bridge structure.
      
      More functions than are needed right now are being moved, but this is
      done for uniformity.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      6a43cba3
    • V
      net: dsa: hide dp->bridge_dev and dp->bridge_num in the core behind helpers · 36cbf39b
      Vladimir Oltean 提交于
      The location of the bridge device pointer and number is going to change.
      It is not going to be kept individually per port, but in a common
      structure allocated dynamically and which will have lockdep validation.
      
      Create helpers to access these elements so that we have a migration path
      to the new organization.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      36cbf39b
    • V
      net: dsa: assign a bridge number even without TX forwarding offload · 947c8746
      Vladimir Oltean 提交于
      The service where DSA assigns a unique bridge number for each forwarding
      domain is useful even for drivers which do not implement the TX
      forwarding offload feature.
      
      For example, drivers might use the dp->bridge_num for FDB isolation.
      
      So rename ds->num_fwd_offloading_bridges to ds->max_num_bridges, and
      calculate a unique bridge_num for all drivers that set this value.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      947c8746
    • V
      net: dsa: make dp->bridge_num one-based · 3f9bb030
      Vladimir Oltean 提交于
      I have seen too many bugs already due to the fact that we must encode an
      invalid dp->bridge_num as a negative value, because the natural tendency
      is to check that invalid value using (!dp->bridge_num). Latest example
      can be seen in commit 1bec0f05 ("net: dsa: fix bridge_num not
      getting cleared after ports leaving the bridge").
      
      Convert the existing users to assume that dp->bridge_num == 0 is the
      encoding for invalid, and valid bridge numbers start from 1.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NAlvin Šipraga <alsi@bang-olufsen.dk>
      Signed-off-by: NJakub Kicinski <kuba@kernel.org>
      3f9bb030
  5. 02 12月, 2021 1 次提交
  6. 01 11月, 2021 1 次提交
  7. 25 10月, 2021 2 次提交
    • V
      net: dsa: introduce locking for the address lists on CPU and DSA ports · 338a3a47
      Vladimir Oltean 提交于
      Now that the rtnl_mutex is going away for dsa_port_{host_,}fdb_{add,del},
      no one is serializing access to the address lists that DSA keeps for the
      purpose of reference counting on shared ports (CPU and cascade ports).
      
      It can happen for one dsa_switch_do_fdb_del to do list_del on a dp->fdbs
      element while another dsa_switch_do_fdb_{add,del} is traversing dp->fdbs.
      We need to avoid that.
      
      Currently dp->mdbs is not at risk, because dsa_switch_do_mdb_{add,del}
      still runs under the rtnl_mutex. But it would be nice if it would not
      depend on that being the case. So let's introduce a mutex per port (the
      address lists are per port too) and share it between dp->mdbs and
      dp->fdbs.
      
      The place where we put the locking is interesting. It could be tempting
      to put a DSA-level lock which still serializes calls to
      .port_fdb_{add,del}, but it would still not avoid concurrency with other
      driver code paths that are currently under rtnl_mutex (.port_fdb_dump,
      .port_fast_age). So it would add a very false sense of security (and
      adding a global switch-wide lock in DSA to resynchronize with the
      rtnl_lock is also counterproductive and hard).
      
      So the locking is intentionally done only where the dp->fdbs and dp->mdbs
      lists are traversed. That means, from a driver perspective, that
      .port_fdb_add will be called with the dp->addr_lists_lock mutex held on
      the CPU port, but not held on user ports. This is done so that driver
      writers are not encouraged to rely on any guarantee offered by
      dp->addr_lists_lock.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      338a3a47
    • D
      Revert "Merge branch 'dsa-rtnl'" · 2d7e73f0
      David S. Miller 提交于
      This reverts commit 965e6b26, reversing
      changes made to 4d98bb0d.
      2d7e73f0
  8. 24 10月, 2021 1 次提交
    • V
      net: dsa: introduce locking for the address lists on CPU and DSA ports · d3bd8924
      Vladimir Oltean 提交于
      Now that the rtnl_mutex is going away for dsa_port_{host_,}fdb_{add,del},
      no one is serializing access to the address lists that DSA keeps for the
      purpose of reference counting on shared ports (CPU and cascade ports).
      
      It can happen for one dsa_switch_do_fdb_del to do list_del on a dp->fdbs
      element while another dsa_switch_do_fdb_{add,del} is traversing dp->fdbs.
      We need to avoid that.
      
      Currently dp->mdbs is not at risk, because dsa_switch_do_mdb_{add,del}
      still runs under the rtnl_mutex. But it would be nice if it would not
      depend on that being the case. So let's introduce a mutex per port (the
      address lists are per port too) and share it between dp->mdbs and
      dp->fdbs.
      
      The place where we put the locking is interesting. It could be tempting
      to put a DSA-level lock which still serializes calls to
      .port_fdb_{add,del}, but it would still not avoid concurrency with other
      driver code paths that are currently under rtnl_mutex (.port_fdb_dump,
      .port_fast_age). So it would add a very false sense of security (and
      adding a global switch-wide lock in DSA to resynchronize with the
      rtnl_lock is also counterproductive and hard).
      
      So the locking is intentionally done only where the dp->fdbs and dp->mdbs
      lists are traversed. That means, from a driver perspective, that
      .port_fdb_add will be called with the dp->addr_lists_lock mutex held on
      the CPU port, but not held on user ports. This is done so that driver
      writers are not encouraged to rely on any guarantee offered by
      dp->addr_lists_lock.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      d3bd8924
  9. 21 10月, 2021 2 次提交
    • V
      net: dsa: remove the "dsa_to_port in a loop" antipattern from the core · d0004a02
      Vladimir Oltean 提交于
      Ever since Vivien's conversion of the ds->ports array into a dst->ports
      list, and the introduction of dsa_to_port, iterations through the ports
      of a switch became quadratic whenever dsa_to_port was needed.
      
      dsa_to_port can either be called directly, or indirectly through the
      dsa_is_{user,cpu,dsa,unused}_port helpers.
      
      Use the newly introduced dsa_switch_for_each_port() iteration macro
      that works with the iterator variable being a struct dsa_port *dp
      directly, and not an int i. It is an expensive variable to go from i to
      dp, but cheap to go from dp to i.
      
      This macro iterates through the entire ds->dst->ports list and filters
      by the ports belonging just to the switch provided as argument.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      d0004a02
    • V
      net: dsa: introduce helpers for iterating through ports using dp · 82b31898
      Vladimir Oltean 提交于
      Since the DSA conversion from the ds->ports array into the dst->ports
      list, the DSA API has encouraged driver writers, as well as the core
      itself, to write inefficient code.
      
      Currently, code that wants to filter by a specific type of port when
      iterating, like {!unused, user, cpu, dsa}, uses the dsa_is_*_port helper.
      Under the hood, this uses dsa_to_port which iterates again through
      dst->ports. But the driver iterates through the port list already, so
      the complexity is quadratic for the typical case of a single-switch
      tree.
      
      This patch introduces some iteration helpers where the iterator is
      already a struct dsa_port *dp, so that the other variant of the
      filtering functions, dsa_port_is_{unused,user,cpu_dsa}, can be used
      directly on the iterator. This eliminates the second lookup.
      
      These functions can be used both by the core and by drivers.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      82b31898
  10. 18 10月, 2021 2 次提交
  11. 19 9月, 2021 2 次提交
    • V
      net: dsa: tear down devlink port regions when tearing down the devlink port on error · fd292c18
      Vladimir Oltean 提交于
      Commit 86f8b1c0 ("net: dsa: Do not make user port errors fatal")
      decided it was fine to ignore errors on certain ports that fail to
      probe, and go on with the ports that do probe fine.
      
      Commit fb6ec87f ("net: dsa: Fix type was not set for devlink port")
      noticed that devlink_port_type_eth_set(dlp, dp->slave); does not get
      called, and devlink notices after a timeout of 3600 seconds and prints a
      WARN_ON. So it went ahead to unregister the devlink port. And because
      there exists an UNUSED port flavour, we actually re-register the devlink
      port as UNUSED.
      
      Commit 08156ba4 ("net: dsa: Add devlink port regions support to
      DSA") added devlink port regions, which are set up by the driver and not
      by DSA.
      
      When we trigger the devlink port deregistration and reregistration as
      unused, devlink now prints another WARN_ON, from here:
      
      devlink_port_unregister:
      	WARN_ON(!list_empty(&devlink_port->region_list));
      
      So the port still has regions, which makes sense, because they were set
      up by the driver, and the driver doesn't know we're unregistering the
      devlink port.
      
      Somebody needs to tear them down, and optionally (actually it would be
      nice, to be consistent) set them up again for the new devlink port.
      
      But DSA's layering stays in our way quite badly here.
      
      The options I've considered are:
      
      1. Introduce a function in devlink to just change a port's type and
         flavour. No dice, devlink keeps a lot of state, it really wants the
         port to not be registered when you set its parameters, so changing
         anything can only be done by destroying what we currently have and
         recreating it.
      
      2. Make DSA cache the parameters passed to dsa_devlink_port_region_create,
         and the region returned, keep those in a list, then when the devlink
         port unregister needs to take place, the existing devlink regions are
         destroyed by DSA, and we replay the creation of new regions using the
         cached parameters. Problem: mv88e6xxx keeps the region pointers in
         chip->ports[port].region, and these will remain stale after DSA frees
         them. There are many things DSA can do, but updating mv88e6xxx's
         private pointers is not one of them.
      
      3. Just let the driver do it (i.e. introduce a very specific method
         called ds->ops->port_reinit_as_unused, which unregisters its devlink
         port devlink regions, then the old devlink port, then registers the
         new one, then the devlink port regions for it). While it does work,
         as opposed to the others, it's pretty horrible from an API
         perspective and we can do better.
      
      4. Introduce a new pair of methods, ->port_setup and ->port_teardown,
         which in the case of mv88e6xxx must register and unregister the
         devlink port regions. Call these 2 methods when the port must be
         reinitialized as unused.
      
      Naturally, I went for the 4th approach.
      
      Fixes: 08156ba4 ("net: dsa: Add devlink port regions support to DSA")
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      fd292c18
    • V
      net: dsa: be compatible with masters which unregister on shutdown · 0650bf52
      Vladimir Oltean 提交于
      Lino reports that on his system with bcmgenet as DSA master and KSZ9897
      as a switch, rebooting or shutting down never works properly.
      
      What does the bcmgenet driver have special to trigger this, that other
      DSA masters do not? It has an implementation of ->shutdown which simply
      calls its ->remove implementation. Otherwise said, it unregisters its
      network interface on shutdown.
      
      This message can be seen in a loop, and it hangs the reboot process there:
      
      unregister_netdevice: waiting for eth0 to become free. Usage count = 3
      
      So why 3?
      
      A usage count of 1 is normal for a registered network interface, and any
      virtual interface which links itself as an upper of that will increment
      it via dev_hold. In the case of DSA, this is the call path:
      
      dsa_slave_create
      -> netdev_upper_dev_link
         -> __netdev_upper_dev_link
            -> __netdev_adjacent_dev_insert
               -> dev_hold
      
      So a DSA switch with 3 interfaces will result in a usage count elevated
      by two, and netdev_wait_allrefs will wait until they have gone away.
      
      Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
      delete themselves, but DSA cannot just vanish and go poof, at most it
      can unbind itself from the switch devices, but that must happen strictly
      earlier compared to when the DSA master unregisters its net_device, so
      reacting on the NETDEV_UNREGISTER event is way too late.
      
      It seems that it is a pretty established pattern to have a driver's
      ->shutdown hook redirect to its ->remove hook, so the same code is
      executed regardless of whether the driver is unbound from the device, or
      the system is just shutting down. As Florian puts it, it is quite a big
      hammer for bcmgenet to unregister its net_device during shutdown, but
      having a common code path with the driver unbind helps ensure it is well
      tested.
      
      So DSA, for better or for worse, has to live with that and engage in an
      arms race of implementing the ->shutdown hook too, from all individual
      drivers, and do something sane when paired with masters that unregister
      their net_device there. The only sane thing to do, of course, is to
      unlink from the master.
      
      However, complications arise really quickly.
      
      The pattern of redirecting ->shutdown to ->remove is not unique to
      bcmgenet or even to net_device drivers. In fact, SPI controllers do it
      too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
      and MDIO controllers do it too (this is something I have not researched
      too deeply, but even if this is not the case today, it is certainly
      plausible to happen in the future, and must be taken into consideration).
      
      Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
      insane implication is that for the exact same DSA switch device, we
      might have both ->shutdown and ->remove getting called.
      
      So we need to do something with that insane environment. The pattern
      I've come up with is "if this, then not that", so if either ->shutdown
      or ->remove gets called, we set the device's drvdata to NULL, and in the
      other hook, we check whether the drvdata is NULL and just do nothing.
      This is probably not necessary for platform devices, just for devices on
      buses, but I would really insist for consistency among drivers, because
      when code is copy-pasted, it is not always copy-pasted from the best
      sources.
      
      So depending on whether the DSA switch's ->remove or ->shutdown will get
      called first, we cannot really guarantee even for the same driver if
      rebooting will result in the same code path on all platforms. But
      nonetheless, we need to do something minimally reasonable on ->shutdown
      too to fix the bug. Of course, the ->remove will do more (a full
      teardown of the tree, with all data structures freed, and this is why
      the bug was not caught for so long). The new ->shutdown method is kept
      separate from dsa_unregister_switch not because we couldn't have
      unregistered the switch, but simply in the interest of doing something
      quick and to the point.
      
      The big question is: does the DSA switch's ->shutdown get called earlier
      than the DSA master's ->shutdown? If not, there is still a risk that we
      might still trigger the WARN_ON in unregister_netdevice that says we are
      attempting to unregister a net_device which has uppers. That's no good.
      Although the reference to the master net_device won't physically go away
      even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
      on it.
      
      The answer to that question lies in this comment above device_link_add:
      
       * A side effect of the link creation is re-ordering of dpm_list and the
       * devices_kset list by moving the consumer device and all devices depending
       * on it to the ends of these lists (that does not happen to devices that have
       * not been registered when this function is called).
      
      so the fact that DSA uses device_link_add towards its master is not
      exactly for nothing. device_shutdown() walks devices_kset from the back,
      so this is our guarantee that DSA's shutdown happens before the master's
      shutdown.
      
      Fixes: 2f1e8ea7 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
      Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/Reported-by: NLino Sanfilippo <LinoSanfilippo@gmx.de>
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Tested-by: NAndrew Lunn <andrew@lunn.ch>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      0650bf52
  12. 16 9月, 2021 1 次提交
    • V
      net: dsa: flush switchdev workqueue before tearing down CPU/DSA ports · a57d8c21
      Vladimir Oltean 提交于
      Sometimes when unbinding the mv88e6xxx driver on Turris MOX, these error
      messages appear:
      
      mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete be:79:b4:9e:9e:96 vid 1 from fdb: -2
      mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete be:79:b4:9e:9e:96 vid 0 from fdb: -2
      mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 100 from fdb: -2
      mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 1 from fdb: -2
      mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 0 from fdb: -2
      
      (and similarly for other ports)
      
      What happens is that DSA has a policy "even if there are bugs, let's at
      least not leak memory" and dsa_port_teardown() clears the dp->fdbs and
      dp->mdbs lists, which are supposed to be empty.
      
      But deleting that cleanup code, the warnings go away.
      
      => the FDB and MDB lists (used for refcounting on shared ports, aka CPU
      and DSA ports) will eventually be empty, but are not empty by the time
      we tear down those ports. Aka we are deleting them too soon.
      
      The addresses that DSA complains about are host-trapped addresses: the
      local addresses of the ports, and the MAC address of the bridge device.
      
      The problem is that offloading those entries happens from a deferred
      work item scheduled by the SWITCHDEV_FDB_DEL_TO_DEVICE handler, and this
      races with the teardown of the CPU and DSA ports where the refcounting
      is kept.
      
      In fact, not only it races, but fundamentally speaking, if we iterate
      through the port list linearly, we might end up tearing down the shared
      ports even before we delete a DSA user port which has a bridge upper.
      
      So as it turns out, we need to first tear down the user ports (and the
      unused ones, for no better place of doing that), then the shared ports
      (the CPU and DSA ports). In between, we need to ensure that all work
      items scheduled by our switchdev handlers (which only run for user
      ports, hence the reason why we tear them down first) have finished.
      
      Fixes: 161ca59d ("net: dsa: reference count the MDB entries at the cross-chip notifier level")
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Link: https://lore.kernel.org/r/20210914134726.2305133-1-vladimir.oltean@nxp.comSigned-off-by: NJakub Kicinski <kuba@kernel.org>
      a57d8c21
  13. 24 8月, 2021 1 次提交
    • V
      net: dsa: let drivers state that they need VLAN filtering while standalone · 58adf9dc
      Vladimir Oltean 提交于
      As explained in commit e358bef7 ("net: dsa: Give drivers the chance
      to veto certain upper devices"), the hellcreek driver uses some tricks
      to comply with the network stack expectations: it enforces port
      separation in standalone mode using VLANs. For untagged traffic,
      bridging between ports is prevented by using different PVIDs, and for
      VLAN-tagged traffic, it never accepts 8021q uppers with the same VID on
      two ports, so packets with one VLAN cannot leak from one port to another.
      
      That is almost fine*, and has worked because hellcreek relied on an
      implicit behavior of the DSA core that was changed by the previous
      patch: the standalone ports declare the 'rx-vlan-filter' feature as 'on
      [fixed]'. Since most of the DSA drivers are actually VLAN-unaware in
      standalone mode, that feature was actually incorrectly reflecting the
      hardware/driver state, so there was a desire to fix it. This leaves the
      hellcreek driver in a situation where it has to explicitly request this
      behavior from the DSA framework.
      
      We configure the ports as follows:
      
      - Standalone: 'rx-vlan-filter' is on. An 8021q upper on top of a
        standalone hellcreek port will go through dsa_slave_vlan_rx_add_vid
        and will add a VLAN to the hardware tables, giving the driver the
        opportunity to refuse it through .port_prechangeupper.
      
      - Bridged with vlan_filtering=0: 'rx-vlan-filter' is off. An 8021q upper
        on top of a bridged hellcreek port will not go through
        dsa_slave_vlan_rx_add_vid, because there will not be any attempt to
        offload this VLAN. The driver already disables VLAN awareness, so that
        upper should receive the traffic it needs.
      
      - Bridged with vlan_filtering=1: 'rx-vlan-filter' is on. An 8021q upper
        on top of a bridged hellcreek port will call dsa_slave_vlan_rx_add_vid,
        and can again be vetoed through .port_prechangeupper.
      
      *It is not actually completely fine, because if I follow through
      correctly, we can have the following situation:
      
      ip link add br0 type bridge vlan_filtering 0
      ip link set lan0 master br0 # lan0 now becomes VLAN-unaware
      ip link set lan0 nomaster # lan0 fails to become VLAN-aware again, therefore breaking isolation
      
      This patch fixes that corner case by extending the DSA core logic, based
      on this requested attribute, to change the VLAN awareness state of the
      switch (port) when it leaves the bridge.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Acked-by: NKurt Kanzenbach <kurt@linutronix.de>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      58adf9dc
  14. 23 8月, 2021 1 次提交
    • V
      net: dsa: track unique bridge numbers across all DSA switch trees · f5e165e7
      Vladimir Oltean 提交于
      Right now, cross-tree bridging setups work somewhat by mistake.
      
      In the case of cross-tree bridging with sja1105, all switch instances
      need to agree upon a common VLAN ID for forwarding a packet that belongs
      to a certain bridging domain.
      
      With TX forwarding offload, the VLAN ID is the bridge VLAN for
      VLAN-aware bridging, and the tag_8021q TX forwarding offload VID
      (a VLAN which has non-zero VBID bits) for VLAN-unaware bridging.
      
      The VBID for VLAN-unaware bridging is derived from the dp->bridge_num
      value calculated by DSA independently for each switch tree.
      
      If ports from one tree join one bridge, and ports from another tree join
      another bridge, DSA will assign them the same bridge_num, even though
      the bridges are different. If cross-tree bridging is supported, this
      is an issue.
      
      Modify DSA to calculate the bridge_num globally across all switch trees.
      This has the implication for a driver that the dp->bridge_num value that
      DSA will assign to its ports might not be contiguous, if there are
      boards with multiple DSA drivers instantiated. Additionally, all
      bridge_num values eat up towards each switch's
      ds->num_fwd_offloading_bridges maximum, which is potentially unfortunate,
      and can be seen as a limitation introduced by this patch. However, that
      is the lesser evil for now.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      f5e165e7
  15. 09 8月, 2021 2 次提交
    • V
      net: dsa: sja1105: rely on DSA core tracking of port learning state · 5313a37b
      Vladimir Oltean 提交于
      Now that DSA keeps track of the port learning state, it becomes
      superfluous to keep an additional variable with this information in the
      sja1105 driver. Remove it.
      
      The DSA core's learning state is present in struct dsa_port *dp.
      To avoid the antipattern where we iterate through a DSA switch's
      ports and then call dsa_to_port to obtain the "dp" reference (which is
      bad because dsa_to_port iterates through the DSA switch tree once
      again), just iterate through the dst->ports and operate on those
      directly.
      
      The sja1105 had an extra use of priv->learn_ena on non-user ports. DSA
      does not touch the learning state of those ports - drivers are free to
      do what they wish on them. Mark that information with a comment in
      struct dsa_port and let sja1105 set dp->learning for cascade ports.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      5313a37b
    • V
      net: dsa: centralize fast ageing when address learning is turned off · 045c45d1
      Vladimir Oltean 提交于
      Currently DSA leaves it down to device drivers to fast age the FDB on a
      port when address learning is disabled on it. There are 2 reasons for
      doing that in the first place:
      
      - when address learning is disabled by user space, through
        IFLA_BRPORT_LEARNING or the brport_attr_learning sysfs, what user
        space typically wants to achieve is to operate in a mode with no
        dynamic FDB entry on that port. But if the port is already up, some
        addresses might have been already learned on it, and it seems silly to
        wait for 5 minutes for them to expire until something useful can be
        done.
      
      - when a port leaves a bridge and becomes standalone, DSA turns off
        address learning on it. This also has the nice side effect of flushing
        the dynamically learned bridge FDB entries on it, which is a good idea
        because standalone ports should not have bridge FDB entries on them.
      
      We let drivers manage fast ageing under this condition because if DSA
      were to do it, it would need to track each port's learning state, and
      act upon the transition, which it currently doesn't.
      
      But there are 2 reasons why doing it is better after all:
      
      - drivers might get it wrong and not do it (see b53_port_set_learning)
      
      - we would like to flush the dynamic entries from the software bridge
        too, and letting drivers do that would be another pain point
      
      So track the port learning state and trigger a fast age process
      automatically within DSA.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      045c45d1
  16. 06 8月, 2021 1 次提交
    • V
      net: dsa: don't disable multicast flooding to the CPU even without an IGMP querier · c73c5708
      Vladimir Oltean 提交于
      Commit 08cc83cc ("net: dsa: add support for BRIDGE_MROUTER
      attribute") added an option for users to turn off multicast flooding
      towards the CPU if they turn off the IGMP querier on a bridge which
      already has enslaved ports (echo 0 > /sys/class/net/br0/bridge/multicast_router).
      
      And commit a8b659e7 ("net: dsa: act as passthrough for bridge port flags")
      simply papered over that issue, because it moved the decision to flood
      the CPU with multicast (or not) from the DSA core down to individual drivers,
      instead of taking a more radical position then.
      
      The truth is that disabling multicast flooding to the CPU is simply
      something we are not prepared to do now, if at all. Some reasons:
      
      - ICMP6 neighbor solicitation messages are unregistered multicast
        packets as far as the bridge is concerned. So if we stop flooding
        multicast, the outside world cannot ping the bridge device's IPv6
        link-local address.
      
      - There might be foreign interfaces bridged with our DSA switch ports
        (sending a packet towards the host does not necessarily equal
        termination, but maybe software forwarding). So if there is no one
        interested in that multicast traffic in the local network stack, that
        doesn't mean nobody is.
      
      - PTP over L4 (IPv4, IPv6) is multicast, but is unregistered as far as
        the bridge is concerned. This should reach the CPU port.
      
      - The switch driver might not do FDB partitioning. And since we don't
        even bother to do more fine-grained flood disabling (such as "disable
        flooding _from_port_N_ towards the CPU port" as opposed to "disable
        flooding _from_any_port_ towards the CPU port"), this breaks standalone
        ports, or even multiple bridges where one has an IGMP querier and one
        doesn't.
      
      Reverting the logic makes all of the above work.
      
      Fixes: a8b659e7 ("net: dsa: act as passthrough for bridge port flags")
      Fixes: 08cc83cc ("net: dsa: add support for BRIDGE_MROUTER attribute")
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      c73c5708
  17. 02 8月, 2021 1 次提交
  18. 28 7月, 2021 1 次提交
    • A
      dev_ioctl: split out ndo_eth_ioctl · a7605370
      Arnd Bergmann 提交于
      Most users of ndo_do_ioctl are ethernet drivers that implement
      the MII commands SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG, or hardware
      timestamping with SIOCSHWTSTAMP/SIOCGHWTSTAMP.
      
      Separate these from the few drivers that use ndo_do_ioctl to
      implement SIOCBOND, SIOCBR and SIOCWANDEV commands.
      
      This is a purely cosmetic change intended to help readers find
      their way through the implementation.
      
      Cc: Doug Ledford <dledford@redhat.com>
      Cc: Jason Gunthorpe <jgg@ziepe.ca>
      Cc: Jay Vosburgh <j.vosburgh@gmail.com>
      Cc: Veaceslav Falico <vfalico@gmail.com>
      Cc: Andy Gospodarek <andy@greyhouse.net>
      Cc: Andrew Lunn <andrew@lunn.ch>
      Cc: Vivien Didelot <vivien.didelot@gmail.com>
      Cc: Florian Fainelli <f.fainelli@gmail.com>
      Cc: Vladimir Oltean <olteanv@gmail.com>
      Cc: Leon Romanovsky <leon@kernel.org>
      Cc: linux-rdma@vger.kernel.org
      Signed-off-by: NArnd Bergmann <arnd@arndb.de>
      Acked-by: NJason Gunthorpe <jgg@nvidia.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      a7605370
  19. 27 7月, 2021 1 次提交
  20. 23 7月, 2021 2 次提交
    • V
      net: dsa: add support for bridge TX forwarding offload · 123abc06
      Vladimir Oltean 提交于
      For a DSA switch, to offload the forwarding process of a bridge device
      means to send the packets coming from the software bridge as data plane
      packets. This is contrary to everything that DSA has done so far,
      because the current taggers only know to send control packets (ones that
      target a specific destination port), whereas data plane packets are
      supposed to be forwarded according to the FDB lookup, much like packets
      ingressing on any regular ingress port. If the FDB lookup process
      returns multiple destination ports (flooding, multicast), then
      replication is also handled by the switch hardware - the bridge only
      sends a single packet and avoids the skb_clone().
      
      DSA keeps for each bridge port a zero-based index (the number of the
      bridge). Multiple ports performing TX forwarding offload to the same
      bridge have the same dp->bridge_num value, and ports not offloading the
      TX data plane of a bridge have dp->bridge_num = -1.
      
      The tagger can check if the packet that is being transmitted on has
      skb->offload_fwd_mark = true or not. If it does, it can be sure that the
      packet belongs to the data plane of a bridge, further information about
      which can be obtained based on dp->bridge_dev and dp->bridge_num.
      It can then compose a DSA tag for injecting a data plane packet into
      that bridge number.
      
      For the switch driver side, we offer two new dsa_switch_ops methods,
      called .port_bridge_fwd_offload_{add,del}, which are modeled after
      .port_bridge_{join,leave}.
      These methods are provided in case the driver needs to configure the
      hardware to treat packets coming from that bridge software interface as
      data plane packets. The switchdev <-> bridge interaction happens during
      the netdev_master_upper_dev_link() call, so to switch drivers, the
      effect is that the .port_bridge_fwd_offload_add() method is called
      immediately after .port_bridge_join().
      
      If the bridge number exceeds the number of bridges for which the switch
      driver can offload the TX data plane (and this includes the case where
      the driver can offload none), DSA falls back to simply returning
      tx_fwd_offload = false in the switchdev_bridge_port_offload() call.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      123abc06
    • V
      net: dsa: track the number of switches in a tree · 5b22d366
      Vladimir Oltean 提交于
      In preparation of supporting data plane forwarding on behalf of a
      software bridge, some drivers might need to view bridges as virtual
      switches behind the CPU port in a cross-chip topology.
      
      Give them some help and let them know how many physical switches there
      are in the tree, so that they can count the virtual switches starting
      from that number on.
      
      Note that the first dsa_switch_ops method where this information is
      reliably available is .setup(). This is because of how DSA works:
      in a tree with 3 switches, each calling dsa_register_switch(), the first
      2 will advance until dsa_tree_setup() -> dsa_tree_setup_routing_table()
      and exit with error code 0 because the topology is not complete. Since
      probing is parallel at this point, one switch does not know about the
      existence of the other. Then the third switch comes, and for it,
      dsa_tree_setup_routing_table() returns complete = true. This switch goes
      ahead and calls dsa_tree_setup_switches() for everybody else, calling
      their .setup() methods too. This acts as the synchronization point.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: NFlorian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      5b22d366
  21. 20 7月, 2021 2 次提交
    • V
      net: dsa: make tag_8021q operations part of the core · 5da11eb4
      Vladimir Oltean 提交于
      Make tag_8021q a more central element of DSA and move the 2 driver
      specific operations outside of struct dsa_8021q_context (which is
      supposed to hold dynamic data and not really constant function
      pointers).
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      5da11eb4
    • V
      net: dsa: let the core manage the tag_8021q context · d7b1fd52
      Vladimir Oltean 提交于
      The basic problem description is as follows:
      
      Be there 3 switches in a daisy chain topology:
      
                                                   |
          sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
       [  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
                                         |
                                         +---------+
                                                   |
          sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
       [  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
                                         |
                                         +---------+
                                                   |
          sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
       [  user ] [  user ] [  user ] [  user ] [  dsa  ]
      
      The CPU will not be able to ping through the user ports of the
      bottom-most switch (like for example sw2p0), simply because tag_8021q
      was not coded up for this scenario - it has always assumed DSA switch
      trees with a single switch.
      
      To add support for the topology above, we must admit that the RX VLAN of
      sw2p0 must be added on some ports of switches 0 and 1 as well. This is
      in fact a textbook example of thing that can use the cross-chip notifier
      framework that DSA has set up in switch.c.
      
      There is only one problem: core DSA (switch.c) is not able right now to
      make the connection between a struct dsa_switch *ds and a struct
      dsa_8021q_context *ctx. Right now, it is drivers who call into
      tag_8021q.c and always provide a struct dsa_8021q_context *ctx pointer,
      and tag_8021q.c calls them back with the .tag_8021q_vlan_{add,del}
      methods.
      
      But with cross-chip notifiers, it is possible for tag_8021q to call
      drivers without drivers having ever asked for anything. A good example
      is right above: when sw2p0 wants to set itself up for tag_8021q,
      the .tag_8021q_vlan_add method needs to be called for switches 1 and 0,
      so that they transport sw2p0's VLANs towards the CPU without dropping
      them.
      
      So instead of letting drivers manage the tag_8021q context, add a
      tag_8021q_ctx pointer inside of struct dsa_switch, which will be
      populated when dsa_tag_8021q_register() returns success.
      
      The patch is fairly long-winded because we are partly reverting commit
      5899ee36 ("net: dsa: tag_8021q: add a context structure") which made
      the driver-facing tag_8021q API use "ctx" instead of "ds". Now that we
      can access "ctx" directly from "ds", this is no longer needed.
      Signed-off-by: NVladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      d7b1fd52