1. 10 11月, 2021 1 次提交
  2. 24 10月, 2021 1 次提交
    • S
      net: convert users of bitmap_foo() to linkmode_foo() · 4973056c
      Sean Anderson 提交于
      This converts instances of
      	bitmap_foo(args..., __ETHTOOL_LINK_MODE_MASK_NBITS)
      to
      	linkmode_foo(args...)
      
      I manually fixed up some lines to prevent them from being excessively
      long. Otherwise, this change was generated with the following semantic
      patch:
      
      // Generated with
      // echo linux/linkmode.h > includes
      // git grep -Flf includes include/ | cut -f 2- -d / | cat includes - \
      // | sort | uniq | tee new_includes | wc -l && mv new_includes includes
      // and repeating until the number stopped going up
      @i@
      @@
      
      (
       #include <linux/acpi_mdio.h>
      |
       #include <linux/brcmphy.h>
      |
       #include <linux/dsa/loop.h>
      |
       #include <linux/dsa/sja1105.h>
      |
       #include <linux/ethtool.h>
      |
       #include <linux/ethtool_netlink.h>
      |
       #include <linux/fec.h>
      |
       #include <linux/fs_enet_pd.h>
      |
       #include <linux/fsl/enetc_mdio.h>
      |
       #include <linux/fwnode_mdio.h>
      |
       #include <linux/linkmode.h>
      |
       #include <linux/lsm_audit.h>
      |
       #include <linux/mdio-bitbang.h>
      |
       #include <linux/mdio.h>
      |
       #include <linux/mdio-mux.h>
      |
       #include <linux/mii.h>
      |
       #include <linux/mii_timestamper.h>
      |
       #include <linux/mlx5/accel.h>
      |
       #include <linux/mlx5/cq.h>
      |
       #include <linux/mlx5/device.h>
      |
       #include <linux/mlx5/driver.h>
      |
       #include <linux/mlx5/eswitch.h>
      |
       #include <linux/mlx5/fs.h>
      |
       #include <linux/mlx5/port.h>
      |
       #include <linux/mlx5/qp.h>
      |
       #include <linux/mlx5/rsc_dump.h>
      |
       #include <linux/mlx5/transobj.h>
      |
       #include <linux/mlx5/vport.h>
      |
       #include <linux/of_mdio.h>
      |
       #include <linux/of_net.h>
      |
       #include <linux/pcs-lynx.h>
      |
       #include <linux/pcs/pcs-xpcs.h>
      |
       #include <linux/phy.h>
      |
       #include <linux/phy_led_triggers.h>
      |
       #include <linux/phylink.h>
      |
       #include <linux/platform_data/bcmgenet.h>
      |
       #include <linux/platform_data/xilinx-ll-temac.h>
      |
       #include <linux/pxa168_eth.h>
      |
       #include <linux/qed/qed_eth_if.h>
      |
       #include <linux/qed/qed_fcoe_if.h>
      |
       #include <linux/qed/qed_if.h>
      |
       #include <linux/qed/qed_iov_if.h>
      |
       #include <linux/qed/qed_iscsi_if.h>
      |
       #include <linux/qed/qed_ll2_if.h>
      |
       #include <linux/qed/qed_nvmetcp_if.h>
      |
       #include <linux/qed/qed_rdma_if.h>
      |
       #include <linux/sfp.h>
      |
       #include <linux/sh_eth.h>
      |
       #include <linux/smsc911x.h>
      |
       #include <linux/soc/nxp/lpc32xx-misc.h>
      |
       #include <linux/stmmac.h>
      |
       #include <linux/sunrpc/svc_rdma.h>
      |
       #include <linux/sxgbe_platform.h>
      |
       #include <net/cfg80211.h>
      |
       #include <net/dsa.h>
      |
       #include <net/mac80211.h>
      |
       #include <net/selftests.h>
      |
       #include <rdma/ib_addr.h>
      |
       #include <rdma/ib_cache.h>
      |
       #include <rdma/ib_cm.h>
      |
       #include <rdma/ib_hdrs.h>
      |
       #include <rdma/ib_mad.h>
      |
       #include <rdma/ib_marshall.h>
      |
       #include <rdma/ib_pack.h>
      |
       #include <rdma/ib_pma.h>
      |
       #include <rdma/ib_sa.h>
      |
       #include <rdma/ib_smi.h>
      |
       #include <rdma/ib_umem.h>
      |
       #include <rdma/ib_umem_odp.h>
      |
       #include <rdma/ib_verbs.h>
      |
       #include <rdma/iw_cm.h>
      |
       #include <rdma/mr_pool.h>
      |
       #include <rdma/opa_addr.h>
      |
       #include <rdma/opa_port_info.h>
      |
       #include <rdma/opa_smi.h>
      |
       #include <rdma/opa_vnic.h>
      |
       #include <rdma/rdma_cm.h>
      |
       #include <rdma/rdma_cm_ib.h>
      |
       #include <rdma/rdmavt_cq.h>
      |
       #include <rdma/rdma_vt.h>
      |
       #include <rdma/rdmavt_qp.h>
      |
       #include <rdma/rw.h>
      |
       #include <rdma/tid_rdma_defs.h>
      |
       #include <rdma/uverbs_ioctl.h>
      |
       #include <rdma/uverbs_named_ioctl.h>
      |
       #include <rdma/uverbs_std_types.h>
      |
       #include <rdma/uverbs_types.h>
      |
       #include <soc/mscc/ocelot.h>
      |
       #include <soc/mscc/ocelot_ptp.h>
      |
       #include <soc/mscc/ocelot_vcap.h>
      |
       #include <trace/events/ib_mad.h>
      |
       #include <trace/events/rdma_core.h>
      |
       #include <trace/events/rdma.h>
      |
       #include <trace/events/rpcrdma.h>
      |
       #include <uapi/linux/ethtool.h>
      |
       #include <uapi/linux/ethtool_netlink.h>
      |
       #include <uapi/linux/mdio.h>
      |
       #include <uapi/linux/mii.h>
      )
      
      @depends on i@
      expression list args;
      @@
      
      (
      - bitmap_zero(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_zero(args)
      |
      - bitmap_copy(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_copy(args)
      |
      - bitmap_and(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_and(args)
      |
      - bitmap_or(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_or(args)
      |
      - bitmap_empty(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_empty(args)
      |
      - bitmap_andnot(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_andnot(args)
      |
      - bitmap_equal(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_equal(args)
      |
      - bitmap_intersects(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_intersects(args)
      |
      - bitmap_subset(args, __ETHTOOL_LINK_MODE_MASK_NBITS)
      + linkmode_subset(args)
      )
      
      Add missing linux/mii.h include to mellanox. -DaveM
      Signed-off-by: Sean Anderson <sean.anderson@seco.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      4973056c
  3. 12 10月, 2021 1 次提交
  4. 09 10月, 2021 2 次提交
    • V
      net: dsa: mv88e6xxx: isolate the ATU databases of standalone and bridged ports · 5bded825
      Vladimir Oltean 提交于
      Similar to commit 6087175b ("net: dsa: mt7530: use independent VLAN
      learning on VLAN-unaware bridges"), software forwarding between an
      unoffloaded LAG port (a bonding interface with an unsupported policy)
      and a mv88e6xxx user port directly under a bridge is broken.
      
      We adopt the same strategy, which is to make the standalone ports not
      find any ATU entry learned on a bridge port.
      
      Theory: the mv88e6xxx ATU is looked up by FID and MAC address. There are
      as many FIDs as VIDs (4096). The FID is derived from the VID when
      possible (the VTU maps a VID to a FID), with a fallback to the port
      based default FID value when not (802.1Q Mode is disabled on the port,
      or the classified VID isn't present in the VTU).
      
      The mv88e6xxx driver makes the following use of FIDs and VIDs:
      
      - the port's DefaultVID (to which untagged & pvid-tagged packets get
        classified) is 0 and is absent from the VTU, so this kind of packets is
        processed in FID 0, the default FID assigned by mv88e6xxx_setup_port.
      
      - every time a bridge VLAN is created, mv88e6xxx_port_vlan_join() ->
        mv88e6xxx_atu_new() associates a FID with that VID which increases
        linearly starting from 1. Like this:
      
        bridge vlan add dev lan0 vid 100 # FID 1
        bridge vlan add dev lan1 vid 100 # still FID 1
        bridge vlan add dev lan2 vid 1024 # FID 2
      
      The FID allocation made by the driver is sub-optimal for the following
      reasons:
      
      (a) A standalone port has a DefaultPVID of 0 and a default FID of 0 too.
          A VLAN-unaware bridged port has a DefaultPVID of 0 and a default FID
          of 0 too. The difference is that the bridged ports may learn ATU
          entries, while the standalone port has the requirement that it must
          not, and must not find them either. Standalone ports must not use
          the same FID as ports belonging to a bridge. All standalone ports
          can use the same FID, since the ATU will never have an entry in
          that FID.
      
      (b) Multiple VLAN-unaware bridges will all use a DefaultPVID of 0 and a
          default FID of 0 on all their ports. The FDBs will not be isolated
          between these bridges. Every VLAN-unaware bridge must use the same
          FID on all its ports, different from the FID of other bridge ports.
      
      (c) Each bridge VLAN uses a unique FID which is useful for Independent
          VLAN Learning, but the same VLAN ID on multiple VLAN-aware bridges
          will result in the same FID being used by mv88e6xxx_atu_new().
          The correct behavior is for VLAN 1 in br0 to have a different FID
          compared to VLAN 1 in br1.
      
      This patch cannot fix all the above. Traditionally the DSA framework did
      not care about this, and the reality is that DSA core involvement is
      needed for the aforementioned issues to be solved. The only thing we can
      solve here is an issue which does not require API changes, and that is
      issue (a), aka use a different FID for standalone ports vs ports under
      VLAN-unaware bridges.
      
      The first step is deciding what VID and FID to use for standalone ports,
      and what VID and FID for bridged ports. The 0/0 pair for standalone
      ports is what they used up till now, let's keep using that. For bridged
      ports, there are 2 cases:
      
      - VLAN-aware ports will never end up using the port default FID, because
        packets will always be classified to a VID in the VTU or dropped
        otherwise. The FID is the one associated with the VID in the VTU.
      
      - On VLAN-unaware ports, we _could_ leave their DefaultVID (pvid) at
        zero (just as in the case of standalone ports), and just change the
        port's default FID from 0 to a different number (say 1).
      
      However, Tobias points out that there is one more requirement to cater to:
      cross-chip bridging. The Marvell DSA header does not carry the FID in
      it, only the VID. So once a packet crosses a DSA link, if it has a VID
      of zero it will get classified to the default FID of that cascade port.
      Relying on a port default FID for upstream cascade ports results in
      contradictions: a default FID of 0 breaks ATU isolation of bridged ports
      on the downstream switch, a default FID of 1 breaks standalone ports on
      the downstream switch.
      
      So not only must standalone ports have different FIDs compared to
      bridged ports, they must also have different DefaultVID values.
      IEEE 802.1Q defines two reserved VID values: 0 and 4095. So we simply
      choose 4095 as the DefaultVID of ports belonging to VLAN-unaware
      bridges, and VID 4095 maps to FID 1.
      
      For the xmit operation to look up the same ATU database, we need to put
      VID 4095 in DSA tags sent to ports belonging to VLAN-unaware bridges
      too. All shared ports are configured to map this VID to the bridging
      FID, because they are members of that VLAN in the VTU. Shared ports
      don't need to have 802.1QMode enabled in any way, they always parse the
      VID from the DSA header, they don't need to look at the 802.1Q header.
      
      We install VID 4095 to the VTU in mv88e6xxx_setup_port(), with the
      mention that mv88e6xxx_vtu_setup() which was located right below that
      call was flushing the VTU so those entries wouldn't be preserved.
      So we need to relocate the VTU flushing prior to the port initialization
      during ->setup(). Also note that this is why it is safe to assume that
      VID 4095 will get associated with FID 1: the user ports haven't been
      created, so there is no avenue for the user to create a bridge VLAN
      which could otherwise race with the creation of another FID which would
      otherwise use up the non-reserved FID value of 1.
      
      [ Currently mv88e6xxx_port_vlan_join() doesn't have the option of
        specifying a preferred FID, it always calls mv88e6xxx_atu_new(). ]
      
      mv88e6xxx_port_db_load_purge() is the function to access the ATU for
      FDB/MDB entries, and it used to determine the FID to use for
      VLAN-unaware FDB entries (VID=0) using mv88e6xxx_port_get_fid().
      But the driver only called mv88e6xxx_port_set_fid() once, during probe,
      so no surprises, the port FID was always 0, the call to get_fid() was
      redundant. As much as I would have wanted to not touch that code, the
      logic is broken when we add a new FID which is not the port-based
      default. Now the port-based default FID only corresponds to standalone
      ports, and FDB/MDB entries belong to the bridging service. So while in
      the future, when the DSA API will support FDB isolation, we will have to
      figure out the FID based on the bridge number, for now there's a single
      bridging FID, so hardcode that.
      
      Lastly, the tagger needs to check, when it is transmitting a VLAN
      untagged skb, whether it is sending it towards a bridged or a standalone
      port. When we see it is bridged we assume the bridge is VLAN-unaware.
      Not because it cannot be VLAN-aware but:
      
      - if we are transmitting from a VLAN-aware bridge we are likely doing so
        using TX forwarding offload. That code path guarantees that skbs have
        a vlan hwaccel tag in them, so we would not enter the "else" branch
        of the "if (skb->protocol == htons(ETH_P_8021Q))" condition.
      
      - if we are transmitting on behalf of a VLAN-aware bridge but with no TX
        forwarding offload (no PVT support, out of space in the PVT, whatever),
        we would indeed be transmitting with VLAN 4095 instead of the bridge
        device's pvid. However we would be injecting a "From CPU" frame, and
        the switch won't learn from that - it only learns from "Forward" frames.
        So it is inconsequential for address learning. And VLAN 4095 is
        absolutely enough for the frame to exit the switch, since we never
        remove that VLAN from any port.
      
      Fixes: 57e661aa ("net: dsa: mv88e6xxx: Link aggregation support")
      Reported-by: Tobias Waldekranz <tobias@waldekranz.com>
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      5bded825
    • V
      net: dsa: mv88e6xxx: keep the pvid at 0 when VLAN-unaware · 8b6836d8
      Vladimir Oltean 提交于
      The VLAN support in mv88e6xxx has a loaded history. Commit 2ea7a679
      ("net: dsa: Don't add vlans when vlan filtering is disabled") noticed
      some issues with VLAN and decided the best way to deal with them was to
      make the DSA core ignore VLANs added by the bridge while VLAN awareness
      is turned off. Those issues were never explained, just presented as
      "at least one corner case".
      
      That approach had problems of its own, presented by
      commit 54a0ed0d ("net: dsa: provide an option for drivers to always
      receive bridge VLANs") for the DSA core, followed by
      commit 1fb74191 ("net: dsa: mv88e6xxx: fix vlan setup") which
      applied ds->configure_vlan_while_not_filtering = true for mv88e6xxx in
      particular.
      
      We still don't know what corner case Andrew saw when he wrote
      commit 2ea7a679 ("net: dsa: Don't add vlans when vlan filtering is
      disabled"), but Tobias now reports that when we use TX forwarding
      offload, pinging an external station from the bridge device is broken if
      the front-facing DSA user port has flooding turned off. The full
      description is in the link below, but for short, when a mv88e6xxx port
      is under a VLAN-unaware bridge, it inherits that bridge's pvid.
      So packets ingressing a user port will be classified to e.g. VID 1
      (assuming that value for the bridge_default_pvid), whereas when
      tag_dsa.c xmits towards a user port, it always sends packets using a VID
      of 0 if that port is standalone or under a VLAN-unaware bridge - or at
      least it did so prior to commit d82f8ab0 ("net: dsa: tag_dsa:
      offload the bridge forwarding process").
      
      In any case, when there is a conversation between the CPU and a station
      connected to a user port, the station's MAC address is learned in VID 1
      but the CPU tries to transmit through VID 0. The packets reach the
      intended station, but via flooding and not by virtue of matching the
      existing ATU entry.
      
      DSA has established (and enforced in other drivers: sja1105, felix,
      mt7530) that a VLAN-unaware port should use a private pvid, and not
      inherit the one from the bridge. The bridge's pvid should only be
      inherited when that bridge is VLAN-aware, so all state transitions need
      to be handled. On the other hand, all bridge VLANs should sit in the VTU
      starting with the moment when the bridge offloads them via switchdev,
      they are just not used.
      
      This solves the problem that Tobias sees because packets ingressing on
      VLAN-unaware user ports now get classified to VID 0, which is also the
      VID used by tag_dsa.c on xmit.
      
      Fixes: d82f8ab0 ("net: dsa: tag_dsa: offload the bridge forwarding process")
      Link: https://patchwork.kernel.org/project/netdevbpf/patch/20211003222312.284175-2-vladimir.oltean@nxp.com/#24491503
      Reported-by: Tobias Waldekranz <tobias@waldekranz.com>
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      8b6836d8
  5. 27 9月, 2021 3 次提交
  6. 19 9月, 2021 2 次提交
    • V
      net: dsa: tear down devlink port regions when tearing down the devlink port on error · fd292c18
      Vladimir Oltean 提交于
      Commit 86f8b1c0 ("net: dsa: Do not make user port errors fatal")
      decided it was fine to ignore errors on certain ports that fail to
      probe, and go on with the ports that do probe fine.
      
      Commit fb6ec87f ("net: dsa: Fix type was not set for devlink port")
      noticed that devlink_port_type_eth_set(dlp, dp->slave); does not get
      called, and devlink notices after a timeout of 3600 seconds and prints a
      WARN_ON. So it went ahead to unregister the devlink port. And because
      there exists an UNUSED port flavour, we actually re-register the devlink
      port as UNUSED.
      
      Commit 08156ba4 ("net: dsa: Add devlink port regions support to
      DSA") added devlink port regions, which are set up by the driver and not
      by DSA.
      
      When we trigger the devlink port deregistration and reregistration as
      unused, devlink now prints another WARN_ON, from here:
      
      devlink_port_unregister:
      	WARN_ON(!list_empty(&devlink_port->region_list));
      
      So the port still has regions, which makes sense, because they were set
      up by the driver, and the driver doesn't know we're unregistering the
      devlink port.
      
      Somebody needs to tear them down, and optionally (actually it would be
      nice, to be consistent) set them up again for the new devlink port.
      
      But DSA's layering stays in our way quite badly here.
      
      The options I've considered are:
      
      1. Introduce a function in devlink to just change a port's type and
         flavour. No dice, devlink keeps a lot of state, it really wants the
         port to not be registered when you set its parameters, so changing
         anything can only be done by destroying what we currently have and
         recreating it.
      
      2. Make DSA cache the parameters passed to dsa_devlink_port_region_create,
         and the region returned, keep those in a list, then when the devlink
         port unregister needs to take place, the existing devlink regions are
         destroyed by DSA, and we replay the creation of new regions using the
         cached parameters. Problem: mv88e6xxx keeps the region pointers in
         chip->ports[port].region, and these will remain stale after DSA frees
         them. There are many things DSA can do, but updating mv88e6xxx's
         private pointers is not one of them.
      
      3. Just let the driver do it (i.e. introduce a very specific method
         called ds->ops->port_reinit_as_unused, which unregisters its devlink
         port devlink regions, then the old devlink port, then registers the
         new one, then the devlink port regions for it). While it does work,
         as opposed to the others, it's pretty horrible from an API
         perspective and we can do better.
      
      4. Introduce a new pair of methods, ->port_setup and ->port_teardown,
         which in the case of mv88e6xxx must register and unregister the
         devlink port regions. Call these 2 methods when the port must be
         reinitialized as unused.
      
      Naturally, I went for the 4th approach.
      
      Fixes: 08156ba4 ("net: dsa: Add devlink port regions support to DSA")
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      fd292c18
    • V
      net: dsa: be compatible with masters which unregister on shutdown · 0650bf52
      Vladimir Oltean 提交于
      Lino reports that on his system with bcmgenet as DSA master and KSZ9897
      as a switch, rebooting or shutting down never works properly.
      
      What does the bcmgenet driver have special to trigger this, that other
      DSA masters do not? It has an implementation of ->shutdown which simply
      calls its ->remove implementation. Otherwise said, it unregisters its
      network interface on shutdown.
      
      This message can be seen in a loop, and it hangs the reboot process there:
      
      unregister_netdevice: waiting for eth0 to become free. Usage count = 3
      
      So why 3?
      
      A usage count of 1 is normal for a registered network interface, and any
      virtual interface which links itself as an upper of that will increment
      it via dev_hold. In the case of DSA, this is the call path:
      
      dsa_slave_create
      -> netdev_upper_dev_link
         -> __netdev_upper_dev_link
            -> __netdev_adjacent_dev_insert
               -> dev_hold
      
      So a DSA switch with 3 interfaces will result in a usage count elevated
      by two, and netdev_wait_allrefs will wait until they have gone away.
      
      Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and
      delete themselves, but DSA cannot just vanish and go poof, at most it
      can unbind itself from the switch devices, but that must happen strictly
      earlier compared to when the DSA master unregisters its net_device, so
      reacting on the NETDEV_UNREGISTER event is way too late.
      
      It seems that it is a pretty established pattern to have a driver's
      ->shutdown hook redirect to its ->remove hook, so the same code is
      executed regardless of whether the driver is unbound from the device, or
      the system is just shutting down. As Florian puts it, it is quite a big
      hammer for bcmgenet to unregister its net_device during shutdown, but
      having a common code path with the driver unbind helps ensure it is well
      tested.
      
      So DSA, for better or for worse, has to live with that and engage in an
      arms race of implementing the ->shutdown hook too, from all individual
      drivers, and do something sane when paired with masters that unregister
      their net_device there. The only sane thing to do, of course, is to
      unlink from the master.
      
      However, complications arise really quickly.
      
      The pattern of redirecting ->shutdown to ->remove is not unique to
      bcmgenet or even to net_device drivers. In fact, SPI controllers do it
      too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers
      and MDIO controllers do it too (this is something I have not researched
      too deeply, but even if this is not the case today, it is certainly
      plausible to happen in the future, and must be taken into consideration).
      
      Since DSA switches might be SPI devices, I2C devices, MDIO devices, the
      insane implication is that for the exact same DSA switch device, we
      might have both ->shutdown and ->remove getting called.
      
      So we need to do something with that insane environment. The pattern
      I've come up with is "if this, then not that", so if either ->shutdown
      or ->remove gets called, we set the device's drvdata to NULL, and in the
      other hook, we check whether the drvdata is NULL and just do nothing.
      This is probably not necessary for platform devices, just for devices on
      buses, but I would really insist for consistency among drivers, because
      when code is copy-pasted, it is not always copy-pasted from the best
      sources.
      
      So depending on whether the DSA switch's ->remove or ->shutdown will get
      called first, we cannot really guarantee even for the same driver if
      rebooting will result in the same code path on all platforms. But
      nonetheless, we need to do something minimally reasonable on ->shutdown
      too to fix the bug. Of course, the ->remove will do more (a full
      teardown of the tree, with all data structures freed, and this is why
      the bug was not caught for so long). The new ->shutdown method is kept
      separate from dsa_unregister_switch not because we couldn't have
      unregistered the switch, but simply in the interest of doing something
      quick and to the point.
      
      The big question is: does the DSA switch's ->shutdown get called earlier
      than the DSA master's ->shutdown? If not, there is still a risk that we
      might still trigger the WARN_ON in unregister_netdevice that says we are
      attempting to unregister a net_device which has uppers. That's no good.
      Although the reference to the master net_device won't physically go away
      even if DSA's ->shutdown comes afterwards, remember we have a dev_hold
      on it.
      
      The answer to that question lies in this comment above device_link_add:
      
       * A side effect of the link creation is re-ordering of dpm_list and the
       * devices_kset list by moving the consumer device and all devices depending
       * on it to the ends of these lists (that does not happen to devices that have
       * not been registered when this function is called).
      
      so the fact that DSA uses device_link_add towards its master is not
      exactly for nothing. device_shutdown() walks devices_kset from the back,
      so this is our guarantee that DSA's shutdown happens before the master's
      shutdown.
      
      Fixes: 2f1e8ea7 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings")
      Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/
      Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Tested-by: Andrew Lunn <andrew@lunn.ch>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      0650bf52
  7. 09 8月, 2021 1 次提交
    • V
      net: dsa: centralize fast ageing when address learning is turned off · 045c45d1
      Vladimir Oltean 提交于
      Currently DSA leaves it down to device drivers to fast age the FDB on a
      port when address learning is disabled on it. There are 2 reasons for
      doing that in the first place:
      
      - when address learning is disabled by user space, through
        IFLA_BRPORT_LEARNING or the brport_attr_learning sysfs, what user
        space typically wants to achieve is to operate in a mode with no
        dynamic FDB entry on that port. But if the port is already up, some
        addresses might have been already learned on it, and it seems silly to
        wait for 5 minutes for them to expire until something useful can be
        done.
      
      - when a port leaves a bridge and becomes standalone, DSA turns off
        address learning on it. This also has the nice side effect of flushing
        the dynamically learned bridge FDB entries on it, which is a good idea
        because standalone ports should not have bridge FDB entries on them.
      
      We let drivers manage fast ageing under this condition because if DSA
      were to do it, it would need to track each port's learning state, and
      act upon the transition, which it currently doesn't.
      
      But there are 2 reasons why doing it is better after all:
      
      - drivers might get it wrong and not do it (see b53_port_set_learning)
      
      - we would like to flush the dynamic entries from the software bridge
        too, and letting drivers do that would be another pain point
      
      So track the port learning state and trigger a fast age process
      automatically within DSA.
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      045c45d1
  8. 06 8月, 2021 1 次提交
    • V
      net: dsa: don't disable multicast flooding to the CPU even without an IGMP querier · c73c5708
      Vladimir Oltean 提交于
      Commit 08cc83cc ("net: dsa: add support for BRIDGE_MROUTER
      attribute") added an option for users to turn off multicast flooding
      towards the CPU if they turn off the IGMP querier on a bridge which
      already has enslaved ports (echo 0 > /sys/class/net/br0/bridge/multicast_router).
      
      And commit a8b659e7 ("net: dsa: act as passthrough for bridge port flags")
      simply papered over that issue, because it moved the decision to flood
      the CPU with multicast (or not) from the DSA core down to individual drivers,
      instead of taking a more radical position then.
      
      The truth is that disabling multicast flooding to the CPU is simply
      something we are not prepared to do now, if at all. Some reasons:
      
      - ICMP6 neighbor solicitation messages are unregistered multicast
        packets as far as the bridge is concerned. So if we stop flooding
        multicast, the outside world cannot ping the bridge device's IPv6
        link-local address.
      
      - There might be foreign interfaces bridged with our DSA switch ports
        (sending a packet towards the host does not necessarily equal
        termination, but maybe software forwarding). So if there is no one
        interested in that multicast traffic in the local network stack, that
        doesn't mean nobody is.
      
      - PTP over L4 (IPv4, IPv6) is multicast, but is unregistered as far as
        the bridge is concerned. This should reach the CPU port.
      
      - The switch driver might not do FDB partitioning. And since we don't
        even bother to do more fine-grained flood disabling (such as "disable
        flooding _from_port_N_ towards the CPU port" as opposed to "disable
        flooding _from_any_port_ towards the CPU port"), this breaks standalone
        ports, or even multiple bridges where one has an IGMP querier and one
        doesn't.
      
      Reverting the logic makes all of the above work.
      
      Fixes: a8b659e7 ("net: dsa: act as passthrough for bridge port flags")
      Fixes: 08cc83cc ("net: dsa: add support for BRIDGE_MROUTER attribute")
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c73c5708
  9. 24 7月, 2021 1 次提交
  10. 23 7月, 2021 1 次提交
    • V
      net: dsa: mv88e6xxx: map virtual bridges with forwarding offload in the PVT · ce5df689
      Vladimir Oltean 提交于
      The mv88e6xxx switches have the ability to receive FORWARD (data plane)
      frames from the CPU port and route them according to the FDB. We can use
      this to offload the forwarding process of packets sent by the software
      bridge.
      
      Because DSA supports bridge domain isolation between user ports, just
      sending FORWARD frames is not enough, as they might leak the intended
      broadcast domain of the bridge on behalf of which the packets are sent.
      
      It should be noted that FORWARD frames are also (and typically) used to
      forward data plane packets on DSA links in cross-chip topologies. The
      FORWARD frame header contains the source port and switch ID, and
      switches receiving this frame header forward the packet according to
      their cross-chip port-based VLAN table (PVT).
      
      To address the bridging domain isolation in the context of offloading
      the forwarding on TX, the idea is that we can reuse the parts of the PVT
      that don't have any physical switch mapped to them, one entry for each
      software bridge. The switches will therefore think that behind their
      upstream port lie many switches, all in fact backed up by software
      bridges through tag_dsa.c, which constructs FORWARD packets with the
      right switch ID corresponding to each bridge.
      
      The mapping we use is absolutely trivial: DSA gives us a unique bridge
      number, and we add the number of the physical switches in the DSA switch
      tree to that, to obtain a unique virtual bridge device number to use in
      the PVT.
      Co-developed-by: Tobias Waldekranz <tobias@waldekranz.com>
      Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      ce5df689
  11. 02 7月, 2021 6 次提交
  12. 22 6月, 2021 1 次提交
    • E
      net: dsa: mv88e6xxx: Fix adding vlan 0 · b8b79c41
      Eldar Gasanov 提交于
      8021q module adds vlan 0 to all interfaces when it starts.
      When 8021q module is loaded it isn't possible to create bond
      with mv88e6xxx interfaces, the bonding module displays the error
      "Couldn't add bond vlan ids", because it tries to add vlan 0
      to slave interfaces.
      
      There is unexpected behavior in the switch. When a PVID
      is assigned to a port the switch changes VID to PVID
      in ingress frames with VID 0 on the port. The expected behavior
      is that the switch doesn't assign PVID to tagged frames
      with VID 0. But there isn't a way to change this behavior
      in the switch.
      
      Fixes: 57e661aa ("net: dsa: mv88e6xxx: Link aggregation support")
      Signed-off-by: Eldar Gasanov <eldargasanov2@gmail.com>
      Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      b8b79c41
  13. 22 4月, 2021 1 次提交
  14. 21 4月, 2021 3 次提交
  15. 13 4月, 2021 1 次提交
    • P
      net: phy: marvell: fix detection of PHY on Topaz switches · 1fe976d3
      Pali Rohár 提交于
      Since commit fee2d546 ("net: phy: marvell: mv88e6390 temperature
      sensor reading"), Linux reports the temperature of Topaz hwmon as
      constant -75°C.
      
      This is because switches from the Topaz family (88E6141 / 88E6341) have
      the address of the temperature sensor register different from Peridot.
      
      This address is instead compatible with 88E1510 PHYs, as was used for
      Topaz before the above mentioned commit.
      
      Create a new mapping table between switch family and PHY ID for families
      which don't have a model number. And define PHY IDs for Topaz and Peridot
      families.
      
      Create a new PHY ID and a new PHY driver for Topaz's internal PHY.
      The only difference from Peridot's PHY driver is the HWMON probing
      method.
      
      Prior to this change Topaz's internal PHY is detected by kernel as:
      
        PHY [...] driver [Marvell 88E6390] (irq=63)
      
      And afterwards as:
      
        PHY [...] driver [Marvell 88E6341 Family] (irq=63)
      Signed-off-by: Pali Rohár <pali@kernel.org>
      BugLink: https://github.com/globalscaletechnologies/linux/issues/1
      Fixes: fee2d546 ("net: phy: marvell: mv88e6390 temperature sensor reading")
      Reviewed-by: Marek Behún <kabel@kernel.org>
      Reviewed-by: Andrew Lunn <andrew@lunn.ch>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1fe976d3
  16. 19 3月, 2021 7 次提交
  17. 18 3月, 2021 4 次提交
  18. 15 2月, 2021 2 次提交
  19. 13 2月, 2021 1 次提交
    • V
      net: dsa: act as passthrough for bridge port flags · a8b659e7
      Vladimir Oltean 提交于
      There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
      expressed by the bridge through switchdev, and not all of them can be
      emulated by DSA mid-layer API at the same time.
      
      One possible configuration is when the bridge offloads the port flags
      using a mask that has a single bit set - therefore only one feature
      should change. However, DSA currently groups together unicast and
      multicast flooding in the .port_egress_floods method, which limits our
      options when we try to add support for turning off broadcast flooding:
      do we extend .port_egress_floods with a third parameter which b53 and
      mv88e6xxx will ignore? But that means that the DSA layer, which
      currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
      see that .port_egress_floods is implemented, and will report that all 3
      types of flooding are supported - not necessarily true.
      
      Another configuration is when the user specifies more than one flag at
      the same time, in the same netlink message. If we were to create one
      individual function per offloadable bridge port flag, we would limit the
      expressiveness of the switch driver of refusing certain combinations of
      flag values. For example, a switch may not have an explicit knob for
      flooding of unknown multicast, just for flooding in general. In that
      case, the only correct thing to do is to allow changes to BR_FLOOD and
      BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
      a separate .port_set_unicast_flood and .port_set_multicast_flood would
      not allow the driver to possibly reject that.
      
      Also, DSA doesn't consider it necessary to inform the driver that a
      SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
      just calls .port_egress_floods for the CPU port. When we'll add support
      for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
      problem because the flood settings will need to be held statefully in
      the DSA middle layer, otherwise changing the mrouter port attribute will
      impact the flooding attribute. And that's _assuming_ that the underlying
      hardware doesn't have anything else to do when a multicast router
      attaches to a port than flood unknown traffic to it.  If it does, there
      will need to be a dedicated .port_set_mrouter anyway.
      
      So we need to let the DSA drivers see the exact form that the bridge
      passes this switchdev attribute in, otherwise we are standing in the
      way. Therefore we also need to use this form of language when
      communicating to the driver that it needs to configure its initial
      (before bridge join) and final (after bridge leave) port flags.
      
      The b53 and mv88e6xxx drivers are converted to the passthrough API and
      their implementation of .port_egress_floods is split into two: a
      function that configures unicast flooding and another for multicast.
      The mv88e6xxx implementation is quite hairy, and it turns out that
      the implementations of unknown unicast flooding are actually the same
      for 6185 and for 6352:
      
      behind the confusing names actually lie two individual bits:
      NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
      NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
      
      so there was no reason to entangle them in the first place.
      
      Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
      PORT_CTL0, which has the exact same bit index. I have left the
      implementations separate though, for the only reason that the names are
      different enough to confuse me, since I am not able to double-check with
      a user manual. The multicast flooding setting for 6185 is in a different
      register than for 6352 though.
      Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
      Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      a8b659e7