From 9da712d2ede7e3e3a0da180351505310ee271773 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 23 Aug 2011 03:14:22 +0000 Subject: [PATCH] ixgbe: update {P}FC thresholds to account for X540 and loopback Revise high and low threshold marks wrt flow control to account for the X540 devices and latency introduced by the loopback switch. Without this it was in theory possible to drop frames on a supposedly lossless link with X540 or SR-IOV enabled. Previously we used a magic number in a define to calculate the threshold values. This made it difficult to sort out exactly which latencies were or were not being accounted for. Here I was overly explicit and tried to used #define names that would be recognizable after reading the IEEE 802.1Qbb specification. Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_82598.c | 8 +- .../net/ethernet/intel/ixgbe/ixgbe_common.c | 12 +- drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h | 1 - .../ethernet/intel/ixgbe/ixgbe_dcb_82598.c | 9 +- .../ethernet/intel/ixgbe/ixgbe_dcb_82599.c | 8 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 128 +++++++++++++++++- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 62 ++++++++- 7 files changed, 190 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index b816a624a6ce..fa079bbab89a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -358,7 +358,6 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) u32 fctrl_reg; u32 rmcs_reg; u32 reg; - u32 rx_pba_size; u32 link_speed = 0; bool link_up; @@ -461,16 +460,13 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) /* Set up and enable Rx high/low water mark thresholds, enable XON. */ if (hw->fc.current_mode & ixgbe_fc_tx_pause) { - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - - reg = (rx_pba_size - hw->fc.low_water) << 6; + reg = hw->fc.low_water << 6; if (hw->fc.send_xon) reg |= IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL(packetbuf_num), reg); - reg = (rx_pba_size - hw->fc.high_water) << 6; + reg = hw->fc.high_water[packetbuf_num] << 6; reg |= IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTH(packetbuf_num), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 84ed9ef7288d..59cd54cfdc1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1932,7 +1932,6 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num) s32 ret_val = 0; u32 mflcn_reg, fccfg_reg; u32 reg; - u32 rx_pba_size; u32 fcrtl, fcrth; #ifdef CONFIG_DCB @@ -2012,11 +2011,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num) IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - - fcrth = (rx_pba_size - hw->fc.high_water) << 10; - fcrtl = (rx_pba_size - hw->fc.low_water) << 10; + fcrth = hw->fc.high_water[packetbuf_num] << 10; + fcrtl = hw->fc.low_water << 10; if (hw->fc.current_mode & ixgbe_fc_tx_pause) { fcrth |= IXGBE_FCRTH_FCEN; @@ -2293,7 +2289,9 @@ static s32 ixgbe_setup_fc(struct ixgbe_hw *hw, s32 packetbuf_num) * Validate the water mark configuration. Zero water marks are invalid * because it causes the controller to just blast out fc packets. */ - if (!hw->fc.low_water || !hw->fc.high_water || !hw->fc.pause_time) { + if (!hw->fc.low_water || + !hw->fc.high_water[packetbuf_num] || + !hw->fc.pause_time) { hw_dbg(hw, "Invalid water mark configuration\n"); ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; goto out; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h index 0a68aa7f5d18..df095a9bbe2b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h @@ -36,7 +36,6 @@ #define IXGBE_MAX_PACKET_BUFFERS 8 #define MAX_USER_PRIORITY 8 -#define MAX_TRAFFIC_CLASS 8 #define MAX_BW_GROUP 8 #define BW_PERCENT 100 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index 2288c3cac010..fcd0e479721f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -191,7 +191,7 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, */ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) { - u32 reg, rx_pba_size; + u32 reg; u8 i; if (pfc_en) { @@ -222,9 +222,8 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { int enabled = pfc_en & (1 << i); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - reg = (rx_pba_size - hw->fc.low_water) << 10; + + reg = hw->fc.low_water << 10; if (enabled == pfc_enabled_tx || enabled == pfc_enabled_full) @@ -232,7 +231,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg); - reg = (rx_pba_size - hw->fc.high_water) << 10; + reg = hw->fc.high_water[i] << 10; if (enabled == pfc_enabled_tx || enabled == pfc_enabled_full) reg |= IXGBE_FCRTH_FCEN; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c index d64fb872978e..02f6724bf48e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c @@ -210,21 +210,19 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw, */ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en) { - u32 i, reg, rx_pba_size; + u32 i, reg; /* Configure PFC Tx thresholds per TC */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { int enabled = pfc_en & (1 << i); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - reg = (rx_pba_size - hw->fc.low_water) << 10; + reg = hw->fc.low_water << 10; if (enabled) reg |= IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), reg); - reg = (rx_pba_size - hw->fc.high_water) << 10; + reg = hw->fc.high_water[i] << 10; if (enabled) reg |= IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3594b09f4993..ba703d30f3a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3351,9 +3351,128 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RQTC, reg); } } +#endif + +/* Additional bittime to account for IXGBE framing */ +#define IXGBE_ETH_FRAMING 20 + +/* + * ixgbe_hpbthresh - calculate high water mark for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int link, tc, kb, marker; + u32 dv_id, rx_pba; + + /* Calculate max LAN frame size */ + tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; +#ifdef CONFIG_IXGBE_DCB + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); + +#endif + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } #endif + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_DV_X540(link, tc); + break; + default: + dv_id = IXGBE_DV(link, tc); + break; + } + + /* Loopback switch introduces additional latency */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + dv_id += IXGBE_B2BT(tc); + + /* Delay value is calculated in bit times convert to KB */ + kb = IXGBE_BT2KB(dv_id); + rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10; + + marker = rx_pba - kb; + + /* It is possible that the packet buffer is not large enough + * to provide required headroom. In this case throw an error + * to user and a do the best we can. + */ + if (marker < 0) { + e_warn(drv, "Packet Buffer(%i) can not provide enough" + "headroom to support flow control." + "Decrease MTU or number of traffic classes\n", pb); + marker = tc + 1; + } + + return marker; +} + +/* + * ixgbe_lpbthresh - calculate low water mark for for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int tc; + u32 dv_id; + + /* Calculate max LAN frame size */ + tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_LOW_DV_X540(tc); + break; + default: + dv_id = IXGBE_LOW_DV(tc); + break; + } + + /* Delay value is calculated in bit times convert to KB */ + return IXGBE_BT2KB(dv_id); +} + +/* + * ixgbe_pbthresh_setup - calculate and setup high low water marks + */ +static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int num_tc = netdev_get_num_tc(adapter->netdev); + int i; + + if (!num_tc) + num_tc = 1; + + hw->fc.low_water = ixgbe_lpbthresh(adapter); + + for (i = 0; i < num_tc; i++) { + hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + + /* Low water marks must not be larger than high water marks */ + if (hw->fc.low_water > hw->fc.high_water[i]) + hw->fc.low_water = 0; + } +} + static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -3367,6 +3486,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) hdrm = 0; hw->mac.ops.set_rxpba(hw, tc, hdrm, PBA_STRATEGY_EQUAL); + ixgbe_pbthresh_setup(adapter); } static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) @@ -4769,13 +4889,11 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct net_device *dev = adapter->netdev; unsigned int rss; #ifdef CONFIG_IXGBE_DCB int j; struct tc_configuration *tc; #endif - int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN; /* PCI config space info */ @@ -4851,8 +4969,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) #ifdef CONFIG_DCB adapter->last_lfc_mode = hw->fc.current_mode; #endif - hw->fc.high_water = FC_HIGH_WATER(max_frame); - hw->fc.low_water = FC_LOW_WATER(max_frame); + ixgbe_pbthresh_setup(adapter); hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; hw->fc.send_xon = true; hw->fc.disable_fc_autoneg = false; @@ -5119,9 +5236,6 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - hw->fc.high_water = FC_HIGH_WATER(max_frame); - hw->fc.low_water = FC_LOW_WATER(max_frame); - if (netif_running(netdev)) ixgbe_reinit_locked(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 9a03341e5261..16dd461d4af3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -404,6 +404,7 @@ #define IXGBE_WUPL_LENGTH_MASK 0xFFFF /* DCB registers */ +#define MAX_TRAFFIC_CLASS 8 #define IXGBE_RMCS 0x03D00 #define IXGBE_DPMCS 0x07F40 #define IXGBE_PDPMCS 0x0CD00 @@ -2323,13 +2324,60 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 -/* Flow Control Macros */ -#define PAUSE_RTT 8 -#define PAUSE_MTU(MTU) ((MTU + 1024 - 1) / 1024) +/* Flow Control Data Sheet defined values + * Calculation and defines taken from 802.1bb Annex O + */ + +/* BitTimes (BT) conversion */ +#define IXGBE_BT2KB(BT) ((BT + 1023) / (8 * 1024)) +#define IXGBE_B2BT(BT) (BT * 8) + +/* Calculate Delay to respond to PFC */ +#define IXGBE_PFC_D 672 + +/* Calculate Cable Delay */ +#define IXGBE_CABLE_DC 5556 /* Delay Copper */ +#define IXGBE_CABLE_DO 5000 /* Delay Optical */ + +/* Calculate Interface Delay X540 */ +#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */ +#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */ +#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */ + +#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC) + +/* Calculate Interface Delay 82598, 82599 */ +#define IXGBE_PHY_D 12800 +#define IXGBE_MAC_D 4096 +#define IXGBE_XAUI_D (2 * 1024) + +#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D) + +/* Calculate Delay incurred from higher layer */ +#define IXGBE_HD 6144 + +/* Calculate PCI Bus delay for low thresholds */ +#define IXGBE_PCI_DELAY 10000 + +/* Calculate X540 delay value in bit times */ +#define IXGBE_FILL_RATE (36 / 25) + +#define IXGBE_DV_X540(LINK, TC) (IXGBE_FILL_RATE * \ + (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID_X540) + \ + IXGBE_HD + IXGBE_B2BT(TC))) + +/* Calculate 82599, 82598 delay value in bit times */ +#define IXGBE_DV(LINK, TC) (IXGBE_FILL_RATE * \ + (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + (2 * IXGBE_ID) + \ + IXGBE_HD + IXGBE_B2BT(TC))) -#define FC_HIGH_WATER(MTU) ((((PAUSE_RTT + PAUSE_MTU(MTU)) * 144) + 99) / 100 +\ - PAUSE_MTU(MTU)) -#define FC_LOW_WATER(MTU) (2 * (2 * PAUSE_MTU(MTU) + PAUSE_RTT)) +/* Calculate low threshold delay values */ +#define IXGBE_LOW_DV_X540(TC) (2 * IXGBE_B2BT(TC) + \ + (IXGBE_FILL_RATE * IXGBE_PCI_DELAY)) +#define IXGBE_LOW_DV(TC) (2 * IXGBE_LOW_DV_X540(TC)) /* Software ATR hash keys */ #define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 @@ -2548,7 +2596,7 @@ struct ixgbe_bus_info { /* Flow control parameters */ struct ixgbe_fc_info { - u32 high_water; /* Flow Control High-water */ + u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */ u32 low_water; /* Flow Control Low-water */ u16 pause_time; /* Flow Control Pause timer */ bool send_xon; /* Flow control send XON */ -- GitLab