1. 06 5月, 2022 10 次提交
  2. 29 4月, 2022 1 次提交
  3. 27 4月, 2022 4 次提交
    • J
      ice: fix use-after-free when deinitializing mailbox snapshot · b668f4cd
      Jacob Keller 提交于
      During ice_sriov_configure, if num_vfs is 0, we are being asked by the
      kernel to remove all VFs.
      
      The driver first de-initializes the snapshot before freeing all the VFs.
      This results in a use-after-free BUG detected by KASAN. The bug occurs
      because the snapshot can still be accessed until all VFs are removed.
      
      Fix this by freeing all the VFs first before calling
      ice_mbx_deinit_snapshot.
      
      [  +0.032591] ==================================================================
      [  +0.000021] BUG: KASAN: use-after-free in ice_mbx_vf_state_handler+0x1c3/0x410 [ice]
      [  +0.000315] Write of size 28 at addr ffff889908eb6f28 by task kworker/55:2/1530996
      
      [  +0.000029] CPU: 55 PID: 1530996 Comm: kworker/55:2 Kdump: loaded Tainted: G S        I       5.17.0-dirty #1
      [  +0.000022] Hardware name: Dell Inc. PowerEdge R740/0923K0, BIOS 1.6.13 12/17/2018
      [  +0.000013] Workqueue: ice ice_service_task [ice]
      [  +0.000279] Call Trace:
      [  +0.000012]  <TASK>
      [  +0.000011]  dump_stack_lvl+0x33/0x42
      [  +0.000030]  print_report.cold.13+0xb2/0x6b3
      [  +0.000028]  ? ice_mbx_vf_state_handler+0x1c3/0x410 [ice]
      [  +0.000295]  kasan_report+0xa5/0x120
      [  +0.000026]  ? __switch_to_asm+0x21/0x70
      [  +0.000024]  ? ice_mbx_vf_state_handler+0x1c3/0x410 [ice]
      [  +0.000298]  kasan_check_range+0x183/0x1e0
      [  +0.000019]  memset+0x1f/0x40
      [  +0.000018]  ice_mbx_vf_state_handler+0x1c3/0x410 [ice]
      [  +0.000304]  ? ice_conv_link_speed_to_virtchnl+0x160/0x160 [ice]
      [  +0.000297]  ? ice_vsi_dis_spoofchk+0x40/0x40 [ice]
      [  +0.000305]  ice_is_malicious_vf+0x1aa/0x250 [ice]
      [  +0.000303]  ? ice_restore_all_vfs_msi_state+0x160/0x160 [ice]
      [  +0.000297]  ? __mutex_unlock_slowpath.isra.15+0x410/0x410
      [  +0.000022]  ? ice_debug_cq+0xb7/0x230 [ice]
      [  +0.000273]  ? __kasan_slab_alloc+0x2f/0x90
      [  +0.000022]  ? memset+0x1f/0x40
      [  +0.000017]  ? do_raw_spin_lock+0x119/0x1d0
      [  +0.000022]  ? rwlock_bug.part.2+0x60/0x60
      [  +0.000024]  __ice_clean_ctrlq+0x3a6/0xd60 [ice]
      [  +0.000273]  ? newidle_balance+0x5b1/0x700
      [  +0.000026]  ? ice_print_link_msg+0x2f0/0x2f0 [ice]
      [  +0.000271]  ? update_cfs_group+0x1b/0x140
      [  +0.000018]  ? load_balance+0x1260/0x1260
      [  +0.000022]  ? ice_process_vflr_event+0x27/0x130 [ice]
      [  +0.000301]  ice_service_task+0x136e/0x1470 [ice]
      [  +0.000281]  process_one_work+0x3b4/0x6c0
      [  +0.000030]  worker_thread+0x65/0x660
      [  +0.000023]  ? __kthread_parkme+0xe4/0x100
      [  +0.000021]  ? process_one_work+0x6c0/0x6c0
      [  +0.000020]  kthread+0x179/0x1b0
      [  +0.000018]  ? kthread_complete_and_exit+0x20/0x20
      [  +0.000022]  ret_from_fork+0x22/0x30
      [  +0.000026]  </TASK>
      
      [  +0.000018] Allocated by task 10742:
      [  +0.000013]  kasan_save_stack+0x1c/0x40
      [  +0.000018]  __kasan_kmalloc+0x84/0xa0
      [  +0.000016]  kmem_cache_alloc_trace+0x16c/0x2e0
      [  +0.000015]  intel_iommu_probe_device+0xeb/0x860
      [  +0.000015]  __iommu_probe_device+0x9a/0x2f0
      [  +0.000016]  iommu_probe_device+0x43/0x270
      [  +0.000015]  iommu_bus_notifier+0xa7/0xd0
      [  +0.000015]  blocking_notifier_call_chain+0x90/0xc0
      [  +0.000017]  device_add+0x5f3/0xd70
      [  +0.000014]  pci_device_add+0x404/0xa40
      [  +0.000015]  pci_iov_add_virtfn+0x3b0/0x550
      [  +0.000016]  sriov_enable+0x3bb/0x600
      [  +0.000013]  ice_ena_vfs+0x113/0xa79 [ice]
      [  +0.000293]  ice_sriov_configure.cold.17+0x21/0xe0 [ice]
      [  +0.000291]  sriov_numvfs_store+0x160/0x200
      [  +0.000015]  kernfs_fop_write_iter+0x1db/0x270
      [  +0.000018]  new_sync_write+0x21d/0x330
      [  +0.000013]  vfs_write+0x376/0x410
      [  +0.000013]  ksys_write+0xba/0x150
      [  +0.000012]  do_syscall_64+0x3a/0x80
      [  +0.000012]  entry_SYSCALL_64_after_hwframe+0x44/0xae
      
      [  +0.000028] Freed by task 10742:
      [  +0.000011]  kasan_save_stack+0x1c/0x40
      [  +0.000015]  kasan_set_track+0x21/0x30
      [  +0.000016]  kasan_set_free_info+0x20/0x30
      [  +0.000012]  __kasan_slab_free+0x104/0x170
      [  +0.000016]  kfree+0x9b/0x470
      [  +0.000013]  devres_destroy+0x1c/0x20
      [  +0.000015]  devm_kfree+0x33/0x40
      [  +0.000012]  ice_mbx_deinit_snapshot+0x39/0x70 [ice]
      [  +0.000295]  ice_sriov_configure+0xb0/0x260 [ice]
      [  +0.000295]  sriov_numvfs_store+0x1bc/0x200
      [  +0.000015]  kernfs_fop_write_iter+0x1db/0x270
      [  +0.000016]  new_sync_write+0x21d/0x330
      [  +0.000012]  vfs_write+0x376/0x410
      [  +0.000012]  ksys_write+0xba/0x150
      [  +0.000012]  do_syscall_64+0x3a/0x80
      [  +0.000012]  entry_SYSCALL_64_after_hwframe+0x44/0xae
      
      [  +0.000024] Last potentially related work creation:
      [  +0.000010]  kasan_save_stack+0x1c/0x40
      [  +0.000016]  __kasan_record_aux_stack+0x98/0xa0
      [  +0.000013]  insert_work+0x34/0x160
      [  +0.000015]  __queue_work+0x20e/0x650
      [  +0.000016]  queue_work_on+0x4c/0x60
      [  +0.000015]  nf_nat_masq_schedule+0x297/0x2e0 [nf_nat]
      [  +0.000034]  masq_device_event+0x5a/0x60 [nf_nat]
      [  +0.000031]  raw_notifier_call_chain+0x5f/0x80
      [  +0.000017]  dev_close_many+0x1d6/0x2c0
      [  +0.000015]  unregister_netdevice_many+0x4e3/0xa30
      [  +0.000015]  unregister_netdevice_queue+0x192/0x1d0
      [  +0.000014]  iavf_remove+0x8f9/0x930 [iavf]
      [  +0.000058]  pci_device_remove+0x65/0x110
      [  +0.000015]  device_release_driver_internal+0xf8/0x190
      [  +0.000017]  pci_stop_bus_device+0xb5/0xf0
      [  +0.000014]  pci_stop_and_remove_bus_device+0xe/0x20
      [  +0.000016]  pci_iov_remove_virtfn+0x19c/0x230
      [  +0.000015]  sriov_disable+0x4f/0x170
      [  +0.000014]  ice_free_vfs+0x9a/0x490 [ice]
      [  +0.000306]  ice_sriov_configure+0xb8/0x260 [ice]
      [  +0.000294]  sriov_numvfs_store+0x1bc/0x200
      [  +0.000015]  kernfs_fop_write_iter+0x1db/0x270
      [  +0.000016]  new_sync_write+0x21d/0x330
      [  +0.000012]  vfs_write+0x376/0x410
      [  +0.000012]  ksys_write+0xba/0x150
      [  +0.000012]  do_syscall_64+0x3a/0x80
      [  +0.000012]  entry_SYSCALL_64_after_hwframe+0x44/0xae
      
      [  +0.000025] The buggy address belongs to the object at ffff889908eb6f00
                     which belongs to the cache kmalloc-96 of size 96
      [  +0.000016] The buggy address is located 40 bytes inside of
                     96-byte region [ffff889908eb6f00, ffff889908eb6f60)
      
      [  +0.000026] The buggy address belongs to the physical page:
      [  +0.000010] page:00000000b7e99a2e refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1908eb6
      [  +0.000016] flags: 0x57ffffc0000200(slab|node=1|zone=2|lastcpupid=0x1fffff)
      [  +0.000024] raw: 0057ffffc0000200 ffffea0069d9fd80 dead000000000002 ffff88810004c780
      [  +0.000015] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
      [  +0.000009] page dumped because: kasan: bad access detected
      
      [  +0.000016] Memory state around the buggy address:
      [  +0.000012]  ffff889908eb6e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
      [  +0.000014]  ffff889908eb6e80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
      [  +0.000014] >ffff889908eb6f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
      [  +0.000011]                                   ^
      [  +0.000013]  ffff889908eb6f80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
      [  +0.000013]  ffff889908eb7000: fa fb fb fb fb fb fb fb fc fc fc fc fa fb fb fb
      [  +0.000012] ==================================================================
      
      Fixes: 0891c896 ("ice: warn about potentially malicious VFs")
      Reported-by: NSlawomir Laba <slawomirx.laba@intel.com>
      Signed-off-by: NJacob Keller <jacob.e.keller@intel.com>
      Tested-by: NKonrad Jankowski <konrad0.jankowski@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      b668f4cd
    • P
      ice: wait 5 s for EMP reset after firmware flash · b537752e
      Petr Oros 提交于
      We need to wait 5 s for EMP reset after firmware flash. Code was extracted
      from OOT driver (ice v1.8.3 downloaded from sourceforge). Without this
      wait, fw_activate let card in inconsistent state and recoverable only
      by second flash/activate. Flash was tested on these fw's:
      From -> To
       3.00 -> 3.10/3.20
       3.10 -> 3.00/3.20
       3.20 -> 3.00/3.10
      
      Reproducer:
      [root@host ~]# devlink dev flash pci/0000:ca:00.0 file E810_XXVDA4_FH_O_SEC_FW_1p6p1p9_NVM_3p10_PLDMoMCTP_0.11_8000AD7B.bin
      Preparing to flash
      [fw.mgmt] Erasing
      [fw.mgmt] Erasing done
      [fw.mgmt] Flashing 100%
      [fw.mgmt] Flashing done 100%
      [fw.undi] Erasing
      [fw.undi] Erasing done
      [fw.undi] Flashing 100%
      [fw.undi] Flashing done 100%
      [fw.netlist] Erasing
      [fw.netlist] Erasing done
      [fw.netlist] Flashing 100%
      [fw.netlist] Flashing done 100%
      Activate new firmware by devlink reload
      [root@host ~]# devlink dev reload pci/0000:ca:00.0 action fw_activate
      reload_actions_performed:
          fw_activate
      [root@host ~]# ip link show ens7f0
      71: ens7f0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000
          link/ether b4:96:91:dc:72:e0 brd ff:ff:ff:ff:ff:ff
          altname enp202s0f0
      
      dmesg after flash:
      [   55.120788] ice: Copyright (c) 2018, Intel Corporation.
      [   55.274734] ice 0000:ca:00.0: Get PHY capabilities failed status = -5, continuing anyway
      [   55.569797] ice 0000:ca:00.0: The DDP package was successfully loaded: ICE OS Default Package version 1.3.28.0
      [   55.603629] ice 0000:ca:00.0: Get PHY capability failed.
      [   55.608951] ice 0000:ca:00.0: ice_init_nvm_phy_type failed: -5
      [   55.647348] ice 0000:ca:00.0: PTP init successful
      [   55.675536] ice 0000:ca:00.0: DCB is enabled in the hardware, max number of TCs supported on this port are 8
      [   55.685365] ice 0000:ca:00.0: FW LLDP is disabled, DCBx/LLDP in SW mode.
      [   55.692179] ice 0000:ca:00.0: Commit DCB Configuration to the hardware
      [   55.701382] ice 0000:ca:00.0: 126.024 Gb/s available PCIe bandwidth, limited by 16.0 GT/s PCIe x8 link at 0000:c9:02.0 (capable of 252.048 Gb/s with 16.0 GT/s PCIe x16 link)
      Reboot doesn’t help, only second flash/activate with OOT or patched
      driver put card back in consistent state.
      
      After patch:
      [root@host ~]# devlink dev flash pci/0000:ca:00.0 file E810_XXVDA4_FH_O_SEC_FW_1p6p1p9_NVM_3p10_PLDMoMCTP_0.11_8000AD7B.bin
      Preparing to flash
      [fw.mgmt] Erasing
      [fw.mgmt] Erasing done
      [fw.mgmt] Flashing 100%
      [fw.mgmt] Flashing done 100%
      [fw.undi] Erasing
      [fw.undi] Erasing done
      [fw.undi] Flashing 100%
      [fw.undi] Flashing done 100%
      [fw.netlist] Erasing
      [fw.netlist] Erasing done
      [fw.netlist] Flashing 100%
      [fw.netlist] Flashing done 100%
      Activate new firmware by devlink reload
      [root@host ~]# devlink dev reload pci/0000:ca:00.0 action fw_activate
      reload_actions_performed:
          fw_activate
      [root@host ~]# ip link show ens7f0
      19: ens7f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
          link/ether b4:96:91:dc:72:e0 brd ff:ff:ff:ff:ff:ff
          altname enp202s0f0
      
      Fixes: 399e27db ("ice: support immediate firmware activation via devlink reload")
      Signed-off-by: NPetr Oros <poros@redhat.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      b537752e
    • I
      ice: Protect vf_state check by cfg_lock in ice_vc_process_vf_msg() · 77d64d28
      Ivan Vecera 提交于
      Previous patch labelled "ice: Fix incorrect locking in
      ice_vc_process_vf_msg()"  fixed an issue with ignored messages
      sent by VF driver but a small race window still left.
      
      Recently caught trace during 'ip link set ... vf 0 vlan ...' operation:
      
      [ 7332.995625] ice 0000:3b:00.0: Clearing port VLAN on VF 0
      [ 7333.001023] iavf 0000:3b:01.0: Reset indication received from the PF
      [ 7333.007391] iavf 0000:3b:01.0: Scheduling reset task
      [ 7333.059575] iavf 0000:3b:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 3
      [ 7333.059626] ice 0000:3b:00.0: Invalid message from VF 0, opcode 3, len 4, error -1
      
      Setting of VLAN for VF causes a reset of the affected VF using
      ice_reset_vf() function that runs with cfg_lock taken:
      
      1. ice_notify_vf_reset() informs IAVF driver that reset is needed and
         IAVF schedules its own reset procedure
      2. Bit ICE_VF_STATE_DIS is set in vf->vf_state
      3. Misc initialization steps
      4. ice_sriov_post_vsi_rebuild() -> ice_vf_set_initialized() and that
         clears ICE_VF_STATE_DIS in vf->vf_state
      
      Step 3 is mentioned race window because IAVF reset procedure runs in
      parallel and one of its step is sending of VIRTCHNL_OP_GET_VF_RESOURCES
      message (opcode==3). This message is handled in ice_vc_process_vf_msg()
      and if it is received during the mentioned race window then it's
      marked as invalid and error is returned to VF driver.
      
      Protect vf_state check in ice_vc_process_vf_msg() by cfg_lock to avoid
      this race condition.
      
      Fixes: e6ba5273 ("ice: Fix race conditions between virtchnl handling and VF ndo ops")
      Tested-by: NFei Liu <feliu@redhat.com>
      Signed-off-by: NIvan Vecera <ivecera@redhat.com>
      Reviewed-by: NJacob Keller <jacob.e.keller@intel.com>
      Tested-by: NKonrad Jankowski <konrad0.jankowski@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      77d64d28
    • I
      ice: Fix incorrect locking in ice_vc_process_vf_msg() · aaf461af
      Ivan Vecera 提交于
      Usage of mutex_trylock() in ice_vc_process_vf_msg() is incorrect
      because message sent from VF is ignored and never processed.
      
      Use mutex_lock() instead to fix the issue. It is safe because this
      mutex is used to prevent races between VF related NDOs and
      handlers processing request messages from VF and these handlers
      are running in ice_service_task() context. Additionally move this
      mutex lock prior ice_vc_is_opcode_allowed() call to avoid potential
      races during allowlist access.
      
      Fixes: e6ba5273 ("ice: Fix race conditions between virtchnl handling and VF ndo ops")
      Signed-off-by: NIvan Vecera <ivecera@redhat.com>
      Tested-by: NKonrad Jankowski <konrad0.jankowski@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      aaf461af
  4. 21 4月, 2022 2 次提交
    • M
      i40e, xsk: Get rid of redundant 'fallthrough' · 9d87e41a
      Maciej Fijalkowski 提交于
      Intel drivers translate actions returned from XDP programs to their own
      return codes that have the following mapping:
      
      XDP_REDIRECT -> I40E_XDP_{REDIR,CONSUMED}
      XDP_TX -> I40E_XDP_{TX,CONSUMED}
      XDP_DROP -> I40E_XDP_CONSUMED
      XDP_ABORTED -> I40E_XDP_CONSUMED
      XDP_PASS -> I40E_XDP_PASS
      
      Commit b8aef650 ("i40e, xsk: Terminate Rx side of NAPI when XSK Rx
      queue gets full") introduced new translation
      
      XDP_REDIRECT -> I40E_XDP_EXIT
      
      which is set when XSK RQ gets full and to indicate that driver should
      stop further Rx processing. This happens for unsuccessful
      xdp_do_redirect() so it is valuable to call trace_xdp_exception() for
      this case. In order to avoid I40E_XDP_EXIT -> IXGBE_XDP_CONSUMED
      overwrite, XDP_DROP case was moved above which in turn made the
      'fallthrough' that is in XDP_ABORTED useless as it became the last label
      in the switch statement.
      
      Simply drop this leftover.
      
      Fixes: b8aef650 ("i40e, xsk: Terminate Rx side of NAPI when XSK Rx queue gets full")
      Signed-off-by: NMaciej Fijalkowski <maciej.fijalkowski@intel.com>
      Signed-off-by: NDaniel Borkmann <daniel@iogearbox.net>
      Link: https://lore.kernel.org/bpf/20220421132126.471515-3-maciej.fijalkowski@intel.com
      9d87e41a
    • M
      ixgbe, xsk: Get rid of redundant 'fallthrough' · e130e8d5
      Maciej Fijalkowski 提交于
      Intel drivers translate actions returned from XDP programs to their own
      return codes that have the following mapping:
      
      XDP_REDIRECT -> IXGBE_XDP_{REDIR,CONSUMED}
      XDP_TX -> IXGBE_XDP_{TX,CONSUMED}
      XDP_DROP -> IXGBE_XDP_CONSUMED
      XDP_ABORTED -> IXGBE_XDP_CONSUMED
      XDP_PASS -> IXGBE_XDP_PASS
      
      Commit c7dd09fd ("ixgbe, xsk: Terminate Rx side of NAPI when XSK Rx
      queue gets full") introduced new translation
      
      XDP_REDIRECT -> IXGBE_XDP_EXIT
      
      which is set when XSK RQ gets full and to indicate that driver should
      stop further Rx processing. This happens for unsuccessful
      xdp_do_redirect() so it is valuable to call trace_xdp_exception() for
      this case. In order to avoid IXGBE_XDP_EXIT -> IXGBE_XDP_CONSUMED
      overwrite, XDP_DROP case was moved above which in turn made the
      'fallthrough' that is in XDP_ABORTED useless as it became the last label
      in the switch statement.
      
      Simply drop this leftover.
      
      Fixes: c7dd09fd ("ixgbe, xsk: Terminate Rx side of NAPI when XSK Rx queue gets full")
      Signed-off-by: NMaciej Fijalkowski <maciej.fijalkowski@intel.com>
      Signed-off-by: NDaniel Borkmann <daniel@iogearbox.net>
      Link: https://lore.kernel.org/bpf/20220421132126.471515-2-maciej.fijalkowski@intel.com
      e130e8d5
  5. 16 4月, 2022 9 次提交
  6. 14 4月, 2022 6 次提交
    • J
      ice: Fix memory leak in ice_get_orom_civd_data() · 7c8881b7
      Jianglei Nie 提交于
      A memory chunk was allocated for orom_data in ice_get_orom_civd_data()
      by vzmalloc(). But when ice_read_flash_module() fails, the allocated
      memory is not freed, which will lead to a memory leak.
      
      We can fix it by freeing the orom_data when ce_read_flash_module() fails.
      
      Fixes: af18d886 ("ice: reduce time to read Option ROM CIVD data")
      Signed-off-by: NJianglei Nie <niejianglei2021@163.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      7c8881b7
    • W
      ice: fix crash in switchdev mode · d2016651
      Wojciech Drewek 提交于
      Below steps end up with crash:
      - modprobe ice
      - devlink dev eswitch set $PF1_PCI mode switchdev
      - echo 64 > /sys/class/net/$PF1/device/sriov_numvfs
      - rmmod ice
      
      Calling ice_eswitch_port_start_xmit while the process of removing
      VFs is in progress ends up with NULL pointer dereference.
      That's because PR netdev is not released but some resources
      are already freed. Fix it by checking if ICE_VF_DIS bit is set.
      
      Call trace:
      [ 1379.595146] BUG: kernel NULL pointer dereference, address: 0000000000000040
      [ 1379.595284] #PF: supervisor read access in kernel mode
      [ 1379.595410] #PF: error_code(0x0000) - not-present page
      [ 1379.595535] PGD 0 P4D 0
      [ 1379.595657] Oops: 0000 [#1] PREEMPT SMP PTI
      [ 1379.595783] CPU: 4 PID: 974 Comm: NetworkManager Kdump: loaded Tainted: G           OE     5.17.0-rc8_mrq_dev-queue+ #12
      [ 1379.595926] Hardware name: Intel Corporation S1200SP/S1200SP, BIOS S1200SP.86B.03.01.0042.013020190050 01/30/2019
      [ 1379.596063] RIP: 0010:ice_eswitch_port_start_xmit+0x46/0xd0 [ice]
      [ 1379.596292] Code: c7 c8 09 00 00 e8 9a c9 fc ff 84 c0 0f 85 82 00 00 00 4c 89 e7 e8 ca 70 fe ff 48 8b 7d 58 48 89 c3 48 85 ff 75 5e 48 8b 53 20 <8b> 42 40 85 c0 74 78 8d 48 01 f0 0f b1 4a 40 75 f2 0f b6 95 84 00
      [ 1379.596456] RSP: 0018:ffffaba0c0d7bad0 EFLAGS: 00010246
      [ 1379.596584] RAX: ffff969c14c71680 RBX: ffff969c14c71680 RCX: 000100107a0f0000
      [ 1379.596715] RDX: 0000000000000000 RSI: ffff969b9d631000 RDI: 0000000000000000
      [ 1379.596846] RBP: ffff969c07b46500 R08: ffff969becfca8ac R09: 0000000000000001
      [ 1379.596977] R10: 0000000000000004 R11: ffffaba0c0d7bbec R12: ffff969b9d631000
      [ 1379.597106] R13: ffffffffc08357a0 R14: ffff969c07b46500 R15: ffff969b9d631000
      [ 1379.597237] FS:  00007f72c0e25c80(0000) GS:ffff969f13500000(0000) knlGS:0000000000000000
      [ 1379.597414] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      [ 1379.597562] CR2: 0000000000000040 CR3: 000000012b316006 CR4: 00000000003706e0
      [ 1379.597713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      [ 1379.597863] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
      [ 1379.598015] Call Trace:
      [ 1379.598153]  <TASK>
      [ 1379.598294]  dev_hard_start_xmit+0xd9/0x220
      [ 1379.598444]  sch_direct_xmit+0x8a/0x340
      [ 1379.598592]  __dev_queue_xmit+0xa3c/0xd30
      [ 1379.598739]  ? packet_parse_headers+0xb4/0xf0
      [ 1379.598890]  packet_sendmsg+0xa15/0x1620
      [ 1379.599038]  ? __check_object_size+0x46/0x140
      [ 1379.599186]  sock_sendmsg+0x5e/0x60
      [ 1379.599330]  ____sys_sendmsg+0x22c/0x270
      [ 1379.599474]  ? import_iovec+0x17/0x20
      [ 1379.599622]  ? sendmsg_copy_msghdr+0x59/0x90
      [ 1379.599771]  ___sys_sendmsg+0x81/0xc0
      [ 1379.599917]  ? __pollwait+0xd0/0xd0
      [ 1379.600061]  ? preempt_count_add+0x68/0xa0
      [ 1379.600210]  ? _raw_write_lock_irq+0x1a/0x40
      [ 1379.600369]  ? ep_done_scan+0xc9/0x110
      [ 1379.600494]  ? _raw_spin_unlock_irqrestore+0x25/0x40
      [ 1379.600622]  ? preempt_count_add+0x68/0xa0
      [ 1379.600747]  ? _raw_spin_lock_irq+0x1a/0x40
      [ 1379.600899]  ? __fget_light+0x8f/0x110
      [ 1379.601024]  __sys_sendmsg+0x49/0x80
      [ 1379.601148]  ? release_ds_buffers+0x50/0xe0
      [ 1379.601274]  do_syscall_64+0x3b/0x90
      [ 1379.601399]  entry_SYSCALL_64_after_hwframe+0x44/0xae
      [ 1379.601525] RIP: 0033:0x7f72c1e2e35d
      
      Fixes: f5396b8a ("ice: switchdev slow path")
      Signed-off-by: NWojciech Drewek <wojciech.drewek@intel.com>
      Reported-by: NMarcin Szycik <marcin.szycik@linux.intel.com>
      Reviewed-by: NMichal Swiatkowski <michal.swiatkowski@linux.intel.com>
      Tested-by: NSandeep Penigalapati <sandeep.penigalapati@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      d2016651
    • M
      ice: allow creating VFs for !CONFIG_NET_SWITCHDEV · aacca7a8
      Maciej Fijalkowski 提交于
      Currently for !CONFIG_NET_SWITCHDEV kernel builds it is not possible to
      create VFs properly as call to ice_eswitch_configure() returns
      -EOPNOTSUPP for us. This is because CONFIG_ICE_SWITCHDEV depends on
      CONFIG_NET_SWITCHDEV.
      
      Change the ice_eswitch_configure() implementation for
      !CONFIG_ICE_SWITCHDEV to return 0 instead -EOPNOTSUPP and let
      ice_ena_vfs() finish its work properly.
      
      CC: Grzegorz Nitka <grzegorz.nitka@intel.com>
      Fixes: 1a1c40df ("ice: set and release switchdev environment")
      Signed-off-by: NMaciej Fijalkowski <maciej.fijalkowski@intel.com>
      Signed-off-by: NMichal Swiatkowski <michal.swiatkowski@intel.com>
      Tested-by: NKonrad Jankowski <konrad0.jankowski@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      aacca7a8
    • M
      ice: xsk: check if Rx ring was filled up to the end · d1fc4c6f
      Maciej Fijalkowski 提交于
      __ice_alloc_rx_bufs_zc() checks if a number of the descriptors to be
      allocated would cause the ring wrap. In that case, driver will issue two
      calls to xsk_buff_alloc_batch() - one that will fill the ring up to the
      end and the second one that will start with filling descriptors from the
      beginning of the ring.
      
      ice_fill_rx_descs() is a wrapper for taking care of what
      xsk_buff_alloc_batch() gave back to the driver. It works in a best
      effort approach, so for example when driver asks for 64 buffers,
      ice_fill_rx_descs() could assign only 32. Such case needs to be checked
      when ring is being filled up to the end, because in that situation ntu
      might not reached the end of the ring.
      
      Fix the ring wrap by checking if nb_buffs_extra has the expected value.
      If not, bump ntu and go directly to tail update.
      
      Fixes: 3876ff52 ("ice: xsk: Handle SW XDP ring wrap and bump tail more often")
      Signed-off-by: NMagnus Karlsson <magnus.karlsson@intel.com>
      Signed-off-by: NMaciej Fijalkowski <maciej.fijalkowski@intel.com>
      Tested-by: NShwetha Nagaraju <Shwetha.nagaraju@intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      d1fc4c6f
    • S
      e1000e: Fix possible overflow in LTR decoding · 04ebaa1c
      Sasha Neftin 提交于
      When we decode the latency and the max_latency, u16 value may not fit
      the required size and could lead to the wrong LTR representation.
      
      Scaling is represented as:
      scale 0 - 1         (2^(5*0)) = 2^0
      scale 1 - 32        (2^(5 *1))= 2^5
      scale 2 - 1024      (2^(5 *2)) =2^10
      scale 3 - 32768     (2^(5 *3)) =2^15
      scale 4 - 1048576   (2^(5 *4)) = 2^20
      scale 5 - 33554432  (2^(5 *4)) = 2^25
      scale 4 and scale 5 required 20 and 25 bits respectively.
      scale 6 reserved.
      
      Replace the u16 type with the u32 type and allow corrected LTR
      representation.
      
      Cc: stable@vger.kernel.org
      Fixes: 44a13a5d ("e1000e: Fix the max snoop/no-snoop latency for 10M")
      Reported-by: NJames Hutchinson <jahutchinson99@googlemail.com>
      Link: https://bugzilla.kernel.org/show_bug.cgi?id=215689Suggested-by: NDima Ruinskiy <dima.ruinskiy@intel.com>
      Signed-off-by: NSasha Neftin <sasha.neftin@intel.com>
      Tested-by: NNaama Meir <naamax.meir@linux.intel.com>
      Tested-by: NJames Hutchinson <jahutchinson99@googlemail.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      04ebaa1c
    • V
      igc: Fix suspending when PTM is active · 822f52e7
      Vinicius Costa Gomes 提交于
      Some mainboard/CPU combinations, in particular, Alder Lake-S with a
      W680 mainboard, have shown problems (system hangs usually, no kernel
      logs) with suspend/resume when PCIe PTM is enabled and active. In some
      cases, it could be reproduced when removing the igc module.
      
      The best we can do is to stop PTM dialogs from the downstream/device
      side before the interface is brought down. PCIe PTM will be re-enabled
      when the interface is being brought up.
      
      Fixes: a90ec848 ("igc: Add support for PTP getcrosststamp()")
      Signed-off-by: NVinicius Costa Gomes <vinicius.gomes@intel.com>
      Tested-by: NNaama Meir <naamax.meir@linux.intel.com>
      Acked-by: NPaul Menzel <pmenzel@molgen.mpg.de>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      822f52e7
  7. 13 4月, 2022 7 次提交
    • S
      igc: Fix BUG: scheduling while atomic · c80a29f0
      Sasha Neftin 提交于
      Replace usleep_range() method with udelay() method to allow atomic contexts
      in low-level MDIO access functions.
      
      The following issue can be seen by doing the following:
      $ modprobe -r bonding
      $ modprobe -v bonding max_bonds=1 mode=1 miimon=100 use_carrier=0
      $ ip link set bond0 up
      $ ifenslave bond0 eth0 eth1
      
      [  982.357308] BUG: scheduling while atomic: kworker/u64:0/9/0x00000002
      [  982.364431] INFO: lockdep is turned off.
      [  982.368824] Modules linked in: bonding sctp ip6_udp_tunnel udp_tunnel mlx4_ib ib_uverbs ib_core mlx4_en mlx4_core nfp tls sunrpc intel_rapl_msr iTCO_wdt iTCO_vendor_support mxm_wmi dcdbas intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel rapl intel_cstate intel_uncore pcspkr lpc_ich mei_me ipmi_ssif mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_power_meter xfs libcrc32c sr_mod cdrom sd_mod t10_pi sg mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ahci libahci crc32c_intel libata i2c_algo_bit tg3 megaraid_sas igc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: bonding]
      [  982.437941] CPU: 25 PID: 9 Comm: kworker/u64:0 Kdump: loaded Tainted: G        W        --------- -  - 4.18.0-348.el8.x86_64+debug #1
      [  982.451333] Hardware name: Dell Inc. PowerEdge R730/0H21J3, BIOS 2.7.0 12/005/2017
      [  982.459791] Workqueue: bond0 bond_mii_monitor [bonding]
      [  982.465622] Call Trace:
      [  982.468355]  dump_stack+0x8e/0xd0
      [  982.472056]  __schedule_bug.cold.60+0x3a/0x60
      [  982.476919]  __schedule+0x147b/0x1bc0
      [  982.481007]  ? firmware_map_remove+0x16b/0x16b
      [  982.485967]  ? hrtimer_fixup_init+0x40/0x40
      [  982.490625]  schedule+0xd9/0x250
      [  982.494227]  schedule_hrtimeout_range_clock+0x10d/0x2c0
      [  982.500058]  ? hrtimer_nanosleep_restart+0x130/0x130
      [  982.505598]  ? hrtimer_init_sleeper_on_stack+0x90/0x90
      [  982.511332]  ? usleep_range+0x88/0x130
      [  982.515514]  ? recalibrate_cpu_khz+0x10/0x10
      [  982.520279]  ? ktime_get+0xab/0x1c0
      [  982.524175]  ? usleep_range+0x88/0x130
      [  982.528355]  usleep_range+0xdd/0x130
      [  982.532344]  ? console_conditional_schedule+0x30/0x30
      [  982.537987]  ? igc_put_hw_semaphore+0x17/0x60 [igc]
      [  982.543432]  igc_read_phy_reg_gpy+0x111/0x2b0 [igc]
      [  982.548887]  igc_phy_has_link+0xfa/0x260 [igc]
      [  982.553847]  ? igc_get_phy_id+0x210/0x210 [igc]
      [  982.558894]  ? lock_acquire+0x34d/0x890
      [  982.563187]  ? lock_downgrade+0x710/0x710
      [  982.567659]  ? rcu_read_unlock+0x50/0x50
      [  982.572039]  igc_check_for_copper_link+0x106/0x210 [igc]
      [  982.577970]  ? igc_config_fc_after_link_up+0x840/0x840 [igc]
      [  982.584286]  ? rcu_read_unlock+0x50/0x50
      [  982.588661]  ? lock_release+0x591/0xb80
      [  982.592939]  ? lock_release+0x591/0xb80
      [  982.597220]  igc_has_link+0x113/0x330 [igc]
      [  982.601887]  ? lock_downgrade+0x710/0x710
      [  982.606362]  igc_ethtool_get_link+0x6d/0x90 [igc]
      [  982.611614]  bond_check_dev_link+0x131/0x2c0 [bonding]
      [  982.617350]  ? bond_time_in_interval+0xd0/0xd0 [bonding]
      [  982.623277]  ? rcu_read_lock_held+0x62/0xc0
      [  982.627944]  ? rcu_read_lock_sched_held+0xe0/0xe0
      [  982.633198]  bond_mii_monitor+0x314/0x2500 [bonding]
      [  982.638738]  ? lock_contended+0x880/0x880
      [  982.643214]  ? bond_miimon_link_change+0xa0/0xa0 [bonding]
      [  982.649336]  ? lock_acquire+0x34d/0x890
      [  982.653615]  ? lock_downgrade+0x710/0x710
      [  982.658089]  ? debug_object_deactivate+0x221/0x340
      [  982.663436]  ? rcu_read_unlock+0x50/0x50
      [  982.667811]  ? debug_print_object+0x2b0/0x2b0
      [  982.672672]  ? __switch_to_asm+0x41/0x70
      [  982.677049]  ? __switch_to_asm+0x35/0x70
      [  982.681426]  ? _raw_spin_unlock_irq+0x24/0x40
      [  982.686288]  ? trace_hardirqs_on+0x20/0x195
      [  982.690956]  ? _raw_spin_unlock_irq+0x24/0x40
      [  982.695818]  process_one_work+0x8f0/0x1770
      [  982.700390]  ? pwq_dec_nr_in_flight+0x320/0x320
      [  982.705443]  ? debug_show_held_locks+0x50/0x50
      [  982.710403]  worker_thread+0x87/0xb40
      [  982.714489]  ? process_one_work+0x1770/0x1770
      [  982.719349]  kthread+0x344/0x410
      [  982.722950]  ? kthread_insert_work_sanity_check+0xd0/0xd0
      [  982.728975]  ret_from_fork+0x3a/0x50
      
      Fixes: 5586838f ("igc: Add code for PHY support")
      Reported-by: NCorinna Vinschen <vinschen@redhat.com>
      Suggested-by: NDima Ruinskiy <dima.ruinskiy@intel.com>
      Signed-off-by: NSasha Neftin <sasha.neftin@intel.com>
      Tested-by: NCorinna Vinschen <vinschen@redhat.com>
      Tested-by: NNaama Meir <naamax.meir@linux.intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      c80a29f0
    • S
      igc: Fix infinite loop in release_swfw_sync · 907862e9
      Sasha Neftin 提交于
      An infinite loop may occur if we fail to acquire the HW semaphore,
      which is needed for resource release.
      This will typically happen if the hardware is surprise-removed.
      At this stage there is nothing to do, except log an error and quit.
      
      Fixes: c0071c7a ("igc: Add HW initialization code")
      Suggested-by: NDima Ruinskiy <dima.ruinskiy@intel.com>
      Signed-off-by: NSasha Neftin <sasha.neftin@intel.com>
      Tested-by: NNaama Meir <naamax.meir@linux.intel.com>
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      907862e9
    • M
      i40e: Add Ethernet Connection X722 for 10GbE SFP+ support · a941d5ee
      Mateusz Palczewski 提交于
      Add support for Ethernet Connection X722 for 10GbE SFP+ cards.
      Make possible for the driver to bind to the card.
      Signed-off-by: NPrzemyslaw Patynowski <przemyslawx.patynowski@intel.com>
      Signed-off-by: NMateusz Palczewski <mateusz.palczewski@intel.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      a941d5ee
    • N
      i40e: Add vsi.tx_restart to i40e ethtool stats · c8631e61
      Nabil S. Alramli 提交于
      Add vsi.tx_restart to the i40e driver ethtool statistics
      Signed-off-by: NNabil S. Alramli <dev@nalramli.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      c8631e61
    • J
      i40e: Add tx_stopped stat · f728fa01
      Joe Damato 提交于
      Track TX queue stop events and export the new stat with ethtool.
      Signed-off-by: NJoe Damato <jdamato@fastly.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      f728fa01
    • J
      ice: Add mpls+tso support · 69e66c04
      Joe Damato 提交于
      Attempt to add mpls+tso support.
      
      I don't have ice hardware available to test myself, but I just implemented
      this feature in i40e and thought it might be useful to implement for ice
      while this is fresh in my brain.
      
      Hoping some one at intel will be able to test this on my behalf.
      Signed-off-by: NJoe Damato <jdamato@fastly.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      69e66c04
    • J
      i40e: Add support for MPLS + TSO · b4fb2d33
      Joe Damato 提交于
      This change adds support for TSO of MPLS packets.
      
      In my tests with tcpdump it seems to work. Note this test setup has
      a 9000 byte MTU:
      
      MPLS (label 100, exp 0, [S], ttl 64) IP srcip.50086 > dstip.1234:
        Flags [P.], seq 593345:644401, ack 0, win 420,
        options [nop,nop,TS val 45022534 ecr 1722291395], length 51056
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 593345, win 122,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 602289, win 105,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 620177, win 71,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      MPLS (label 100, exp 0, [S], ttl 64) IP srcip.50086 > dstip.1234:
        Flags [P.], seq 644401:655953, ack 0, win 420,
        options [nop,nop,TS val 45022534 ecr 1722291395], length 11552
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 638065, win 37,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 644401, win 25,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 653345, win 8,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      
      IP dstip.1234 > srcip.50086: Flags [.], ack 655953, win 3,
        options [nop,nop,TS val 1722291395 ecr 45022534], length 0
      Signed-off-by: NJoe Damato <jdamato@fastly.com>
      Co-developed-by: NMike Gallo <mgallo@fastly.com>
      Signed-off-by: NMike Gallo <mgallo@fastly.com>
      Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
      Signed-off-by: NTony Nguyen <anthony.l.nguyen@intel.com>
      b4fb2d33
  8. 09 4月, 2022 1 次提交