1. 19 6月, 2022 1 次提交
    • W
      drivers/net/ethernet/neterion/vxge: Fix a use-after-free bug in vxge-main.c · 8fc74d18
      Wentao_Liang 提交于
      The pointer vdev points to a memory region adjacent to a net_device
      structure ndev, which is a field of hldev. At line 4740, the invocation
      to vxge_device_unregister unregisters device hldev, and it also releases
      the memory region pointed by vdev->bar0. At line 4743, the freed memory
      region is referenced (i.e., iounmap(vdev->bar0)), resulting in a
      use-after-free vulnerability. We can fix the bug by calling iounmap
      before vxge_device_unregister.
      
      4721.      static void vxge_remove(struct pci_dev *pdev)
      4722.      {
      4723.             struct __vxge_hw_device *hldev;
      4724.             struct vxgedev *vdev;
      …
      4731.             vdev = netdev_priv(hldev->ndev);
      …
      4740.             vxge_device_unregister(hldev);
      4741.             /* Do not call pci_disable_sriov here, as it
      						will break child devices */
      4742.             vxge_hw_device_terminate(hldev);
      4743.             iounmap(vdev->bar0);
      …
      4749              vxge_debug_init(vdev->level_trace, "%s:%d
      								Device unregistered",
      4750                            __func__, __LINE__);
      4751              vxge_debug_entryexit(vdev->level_trace, "%s:%d
      								Exiting...", __func__,
      4752                          __LINE__);
      4753.      }
      
      This is the screenshot when the vulnerability is triggered by using
      KASAN. We can see that there is a use-after-free reported by KASAN.
      
      /***************************start**************************/
      
      root@kernel:~# echo 1 > /sys/bus/pci/devices/0000:00:03.0/remove
      [  178.296316] vxge_remove
      [  182.057081]
       ==================================================================
      [  182.057548] BUG: KASAN: use-after-free in vxge_remove+0xe0/0x15c
      [  182.057760] Read of size 8 at addr ffff888006c76598 by task bash/119
      [  182.057983]
      [  182.058747] CPU: 0 PID: 119 Comm: bash Not tainted 5.18.0 #5
      [  182.058919] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
      rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
      [  182.059463] Call Trace:
      [  182.059726]  <TASK>
      [  182.060017]  dump_stack_lvl+0x34/0x44
      [  182.060316]  print_report.cold+0xb2/0x6b7
      [  182.060401]  ? kfree+0x89/0x290
      [  182.060478]  ? vxge_remove+0xe0/0x15c
      [  182.060545]  kasan_report+0xa9/0x120
      [  182.060629]  ? vxge_remove+0xe0/0x15c
      [  182.060706]  vxge_remove+0xe0/0x15c
      [  182.060793]  pci_device_remove+0x5d/0xe0
      [  182.060968]  device_release_driver_internal+0xf1/0x180
      [  182.061063]  pci_stop_bus_device+0xae/0xe0
      [  182.061150]  pci_stop_and_remove_bus_device_locked+0x11/0x20
      [  182.061236]  remove_store+0xc6/0xe0
      [  182.061297]  ? subordinate_bus_number_show+0xc0/0xc0
      [  182.061359]  ? __mutex_lock_slowpath+0x10/0x10
      [  182.061438]  ? sysfs_kf_write+0x6d/0xa0
      [  182.061525]  kernfs_fop_write_iter+0x1b0/0x260
      [  182.061610]  ? sysfs_kf_bin_read+0xf0/0xf0
      [  182.061695]  new_sync_write+0x209/0x310
      [  182.061789]  ? new_sync_read+0x310/0x310
      [  182.061865]  ? cgroup_rstat_updated+0x5c/0x170
      [  182.061937]  ? preempt_count_sub+0xf/0xb0
      [  182.061995]  ? pick_next_entity+0x13a/0x220
      [  182.062063]  ? __inode_security_revalidate+0x44/0x80
      [  182.062155]  ? security_file_permission+0x46/0x2a0
      [  182.062230]  vfs_write+0x33f/0x3e0
      [  182.062303]  ksys_write+0xb4/0x150
      [  182.062369]  ? __ia32_sys_read+0x40/0x40
      [  182.062451]  do_syscall_64+0x3b/0x90
      [  182.062531]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
      [  182.062894] RIP: 0033:0x7f3f37d17274
      [  182.063558] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f
      80 00 00 00 00 48 8d 05 89 54 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f
      05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
      [  182.063797] RSP: 002b:00007ffd5ba9e178 EFLAGS: 00000246
      ORIG_RAX: 0000000000000001
      [  182.064117] RAX: ffffffffffffffda RBX: 0000000000000002
      RCX: 00007f3f37d17274
      [  182.064219] RDX: 0000000000000002 RSI: 000055bbec327180
      RDI: 0000000000000001
      [  182.064315] RBP: 000055bbec327180 R08: 000000000000000a
      R09: 00007f3f37de7cf0
      [  182.064414] R10: 000000000000000a R11: 0000000000000246
      R12: 00007f3f37de8760
      [  182.064513] R13: 0000000000000002 R14: 00007f3f37de3760
      R15: 0000000000000002
      [  182.064691]  </TASK>
      [  182.064916]
      [  182.065224] The buggy address belongs to the physical page:
      [  182.065804] page:00000000ef31e4f4 refcount:0 mapcount:0
      mapping:0000000000000000 index:0x0 pfn:0x6c76
      [  182.067419] flags: 0x100000000000000(node=0|zone=1)
      [  182.068997] raw: 0100000000000000 0000000000000000
      ffffea00001b1d88 0000000000000000
      [  182.069118] raw: 0000000000000000 0000000000000000
      00000000ffffffff 0000000000000000
      [  182.069294] page dumped because: kasan: bad access detected
      [  182.069331]
      [  182.069360] Memory state around the buggy address:
      [  182.070006]  ffff888006c76480: ff ff ff ff ff ff ff ff ff ff ff
       ff ff ff ff ff
      [  182.070136]  ffff888006c76500: ff ff ff ff ff ff ff ff ff ff ff
       ff ff ff ff ff
      [  182.070230] >ffff888006c76580: ff ff ff ff ff ff ff ff ff ff ff
       ff ff ff ff ff
      [  182.070305]                             ^
      [  182.070456]  ffff888006c76600: ff ff ff ff ff ff ff ff ff ff ff
       ff ff ff ff ff
      [  182.070505]  ffff888006c76680: ff ff ff ff ff ff ff ff ff ff ff
       ff ff ff ff ff
      [  182.070606]
      ==================================================================
      [  182.071374] Disabling lock debugging due to kernel taint
      
      /*****************************end*****************************/
      
      After applying the fix in this patch, KASAN no longer reports the bug
      and the device (00:03.0) is removed successfully.
      
      /*****************************start***************************/
      
      root@kernel:~# echo 1 > /sys/bus/pci/devices/0000:00:03.0/remove
      root@kernel:~#
      
      /******************************end****************************/
      Signed-off-by: Wentao_Liang <Wentao_Liang_g@163.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8fc74d18
  2. 18 6月, 2022 8 次提交
  3. 17 6月, 2022 16 次提交
    • R
      ipv4: ping: fix bind address validity check · b4a028c4
      Riccardo Paolo Bestetti 提交于
      Commit 8ff978b8 ("ipv4/raw: support binding to nonlocal addresses")
      introduced a helper function to fold duplicated validity checks of bind
      addresses into inet_addr_valid_or_nonlocal(). However, this caused an
      unintended regression in ping_check_bind_addr(), which previously would
      reject binding to multicast and broadcast addresses, but now these are
      both incorrectly allowed as reported in [1].
      
      This patch restores the original check. A simple reordering is done to
      improve readability and make it evident that multicast and broadcast
      addresses should not be allowed. Also, add an early exit for INADDR_ANY
      which replaces lost behavior added by commit 0ce779a9 ("net: Avoid
      unnecessary inet_addr_type() call when addr is INADDR_ANY").
      
      Furthermore, this patch introduces regression selftests to catch these
      specific cases.
      
      [1] https://lore.kernel.org/netdev/CANP3RGdkAcDyAZoT1h8Gtuu0saq+eOrrTiWbxnOs+5zn+cpyKg@mail.gmail.com/
      
      Fixes: 8ff978b8 ("ipv4/raw: support binding to nonlocal addresses")
      Cc: Miaohe Lin <linmiaohe@huawei.com>
      Reported-by: Maciej Żenczykowski <maze@google.com>
      Signed-off-by: Carlos Llamas <cmllamas@google.com>
      Signed-off-by: Riccardo Paolo Bestetti <pbl@bestov.io>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      b4a028c4
    • X
      hamradio: 6pack: fix array-index-out-of-bounds in decode_std_command() · 2b04495e
      Xu Jia 提交于
      Hulk Robot reports an incorrect sp->rx_count_cooked value in
      decode_std_command(). This is most likely caused by the earlier
      subtraction from sp->rx_count_cooked: it seems that the
      sp->rx_count_cooked value is changed to 0 in the meantime, which
      bypasses the previous check.
      
      The situation is shown below:
      
               (Thread 1)			|  (Thread 2)
      decode_std_command()		| resync_tnc()
      ...					|
      if (rest == 2)			|
      	sp->rx_count_cooked -= 2;	|
      else if (rest == 3)			| ...
      					| sp->rx_count_cooked = 0;
      	sp->rx_count_cooked -= 1;	|
      for (i = 0; i < sp->rx_count_cooked; i++) // report error
      	checksum += sp->cooked_buf[i];
      
      sp->rx_count_cooked is a shared variable but is not protected by a lock.
      The same applies to sp->rx_count. This patch adds a lock to fix the bug.
      
      The fail log is shown below:
      =======================================================================
      UBSAN: array-index-out-of-bounds in drivers/net/hamradio/6pack.c:925:31
      index 400 is out of range for type 'unsigned char [400]'
      CPU: 3 PID: 7433 Comm: kworker/u10:1 Not tainted 5.18.0-rc5-00163-g4b97bac0 #2
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
      Workqueue: events_unbound flush_to_ldisc
      Call Trace:
       <TASK>
       dump_stack_lvl+0xcd/0x134
       ubsan_epilogue+0xb/0x50
       __ubsan_handle_out_of_bounds.cold+0x62/0x6c
       sixpack_receive_buf+0xfda/0x1330
       tty_ldisc_receive_buf+0x13e/0x180
       tty_port_default_receive_buf+0x6d/0xa0
       flush_to_ldisc+0x213/0x3f0
       process_one_work+0x98f/0x1620
       worker_thread+0x665/0x1080
       kthread+0x2e9/0x3a0
       ret_from_fork+0x1f/0x30
       ...
      Reported-by: Hulk Robot <hulkci@huawei.com>
      Signed-off-by: Xu Jia <xujia39@huawei.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      2b04495e
    • H
      tipc: fix use-after-free Read in tipc_named_reinit · 911600bf
      Hoang Le 提交于
      syzbot found the following issue on:
      ==================================================================
      BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0
      net/tipc/name_distr.c:413
      Read of size 8 at addr ffff88805299a000 by task kworker/1:9/23764
      
      CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted
      5.18.0-rc4-syzkaller-00878-g17d49e6e #0
      Hardware name: Google Compute Engine/Google Compute Engine,
      BIOS Google 01/01/2011
      Workqueue: events tipc_net_finalize_work
      Call Trace:
       <TASK>
       __dump_stack lib/dump_stack.c:88 [inline]
       dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
       print_address_description.constprop.0.cold+0xeb/0x495
      mm/kasan/report.c:313
       print_report mm/kasan/report.c:429 [inline]
       kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
       tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413
       tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138
       process_one_work+0x996/0x1610 kernel/workqueue.c:2289
       worker_thread+0x665/0x1080 kernel/workqueue.c:2436
       kthread+0x2e9/0x3a0 kernel/kthread.c:376
       ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298
       </TASK>
      [...]
      ==================================================================
      
      In commit
      d966ddcc ("tipc: fix a deadlock when flushing scheduled work"),
      cancel_work_sync() only makes sure that a tipc_net_finalize_work() work
      item executing or pending on any CPU has completed before the tipc
      namespace is destroyed through tipc_exit_net(). But this call does not
      guarantee that the work is the last one queued. So, the destroyed
      instance may be accessed by a work item that is enqueued later.
      
      To fix this completely, we re-order the call to cancel_work_sync() so
      that the work tipc_net_finalize_work() is the last one queued and is
      guaranteed to be completed by the cancel_work_sync() call.
      
      Reported-by: syzbot+47af19f3307fc9c5c82e@syzkaller.appspotmail.com
      Fixes: d966ddcc ("tipc: fix a deadlock when flushing scheduled work")
      Acked-by: Jon Maloy <jmaloy@redhat.com>
      Signed-off-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      911600bf
    • J
      veth: Add updating of trans_start · e66e257a
      Jay Vosburgh 提交于
      Since commit 21a75f09 ("bonding: Fix ARP monitor validation"),
      the bonding ARP / ND link monitors depend on the trans_start time to
      determine link availability.  NETIF_F_LLTX drivers must update trans_start
      directly, which veth does not do.  This prevents use of the ARP or ND link
      monitors with veth interfaces in a bond.
      
      	Resolve this by having veth_xmit update the trans_start time.
      Reported-by: Jonathan Toppins <jtoppins@redhat.com>
      Tested-by: Jonathan Toppins <jtoppins@redhat.com>
      Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
      Fixes: 21a75f09 ("bonding: Fix ARP monitor validation")
      Link: https://lore.kernel.org/netdev/b2fd4147-8f50-bebd-963a-1a3e8d1d9715@redhat.com/
      Signed-off-by: David S. Miller <davem@davemloft.net>
      e66e257a
    • E
      net: fix data-race in dev_isalive() · cc26c266
      Eric Dumazet 提交于
      dev_isalive() is called under RTNL or dev_base_lock protection.
      
      This means that changes to dev->reg_state should be done with both locks held.
      
      syzbot reported:
      
      BUG: KCSAN: data-race in register_netdevice / type_show
      
      write to 0xffff888144ecf518 of 1 bytes by task 20886 on cpu 0:
      register_netdevice+0xb9f/0xdf0 net/core/dev.c:10050
      lapbeth_new_device drivers/net/wan/lapbether.c:414 [inline]
      lapbeth_device_event+0x4a0/0x6c0 drivers/net/wan/lapbether.c:456
      notifier_call_chain kernel/notifier.c:87 [inline]
      raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:455
      __dev_notify_flags+0x1d6/0x3a0
      dev_change_flags+0xa2/0xc0 net/core/dev.c:8607
      do_setlink+0x778/0x2230 net/core/rtnetlink.c:2780
      __rtnl_newlink net/core/rtnetlink.c:3546 [inline]
      rtnl_newlink+0x114c/0x16a0 net/core/rtnetlink.c:3593
      rtnetlink_rcv_msg+0x811/0x8c0 net/core/rtnetlink.c:6089
      netlink_rcv_skb+0x13e/0x240 net/netlink/af_netlink.c:2501
      rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6107
      netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
      netlink_unicast+0x58a/0x660 net/netlink/af_netlink.c:1345
      netlink_sendmsg+0x661/0x750 net/netlink/af_netlink.c:1921
      sock_sendmsg_nosec net/socket.c:714 [inline]
      sock_sendmsg net/socket.c:734 [inline]
      __sys_sendto+0x21e/0x2c0 net/socket.c:2119
      __do_sys_sendto net/socket.c:2131 [inline]
      __se_sys_sendto net/socket.c:2127 [inline]
      __x64_sys_sendto+0x74/0x90 net/socket.c:2127
      do_syscall_x64 arch/x86/entry/common.c:50 [inline]
      do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
      entry_SYSCALL_64_after_hwframe+0x46/0xb0
      
      read to 0xffff888144ecf518 of 1 bytes by task 20423 on cpu 1:
      dev_isalive net/core/net-sysfs.c:38 [inline]
      netdev_show net/core/net-sysfs.c:50 [inline]
      type_show+0x24/0x90 net/core/net-sysfs.c:112
      dev_attr_show+0x35/0x90 drivers/base/core.c:2095
      sysfs_kf_seq_show+0x175/0x240 fs/sysfs/file.c:59
      kernfs_seq_show+0x75/0x80 fs/kernfs/file.c:162
      seq_read_iter+0x2c3/0x8e0 fs/seq_file.c:230
      kernfs_fop_read_iter+0xd1/0x2f0 fs/kernfs/file.c:235
      call_read_iter include/linux/fs.h:2052 [inline]
      new_sync_read fs/read_write.c:401 [inline]
      vfs_read+0x5a5/0x6a0 fs/read_write.c:482
      ksys_read+0xe8/0x1a0 fs/read_write.c:620
      __do_sys_read fs/read_write.c:630 [inline]
      __se_sys_read fs/read_write.c:628 [inline]
      __x64_sys_read+0x3e/0x50 fs/read_write.c:628
      do_syscall_x64 arch/x86/entry/common.c:50 [inline]
      do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
      entry_SYSCALL_64_after_hwframe+0x46/0xb0
      
      value changed: 0x00 -> 0x01
      
      Reported by Kernel Concurrency Sanitizer on:
      CPU: 1 PID: 20423 Comm: udevd Tainted: G W 5.19.0-rc2-syzkaller-dirty #0
      Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
      
      Fixes: 1da177e4 ("Linux-2.6.12-rc2")
      Signed-off-by: Eric Dumazet <edumazet@google.com>
      Reported-by: syzbot <syzkaller@googlegroups.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      cc26c266
    • C
      phy: aquantia: Fix AN when higher speeds than 1G are not advertised · 9b7fd167
      Claudiu Manoil 提交于
      Even when the eth port is restricted to work with speeds not higher than 1G,
      and so the eth driver is requesting the phy (via phylink) to advertise up
      to 1000BASET support, the aquantia phy device is still advertising for 2.5G
      and 5G speeds.
      Clear these advertising defaults when requested.
      
      Cc: Ondrej Spacek <ondrej.spacek@nxp.com>
      Fixes: 09c4c57f ("net: phy: aquantia: add support for auto-negotiation configuration")
      Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
      Link: https://lore.kernel.org/r/20220610084037.7625-1-claudiu.manoil@nxp.com
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      9b7fd167
    • A
      Merge branch 'bpf: Fix cookie values for kprobe multi' · a4a8b2ee
      Alexei Starovoitov 提交于
      Jiri Olsa says:
      
      ====================
      
      hi,
      there's a bug in the kprobe_multi link that makes cookies misplaced when
      using symbols to attach. The reason is that we sort symbols by name
      but not adjacent cookie values. Current test did not find it because
      bpf_fentry_test* are already sorted by name.
      
      v3 changes:
        - fixed kprobe_multi bench test to filter out invalid entries
          from available_filter_functions
      
      v2 changes:
        - rebased on top of bpf/master
        - checking if cookies are defined later in swap function [Andrii]
        - added acks
      
      thanks,
      jirka
      ====================
      Signed-off-by: Alexei Starovoitov <ast@kernel.org>
      a4a8b2ee
    • J
      selftest/bpf: Fix kprobe_multi bench test · 73006702
      Jiri Olsa 提交于
      With [1] the available_filter_functions file contains records
      starting with __ftrace_invalid_address___ and marking disabled
      entries.
      
      We need to filter them out for the bench test to pass only
      resolvable symbols to kernel.
      
      [1] commit b39181f7 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function")
      
      Fixes: b39181f7 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function")
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Link: https://lore.kernel.org/r/20220615112118.497303-5-jolsa@kernel.org
      Signed-off-by: Alexei Starovoitov <ast@kernel.org>
      73006702
    • J
      bpf: Force cookies array to follow symbols sorting · eb5fb032
      Jiri Olsa 提交于
      When user specifies symbols and cookies for kprobe_multi link
      interface it's very likely the cookies will be misplaced and
      returned to wrong functions (via get_attach_cookie helper).
      
      The reason is that to resolve the provided functions we sort
      them before passing them to ftrace_lookup_symbols, but we do
      not do the same sort on the cookie values.
      
      Fixing this by using sort_r function with custom swap callback
      that swaps cookie values as well.
      
      Fixes: 0236fec5 ("bpf: Resolve symbols with ftrace_lookup_symbols for kprobe multi link")
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Link: https://lore.kernel.org/r/20220615112118.497303-4-jolsa@kernel.org
      Signed-off-by: Alexei Starovoitov <ast@kernel.org>
      eb5fb032
    • J
      ftrace: Keep address offset in ftrace_lookup_symbols · eb1b2985
      Jiri Olsa 提交于
      We want to store the resolved address on the same index as
      the symbol string, because that's the user (bpf kprobe link)
      code assumption.
      
      Also making sure we don't store duplicates that might be
      present in kallsyms.
      Acked-by: Song Liu <songliubraving@fb.com>
      Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
      Fixes: bed0d9a5 ("ftrace: Add ftrace_lookup_symbols function")
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Link: https://lore.kernel.org/r/20220615112118.497303-3-jolsa@kernel.org
      Signed-off-by: Alexei Starovoitov <ast@kernel.org>
      eb1b2985
    • J
      selftests/bpf: Shuffle cookies symbols in kprobe multi test · ad884853
      Jiri Olsa 提交于
      There's a kernel bug that causes cookies to be misplaced and
      the reason we did not catch this with this test is that we
      provide bpf_fentry_test* functions already sorted by name.
      
      Shuffling function bpf_fentry_test2 deeper in the list and
      keeping the current cookie values as before will trigger
      the bug.
      
      The kernel fix is coming in following changes.
      Acked-by: Song Liu <songliubraving@fb.com>
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Link: https://lore.kernel.org/r/20220615112118.497303-2-jolsa@kernel.org
      Signed-off-by: Alexei Starovoitov <ast@kernel.org>
      ad884853
    • J
      selftests/bpf: Test tail call counting with bpf2bpf and data on stack · 5e0b0a4c
      Jakub Sitnicki 提交于
      Cover the case when tail call count needs to be passed from BPF function to
      BPF function, and the caller has data on stack. Specifically when the size
      of data allocated on BPF stack is not a multiple of 8.
      Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
      Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
      Link: https://lore.kernel.org/bpf/20220616162037.535469-3-jakub@cloudflare.com
      5e0b0a4c
    • J
      bpf, x86: Fix tail call count offset calculation on bpf2bpf call · ff672c67
      Jakub Sitnicki 提交于
      On x86-64 the tail call count is passed from one BPF function to another
      through %rax. Additionally, on function entry, the tail call count value
      is stored on stack right after the BPF program stack, due to register
      shortage.
      
      The stored count is later loaded from stack either when performing a tail
      call - to check if we have not reached the tail call limit - or before
      calling another BPF function call in order to pass it via %rax.
      
      In the latter case, we miscalculate the offset at which the tail call count
      was stored on function entry. The JIT does not take into account that the
      allocated BPF program stack is always a multiple of 8 on x86, while the
      actual stack depth does not have to be.
      
      This leads to a load from an offset that belongs to the BPF stack, as shown
      in the example below:
      
      SEC("tc")
      int entry(struct __sk_buff *skb)
      {
      	/* Have data on stack which size is not a multiple of 8 */
      	volatile char arr[1] = {};
      	return subprog_tail(skb);
      }
      
      int entry(struct __sk_buff * skb):
         0: (b4) w2 = 0
         1: (73) *(u8 *)(r10 -1) = r2
         2: (85) call pc+1#bpf_prog_ce2f79bb5f3e06dd_F
         3: (95) exit
      
      int entry(struct __sk_buff * skb):
         0xffffffffa0201788:  nop    DWORD PTR [rax+rax*1+0x0]
         0xffffffffa020178d:  xor    eax,eax
         0xffffffffa020178f:  push   rbp
         0xffffffffa0201790:  mov    rbp,rsp
         0xffffffffa0201793:  sub    rsp,0x8
         0xffffffffa020179a:  push   rax
         0xffffffffa020179b:  xor    esi,esi
         0xffffffffa020179d:  mov    BYTE PTR [rbp-0x1],sil
         0xffffffffa02017a1:  mov    rax,QWORD PTR [rbp-0x9]	!!! tail call count
         0xffffffffa02017a8:  call   0xffffffffa02017d8       !!! is at rbp-0x10
         0xffffffffa02017ad:  leave
         0xffffffffa02017ae:  ret
      
      Fix it by rounding up the BPF stack depth to a multiple of 8, when
      calculating the tail call count offset on stack.
      
      Fixes: ebf7d1f5 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT")
      Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
      Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
      Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
      Acked-by: Daniel Borkmann <daniel@iogearbox.net>
      Link: https://lore.kernel.org/bpf/20220616162037.535469-2-jakub@cloudflare.com
      ff672c67
    • L
      Merge tag 'net-5.19-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net · 48a23ec6
      Linus Torvalds 提交于
      Pull networking fixes from Jakub Kicinski:
       "Mostly driver fixes.
      
        Current release - regressions:
      
         - Revert "net: Add a second bind table hashed by port and address",
           needs more work
      
         - amd-xgbe: use platform_irq_count(), static setup of IRQ resources
           had been removed from DT core
      
         - dts: at91: ksz9477_evb: add phy-mode to fix port/phy validation
      
        Current release - new code bugs:
      
         - hns3: modify the ring param print info
      
        Previous releases - always broken:
      
         - axienet: make the 64b addressable DMA depends on 64b architectures
      
         - iavf: fix issue with MAC address of VF shown as zero
      
         - ice: fix PTP TX timestamp offset calculation
      
         - usb: ax88179_178a needs FLAG_SEND_ZLP
      
        Misc:
      
         - document some net.sctp.* sysctls"
      
      * tag 'net-5.19-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (31 commits)
        net: axienet: add missing error return code in axienet_probe()
        Revert "net: Add a second bind table hashed by port and address"
        net: ax25: Fix deadlock caused by skb_recv_datagram in ax25_recvmsg
        net: usb: ax88179_178a needs FLAG_SEND_ZLP
        MAINTAINERS: add include/dt-bindings/net to NETWORKING DRIVERS
        ARM: dts: at91: ksz9477_evb: fix port/phy validation
        net: bgmac: Fix an erroneous kfree() in bgmac_remove()
        ice: Fix memory corruption in VF driver
        ice: Fix queue config fail handling
        ice: Sync VLAN filtering features for DVM
        ice: Fix PTP TX timestamp offset calculation
        mlxsw: spectrum_cnt: Reorder counter pools
        docs: networking: phy: Fix a typo
        amd-xgbe: Use platform_irq_count()
        octeontx2-vf: Add support for adaptive interrupt coalescing
        xilinx:  Fix build on x86.
        net: axienet: Use iowrite64 to write all 64b descriptor pointers
        net: axienet: make the 64b addresable DMA depends on 64b archectures
        net: hns3: fix tm port shapping of fibre port is incorrect after driver initialization
        net: hns3: fix PF rss size initialization bug
        ...
      48a23ec6
    • Y
      net: axienet: add missing error return code in axienet_probe() · 2e7bf4a6
      Yang Yingliang 提交于
      It should return error code in error path in axienet_probe().
      
      Fixes: 00be43a7 ("net: axienet: make the 64b addresable DMA depends on 64b archectures")
      Reported-by: Hulk Robot <hulkci@huawei.com>
      Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
      Link: https://lore.kernel.org/r/20220616062917.3601-1-yangyingliang@huawei.com
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      2e7bf4a6
    • J
      Revert "net: Add a second bind table hashed by port and address" · 593d1ebe
      Joanne Koong 提交于
      This reverts:
      
      commit d5a42de8 ("net: Add a second bind table hashed by port and address")
      commit 538aaf9b ("selftests: Add test for timing a bind request to a port with a populated bhash entry")
      Link: https://lore.kernel.org/netdev/20220520001834.2247810-1-kuba@kernel.org/
      
      There are a few things that need to be fixed here:
      * Updating bhash2 in cases where the socket's rcv saddr changes
      * Adding bhash2 hashbucket locks
      
      Links to syzbot reports:
      https://lore.kernel.org/netdev/00000000000022208805e0df247a@google.com/
      https://lore.kernel.org/netdev/0000000000003f33bc05dfaf44fe@google.com/
      
      Fixes: d5a42de8 ("net: Add a second bind table hashed by port and address")
      Reported-by: syzbot+015d756bbd1f8b5c8f09@syzkaller.appspotmail.com
      Reported-by: syzbot+98fd2d1422063b0f8c44@syzkaller.appspotmail.com
      Reported-by: syzbot+0a847a982613c6438fba@syzkaller.appspotmail.com
      Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
      Link: https://lore.kernel.org/r/20220615193213.2419568-1-joannelkoong@gmail.com
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      593d1ebe
  4. 16 6月, 2022 6 次提交
  5. 15 6月, 2022 9 次提交
    • J
      bpf: Fix request_sock leak in sk lookup helpers · 3046a827
      Jon Maxwell 提交于
      A customer reported a request_socket leak in a Calico cloud environment. We
      found that a BPF program was doing a socket lookup which takes a refcnt on
      the socket and that it was finding the request_socket but returning the parent
      LISTEN socket via sk_to_full_sk() without decrementing the child request socket
      first, resulting in a request_sock slab object leak. This patch retains the
      existing behaviour of returning full socks to the caller but it also decrements
      the child request_socket if one is present before doing so to prevent the leak.
      
      Thanks to Curtis Taylor for all the help in diagnosing and testing this. And
      thanks to Antoine Tenart for the reproducer and patch input.
      
      v2 of this patch contains a refactor as per Daniel Borkmann's suggestions to
      validate RCU flags on the listen socket so that it balances with bpf_sk_release()
      and updated comments as per Martin KaFai Lau's suggestion. One small change to
      Daniel's suggestion: put "sk = sk2" under "if (sk2 != sk)" to avoid an extra
      instruction.
      
      Fixes: f7355a6c ("bpf: Check sk_fullsock() before returning from bpf_sk_lookup()")
      Fixes: edbf8c01 ("bpf: add skc_lookup_tcp helper")
      Co-developed-by: Antoine Tenart <atenart@kernel.org>
      Signed-off-by: Antoine Tenart <atenart@kernel.org>
      Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
      Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
      Tested-by: Curtis Taylor <cutaylor-pub@yahoo.com>
      Cc: Martin KaFai Lau <kafai@fb.com>
      Link: https://lore.kernel.org/bpf/56d6f898-bde0-bb25-3427-12a330b29fb8@iogearbox.net
      Link: https://lore.kernel.org/bpf/20220615011540.813025-1-jmaxwell37@gmail.com
      3046a827
    • D
      net: ax25: Fix deadlock caused by skb_recv_datagram in ax25_recvmsg · 219b51a6
      Duoming Zhou 提交于
      The skb_recv_datagram() in ax25_recvmsg() will hold lock_sock
      and block until it receives a packet from the remote. If the client
      doesn't connect to the server and calls read() directly, it will not
      receive any packets forever. As a result, the deadlock will happen.
      
      The fail log caused by deadlock is shown below:
      
      [  369.606973] INFO: task ax25_deadlock:157 blocked for more than 245 seconds.
      [  369.608919] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
      [  369.613058] Call Trace:
      [  369.613315]  <TASK>
      [  369.614072]  __schedule+0x2f9/0xb20
      [  369.615029]  schedule+0x49/0xb0
      [  369.615734]  __lock_sock+0x92/0x100
      [  369.616763]  ? destroy_sched_domains_rcu+0x20/0x20
      [  369.617941]  lock_sock_nested+0x6e/0x70
      [  369.618809]  ax25_bind+0xaa/0x210
      [  369.619736]  __sys_bind+0xca/0xf0
      [  369.620039]  ? do_futex+0xae/0x1b0
      [  369.620387]  ? __x64_sys_futex+0x7c/0x1c0
      [  369.620601]  ? fpregs_assert_state_consistent+0x19/0x40
      [  369.620613]  __x64_sys_bind+0x11/0x20
      [  369.621791]  do_syscall_64+0x3b/0x90
      [  369.622423]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
      [  369.623319] RIP: 0033:0x7f43c8aa8af7
      [  369.624301] RSP: 002b:00007f43c8197ef8 EFLAGS: 00000246 ORIG_RAX: 0000000000000031
      [  369.625756] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f43c8aa8af7
      [  369.626724] RDX: 0000000000000010 RSI: 000055768e2021d0 RDI: 0000000000000005
      [  369.628569] RBP: 00007f43c8197f00 R08: 0000000000000011 R09: 00007f43c8198700
      [  369.630208] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff845e6afe
      [  369.632240] R13: 00007fff845e6aff R14: 00007f43c8197fc0 R15: 00007f43c8198700
      
      This patch replaces skb_recv_datagram() with an open-coded variant of it
      releasing the socket lock before the __skb_wait_for_more_packets() call
      and re-acquiring it after such call in order that other functions that
      need socket lock could be executed.
      
      What's more, the socket lock is released only when recvmsg() would
      block, which should produce nicer overall behavior.
      
      Fixes: 1da177e4 ("Linux-2.6.12-rc2")
      Suggested-by: Thomas Osterried <thomas@osterried.de>
      Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
      Reported-by: Thomas Habets <thomas@habets.se>
      Acked-by: Paolo Abeni <pabeni@redhat.com>
      Reviewed-by: Eric Dumazet <edumazet@google.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      219b51a6
    • J
      net: usb: ax88179_178a needs FLAG_SEND_ZLP · 36a15e1c
      Jose Alonso 提交于
      The extra byte inserted by usbnet.c when
       (length % dev->maxpacket == 0) is causing problems for the device.
      
      This patch sets FLAG_SEND_ZLP to avoid this.
      
      Tested with: 0b95:1790 ASIX Electronics Corp. AX88179 Gigabit Ethernet
      
      Problems observed:
      ======================================================================
      1) Using ssh/sshfs. The remote sshd daemon can abort with the message:
         "message authentication code incorrect"
         This happens because the tcp message sent is corrupted during the
         USB "Bulk out". The device calculates the tcp checksum and sends a
         valid tcp message to the remote sshd. Then the encryption detects
         the error and aborts.
      2) NETDEV WATCHDOG: ... (ax88179_178a): transmit queue 0 timed out
      3) Stop normal work without any log message.
         The "Bulk in" continues receiving packets normally.
         The host sends "Bulk out" and the device responds with -ECONNRESET.
         (The usbnet.c code tx_complete ignores -ECONNRESET)
      Under normal conditions these errors take days to happen and in
      intense usage take hours.
      
      A test with ping gives packet loss, showing that something is wrong:
      ping -4 -s 462 {destination}	# 462 = 512 - 42 - 8
      Not all packets fail.
      My guess is that the device tries to find another packet starting
      at the extra byte and will fail or not depending on the next
      bytes (old buffer content).
      ======================================================================
      Signed-off-by: Jose Alonso <joalonsof@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      36a15e1c
    • D
      Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue · 371de1aa
      David S. Miller 提交于
      Tony Nguyen says:
      
      ====================
      Intel Wired LAN Driver Updates 2022-06-14
      
      This series contains updates to ice driver only.
      
      Michal fixes incorrect Tx timestamp offset calculation for E822 devices.
      
      Roman enforces required VLAN filtering settings for double VLAN mode.
      
      Przemyslaw fixes memory corruption issues with VFs by ensuring
      queues are disabled in the error path of VF queue configuration and
      for disabled VFs during reset.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      371de1aa
    • L
      MAINTAINERS: add include/dt-bindings/net to NETWORKING DRIVERS · b60377de
      Lukas Bulwahn 提交于
      Maintainers of the directory Documentation/devicetree/bindings/net
      are also the maintainers of the corresponding directory
      include/dt-bindings/net.
      
      Add the file entry for include/dt-bindings/net to the appropriate
      section in MAINTAINERS.
      Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
      Link: https://lore.kernel.org/r/20220613121826.11484-1-lukas.bulwahn@gmail.com
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      b60377de
    • O
      ARM: dts: at91: ksz9477_evb: fix port/phy validation · 56315b6b
      Oleksij Rempel 提交于
      The latest driver version requires phy-mode to be set. Otherwise we will
      use "NA" mode and the switch driver will invalidate this port mode.
      
      Fixes: 65ac79e1 ("net: dsa: microchip: add the phylink get_caps")
      Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
      Link: https://lore.kernel.org/r/20220610081621.584393-1-o.rempel@pengutronix.de
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      56315b6b
    • C
      net: bgmac: Fix an erroneous kfree() in bgmac_remove() · d7dd6ecc
      Christophe JAILLET 提交于
      'bgmac' is part of a managed resource allocated with bgmac_alloc(). It
      should not be freed explicitly.
      
      Remove the erroneous kfree() from the .remove() function.
      
      Fixes: 34a5102c ("net: bgmac: allocate struct bgmac just once & don't copy it")
      Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
      Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
      Link: https://lore.kernel.org/r/a026153108dd21239036a032b95c25b5cece253b.1655153616.git.christophe.jaillet@wanadoo.fr
      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
      d7dd6ecc
    • L
      netfs: fix up netfs_inode_init() docbook comment · 018ab4fa
      Linus Torvalds 提交于
      Commit e81fb419 ("netfs: Further cleanups after struct netfs_inode
      wrapper introduced") changed the argument types and names, and actually
      updated the comment too (although that was thanks to David Howells, not
      me: my original patch only changed the code).
      
      But the comment fixup didn't go quite far enough, and didn't change the
      argument name in the comment, resulting in
      
        include/linux/netfs.h:314: warning: Function parameter or member 'ctx' not described in 'netfs_inode_init'
        include/linux/netfs.h:314: warning: Excess function parameter 'inode' description in 'netfs_inode_init'
      
      during htmldoc generation.
      
      Fixes: e81fb419 ("netfs: Further cleanups after struct netfs_inode wrapper introduced")
      Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      018ab4fa
    • P
      ice: Fix memory corruption in VF driver · efe41860
      Przemyslaw Patynowski 提交于
      Disable the VF's RX/TX queues when the VF is disabled. A VF can have queues
      enabled when it requests a reset. If the PF driver assumes that the VF is
      disabled while the VF still has queues configured, the VF may unmap DMA
      resources. In such a scenario the device can still map packets to memory,
      which ends up silently corrupting it.
      Previously, the VF driver could experience memory corruption, which led to
      a crash:
      [ 5119.170157] BUG: unable to handle kernel paging request at 00001b9780003237
      [ 5119.170166] PGD 0 P4D 0
      [ 5119.170173] Oops: 0002 [#1] PREEMPT_RT SMP PTI
      [ 5119.170181] CPU: 30 PID: 427592 Comm: kworker/u96:2 Kdump: loaded Tainted: G        W I      --------- -  - 4.18.0-372.9.1.rt7.166.el8.x86_64 #1
      [ 5119.170189] Hardware name: Dell Inc. PowerEdge R740/014X06, BIOS 2.3.10 08/15/2019
      [ 5119.170193] Workqueue: iavf iavf_adminq_task [iavf]
      [ 5119.170219] RIP: 0010:__page_frag_cache_drain+0x5/0x30
      [ 5119.170238] Code: 0f 0f b6 77 51 85 f6 74 07 31 d2 e9 05 df ff ff e9 90 fe ff ff 48 8b 05 49 db 33 01 eb b4 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <f0> 29 77 34 74 01 c3 48 8b 07 f6 c4 80 74 0f 0f b6 77 51 85 f6 74
      [ 5119.170244] RSP: 0018:ffffa43b0bdcfd78 EFLAGS: 00010282
      [ 5119.170250] RAX: ffffffff896b3e40 RBX: ffff8fb282524000 RCX: 0000000000000002
      [ 5119.170254] RDX: 0000000049000000 RSI: 0000000000000000 RDI: 00001b9780003203
      [ 5119.170259] RBP: ffff8fb248217b00 R08: 0000000000000022 R09: 0000000000000009
      [ 5119.170262] R10: 2b849d6300000000 R11: 0000000000000020 R12: 0000000000000000
      [ 5119.170265] R13: 0000000000001000 R14: 0000000000000009 R15: 0000000000000000
      [ 5119.170269] FS:  0000000000000000(0000) GS:ffff8fb1201c0000(0000) knlGS:0000000000000000
      [ 5119.170274] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      [ 5119.170279] CR2: 00001b9780003237 CR3: 00000008f3e1a003 CR4: 00000000007726e0
      [ 5119.170283] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      [ 5119.170286] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
      [ 5119.170290] PKRU: 55555554
      [ 5119.170292] Call Trace:
      [ 5119.170298]  iavf_clean_rx_ring+0xad/0x110 [iavf]
      [ 5119.170324]  iavf_free_rx_resources+0xe/0x50 [iavf]
      [ 5119.170342]  iavf_free_all_rx_resources.part.51+0x30/0x40 [iavf]
      [ 5119.170358]  iavf_virtchnl_completion+0xd8a/0x15b0 [iavf]
      [ 5119.170377]  ? iavf_clean_arq_element+0x210/0x280 [iavf]
      [ 5119.170397]  iavf_adminq_task+0x126/0x2e0 [iavf]
      [ 5119.170416]  process_one_work+0x18f/0x420
      [ 5119.170429]  worker_thread+0x30/0x370
      [ 5119.170437]  ? process_one_work+0x420/0x420
      [ 5119.170445]  kthread+0x151/0x170
      [ 5119.170452]  ? set_kthread_struct+0x40/0x40
      [ 5119.170460]  ret_from_fork+0x35/0x40
      [ 5119.170477] Modules linked in: iavf sctp ip6_udp_tunnel udp_tunnel mlx4_en mlx4_core nfp tls vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc intel_rapl_msr iTCO_wdt iTCO_vendor_support dell_smbios wmi_bmof dell_wmi_descriptor dcdbas kvm_intel kvm irqbypass intel_rapl_common isst_if_common skx_edac irdma nfit libnvdimm x86_pkg_temp_thermal i40e intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ib_uverbs rapl ipmi_ssif intel_cstate intel_uncore mei_me pcspkr acpi_ipmi ib_core mei lpc_ich i2c_i801 ipmi_si ipmi_devintf wmi ipmi_msghandler acpi_power_meter xfs libcrc32c sd_mod t10_pi sg mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ice ahci drm libahci crc32c_intel libata tg3 megaraid_sas
      [ 5119.170613]  i2c_algo_bit dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: iavf]
      [ 5119.170627] CR2: 00001b9780003237
      
      Fixes: ec4f5a43 ("ice: Check if VF is disabled for Opcode and other operations")
      Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
      Co-developed-by: Slawomir Laba <slawomirx.laba@intel.com>
      Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
      Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
      Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
      Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
      efe41860