1. 26 4月, 2018 2 次提交
  2. 25 4月, 2018 11 次提交
  3. 24 4月, 2018 14 次提交
  4. 23 4月, 2018 13 次提交
    • X
      bonding: do not set slave_dev npinfo before slave_enable_netpoll in bond_enslave · ddea788c
      Xin Long 提交于
      After Commit 8a8efa22 ("bonding: sync netpoll code with bridge"), it
      would set slave_dev npinfo in slave_enable_netpoll when enslaving a dev
      if bond->dev->npinfo was set.
      
      However now slave_dev npinfo is set with bond->dev->npinfo before calling
      slave_enable_netpoll. With slave_dev npinfo set, __netpoll_setup called
      in slave_enable_netpoll will not call slave dev's .ndo_netpoll_setup().
      It causes that the lower dev of this slave dev can't set its npinfo.
      
      One way to reproduce it:
      
        # modprobe bonding
        # brctl addbr br0
        # brctl addif br0 eth1
        # ifconfig bond0 192.168.122.1/24 up
        # ifenslave bond0 eth2
        # systemctl restart netconsole
        # ifenslave bond0 br0
        # ifconfig eth2 down
        # systemctl restart netconsole
      
      The netpoll won't really work.
      
      This patch is to remove that slave_dev npinfo setting in bond_enslave().
      
      Fixes: 8a8efa22 ("bonding: sync netpoll code with bridge")
      Signed-off-by: NXin Long <lucien.xin@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      ddea788c
    • J
      tcp: don't read out-of-bounds opsize · 7e5a206a
      Jann Horn 提交于
      The old code reads the "opsize" variable from out-of-bounds memory (first
      byte behind the segment) if a broken TCP segment ends directly after an
      opcode that is neither EOL nor NOP.
      
      The result of the read isn't used for anything, so the worst thing that
      could theoretically happen is a pagefault; and since the physmap is usually
      mostly contiguous, even that seems pretty unlikely.
      
      The following C reproducer triggers the uninitialized read - however, you
      can't actually see anything happen unless you put something like a
      pr_warn() in tcp_parse_md5sig_option() to print the opsize.
      
      ====================================
      #define _GNU_SOURCE
      #include <arpa/inet.h>
      #include <stdlib.h>
      #include <errno.h>
      #include <stdarg.h>
      #include <net/if.h>
      #include <linux/if.h>
      #include <linux/ip.h>
      #include <linux/tcp.h>
      #include <linux/in.h>
      #include <linux/if_tun.h>
      #include <err.h>
      #include <sys/types.h>
      #include <sys/stat.h>
      #include <fcntl.h>
      #include <string.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <sys/ioctl.h>
      #include <assert.h>
      
      void systemf(const char *command, ...) {
        char *full_command;
        va_list ap;
        va_start(ap, command);
        if (vasprintf(&full_command, command, ap) == -1)
          err(1, "vasprintf");
        va_end(ap);
        printf("systemf: <<<%s>>>\n", full_command);
        system(full_command);
      }
      
      char *devname;
      
      int tun_alloc(char *name) {
        int fd = open("/dev/net/tun", O_RDWR);
        if (fd == -1)
          err(1, "open tun dev");
        static struct ifreq req = { .ifr_flags = IFF_TUN|IFF_NO_PI };
        strcpy(req.ifr_name, name);
        if (ioctl(fd, TUNSETIFF, &req))
          err(1, "TUNSETIFF");
        devname = req.ifr_name;
        printf("device name: %s\n", devname);
        return fd;
      }
      
      #define IPADDR(a,b,c,d) (((a)<<0)+((b)<<8)+((c)<<16)+((d)<<24))
      
      void sum_accumulate(unsigned int *sum, void *data, int len) {
        assert((len&2)==0);
        for (int i=0; i<len/2; i++) {
          *sum += ntohs(((unsigned short *)data)[i]);
        }
      }
      
      unsigned short sum_final(unsigned int sum) {
        sum = (sum >> 16) + (sum & 0xffff);
        sum = (sum >> 16) + (sum & 0xffff);
        return htons(~sum);
      }
      
      void fix_ip_sum(struct iphdr *ip) {
        unsigned int sum = 0;
        sum_accumulate(&sum, ip, sizeof(*ip));
        ip->check = sum_final(sum);
      }
      
      void fix_tcp_sum(struct iphdr *ip, struct tcphdr *tcp) {
        unsigned int sum = 0;
        struct {
          unsigned int saddr;
          unsigned int daddr;
          unsigned char pad;
          unsigned char proto_num;
          unsigned short tcp_len;
        } fakehdr = {
          .saddr = ip->saddr,
          .daddr = ip->daddr,
          .proto_num = ip->protocol,
          .tcp_len = htons(ntohs(ip->tot_len) - ip->ihl*4)
        };
        sum_accumulate(&sum, &fakehdr, sizeof(fakehdr));
        sum_accumulate(&sum, tcp, tcp->doff*4);
        tcp->check = sum_final(sum);
      }
      
      int main(void) {
        int tun_fd = tun_alloc("inject_dev%d");
        systemf("ip link set %s up", devname);
        systemf("ip addr add 192.168.42.1/24 dev %s", devname);
      
        struct {
          struct iphdr ip;
          struct tcphdr tcp;
          unsigned char tcp_opts[20];
        } __attribute__((packed)) syn_packet = {
          .ip = {
            .ihl = sizeof(struct iphdr)/4,
            .version = 4,
            .tot_len = htons(sizeof(syn_packet)),
            .ttl = 30,
            .protocol = IPPROTO_TCP,
            /* FIXUP check */
            .saddr = IPADDR(192,168,42,2),
            .daddr = IPADDR(192,168,42,1)
          },
          .tcp = {
            .source = htons(1),
            .dest = htons(1337),
            .seq = 0x12345678,
            .doff = (sizeof(syn_packet.tcp)+sizeof(syn_packet.tcp_opts))/4,
            .syn = 1,
            .window = htons(64),
            .check = 0 /*FIXUP*/
          },
          .tcp_opts = {
            /* INVALID: trailing MD5SIG opcode after NOPs */
            1, 1, 1, 1, 1,
            1, 1, 1, 1, 1,
            1, 1, 1, 1, 1,
            1, 1, 1, 1, 19
          }
        };
        fix_ip_sum(&syn_packet.ip);
        fix_tcp_sum(&syn_packet.ip, &syn_packet.tcp);
        while (1) {
          int write_res = write(tun_fd, &syn_packet, sizeof(syn_packet));
          if (write_res != sizeof(syn_packet))
            err(1, "packet write failed");
        }
      }
      ====================================
      
      Fixes: cfb6eeb4 ("[TCP]: MD5 Signature Option (RFC2385) support.")
      Signed-off-by: NJann Horn <jannh@google.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      7e5a206a
    • J
      dma-mapping: postpone cpu addr translation on mmap · 60695be2
      Jacopo Mondi 提交于
      Postpone calling virt_to_page() translation on memory locations not
      guaranteed to be backed by a struct page.  Try first to map memory from
      the device coherent memory pool, then perform translation if that fails.
      
      On some architectures, specifically SH when configured with the SPARSEMEM
      memory model, assuming a struct page is always assigned to a memory
      address lead to unexpected hangs during the virtual to page address
      translation. This patch fixes that specific issue but applies in the
      general case too.
      Suggested-by: NLaurent Pinchart <laurent.pinchart@ideasonboard.com>
      Signed-off-by: NJacopo Mondi <jacopo+renesas@jmondi.org>
      Reviewed-by: NRobin Murphy <robin.murphy@arm.com>
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      60695be2
    • R
      dma-coherent: clarify dma_mmap_from_dev_coherent documentation · 41d0bbc7
      Robin Murphy 提交于
      The use of "correctly mapped" here is misleading, since it can give the
      wrong expectation in the case that the memory *should* have been mapped
      from the per-device pool, but doing so failed for other reasons.
      Signed-off-by: NRobin Murphy <robin.murphy@arm.com>
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      41d0bbc7
    • T
      dma-direct: don't retry allocation for no-op GFP_DMA · 504a918e
      Takashi Iwai 提交于
      When an allocation with lower dma_coherent mask fails, dma_direct_alloc()
      retries the allocation with GFP_DMA.  But, this is useless for
      architectures that hav no ZONE_DMA.
      
      Fix it by adding the check of CONFIG_ZONE_DMA before retrying the
      allocation.
      
      Fixes: 95f18391 ("dma-direct: retry allocations using GFP_DMA for small masks")
      Signed-off-by: NTakashi Iwai <tiwai@suse.de>
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      504a918e
    • L
      Linux 4.17-rc2 · 6d08b06e
      Linus Torvalds 提交于
      6d08b06e
    • D
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf · 986e54cd
      David S. Miller 提交于
      Daniel Borkmann says:
      
      ====================
      pull-request: bpf 2018-04-21
      
      The following pull-request contains BPF updates for your *net* tree.
      
      The main changes are:
      
      1) Fix a deadlock between mm->mmap_sem and bpf_event_mutex when
         one task is detaching a BPF prog via perf_event_detach_bpf_prog()
         and another one dumping through bpf_prog_array_copy_info(). For
         the latter we move the copy_to_user() out of the bpf_event_mutex
         lock to fix it, from Yonghong.
      
      2) Fix test_sock and test_sock_addr.sh failures. The former was
         hitting rlimit issues and the latter required ping to specify
         the address family, from Yonghong.
      
      3) Remove a dead check in sockmap's sock_map_alloc(), from Jann.
      
      4) Add generated files to BPF kselftests gitignore that were previously
         missed, from Anders.
      ====================
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      986e54cd
    • T
      ibmvnic: Clean actual number of RX or TX pools · 660e309d
      Thomas Falcon 提交于
      Avoid using value stored in the login response buffer when
      cleaning TX and RX buffer pools since these could be inconsistent
      depending on the device state. Instead use the field in the driver's
      private data that tracks the number of active pools.
      Signed-off-by: NThomas Falcon <tlfalcon@linux.vnet.ibm.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      660e309d
    • D
      Merge branch 'net-sched-ife-malformed-ife-packet-fixes' · 906cce04
      David S. Miller 提交于
      Alexander Aring says:
      
      ====================
      net: sched: ife: malformed ife packet fixes
      
      As promised at netdev 2.2 tc workshop I am working on adding scapy support for
      tdc testing. It is still work in progress. I will submit the patches to tdc
      later (they are not in good shape yet). The good news is I have been able to
      find bugs which normal packet testing would not be able to find.
      With fuzzy testing I was able to craft certain malformed packets that IFE
      action was not able to deal with. This patch set fixes those bugs.
      
      changes since v4:
       - use pskb_may_pull before pointer assign
      
      changes since v3:
       - use pskb_may_pull
      
      changes since v2:
       - remove inline from __ife_tlv_meta_valid
       - add const to cast to meta_tlvhdr
       - add acked and reviewed tags
      ====================
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      906cce04
    • A
      net: sched: ife: check on metadata length · d57493d6
      Alexander Aring 提交于
      This patch checks if sk buffer is available to dererence ife header. If
      not then NULL will returned to signal an malformed ife packet. This
      avoids to crashing the kernel from outside.
      Signed-off-by: NAlexander Aring <aring@mojatatu.com>
      Reviewed-by: NYotam Gigi <yotam.gi@gmail.com>
      Acked-by: NJamal Hadi Salim <jhs@mojatatu.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      d57493d6
    • A
      net: sched: ife: handle malformed tlv length · cc74eddd
      Alexander Aring 提交于
      There is currently no handling to check on a invalid tlv length. This
      patch adds such handling to avoid killing the kernel with a malformed
      ife packet.
      Signed-off-by: NAlexander Aring <aring@mojatatu.com>
      Reviewed-by: NYotam Gigi <yotam.gi@gmail.com>
      Acked-by: NJamal Hadi Salim <jhs@mojatatu.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      cc74eddd
    • A
      net: sched: ife: signal not finding metaid · f6cd1453
      Alexander Aring 提交于
      We need to record stats for received metadata that we dont know how
      to process. Have find_decode_metaid() return -ENOENT to capture this.
      Signed-off-by: NAlexander Aring <aring@mojatatu.com>
      Reviewed-by: NYotam Gigi <yotam.gi@gmail.com>
      Acked-by: NJamal Hadi Salim <jhs@mojatatu.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      f6cd1453
    • D
      strparser: Do not call mod_delayed_work with a timeout of LONG_MAX · 7c5aba21
      Doron Roberts-Kedes 提交于
      struct sock's sk_rcvtimeo is initialized to
      LONG_MAX/MAX_SCHEDULE_TIMEOUT in sock_init_data. Calling
      mod_delayed_work with a timeout of LONG_MAX causes spurious execution of
      the work function. timer->expires is set equal to jiffies + LONG_MAX.
      When timer_base->clk falls behind the current value of jiffies,
      the delta between timer_base->clk and jiffies + LONG_MAX causes the
      expiration to be in the past. Returning early from strp_start_timer if
      timeo == LONG_MAX solves this problem.
      
      Found while testing net/tls_sw recv path.
      
      Fixes: 43a0c675 ("strparser: Stream parser for messages")
      Reviewed-by: NTejun Heo <tj@kernel.org>
      Signed-off-by: NDoron Roberts-Kedes <doronrk@fb.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      7c5aba21