1. 24 10月, 2014 5 次提交
    • D
      Merge branch 'ipv6_route' · fad71e4a
      David S. Miller 提交于
      Martin KaFai Lau says:
      
      ====================
      ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route()
      
      This patch set is trying to reduce the number of fib6_lookup()
      calls from ip6_pol_route().
      
      I have adapted davem's udpflooda and kbench_mod test
      (https://git.kernel.org/pub/scm/linux/kernel/git/davem/net_test_tools.git) to
      support IPv6 and here is the result:
      
      Before:
      [root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done
      
      real    0m34.190s
      user    0m3.047s
      sys     0m31.108s
      
      real    0m34.635s
      user    0m3.125s
      sys     0m31.475s
      
      real    0m34.517s
      user    0m3.034s
      sys     0m31.449s
      
      [root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
      [  660.160976] ip6_route_kbench: ip6_route_output tdiff: 933
      [  660.207261] ip6_route_kbench: ip6_route_output tdiff: 988
      [  660.253492] ip6_route_kbench: ip6_route_output tdiff: 896
      [  660.298862] ip6_route_kbench: ip6_route_output tdiff: 898
      
      After:
      [root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done
      
      real    0m32.695s
      user    0m2.925s
      sys     0m29.737s
      
      real    0m32.636s
      user    0m3.007s
      sys     0m29.596s
      
      real    0m32.797s
      user    0m2.866s
      sys     0m29.898s
      
      [root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
      [  881.220793] ip6_route_kbench: ip6_route_output tdiff: 684
      [  881.253477] ip6_route_kbench: ip6_route_output tdiff: 640
      [  881.286867] ip6_route_kbench: ip6_route_output tdiff: 630
      [  881.320749] ip6_route_kbench: ip6_route_output tdiff: 653
      
      /****************************** udpflood.c ******************************/
      /* It is an adaptation of the Eric Dumazet's and David Miller's
       * udpflood tool, by adding IPv6 support.
       */
      
      typedef uint32_t u32;
      
      /* Set to 1 by the -d option; print_saddr() prints nothing while it is 0. */
      static int debug = 0;
      
      /* Allow -fstrict-aliasing: one object that can be viewed as a generic
       * sockaddr_storage, an IPv4 sockaddr_in or an IPv6 sockaddr_in6
       * without casting between incompatible pointer types.
       */
      typedef union sa_u {
      	struct sockaddr_storage a46;
      	struct sockaddr_in a4;
      	struct sockaddr_in6 a6;
      } sa_u;
      
      /* Print the command-line synopsis and return -1 so callers can write
       * "return usage();".  The option letters listed here mirror the
       * getopt() string used by main() ("dl:s:p:c:").
       */
      static int usage(void)
      {
      	printf("usage: udpflood [ -d ] [ -l count ] [ -s message_size ] "
      	       "[ -p port ] [ -c num_ip_addrs ] IP_ADDRESS\n");
      	return -1;
      }
      
      /* Return, in host byte order, the part of the address that
       * send_packets() iterates over: the whole IPv4 address for PF_INET,
       * otherwise the last 32 bits of the IPv6 address.
       */
      static u32 get_last32h(const sa_u *sa)
      {
      	if (sa->a46.ss_family == PF_INET)
      		return ntohl(sa->a4.sin_addr.s_addr);
      	else
      		return ntohl(sa->a6.sin6_addr.s6_addr32[3]);
      }
      
      /* Store last32h (host byte order) back into the address in network
       * byte order: the whole IPv4 address for PF_INET, otherwise the last
       * 32 bits of the IPv6 address.
       */
      static void set_last32h(sa_u *sa, u32 last32h)
      {
      	if (sa->a46.ss_family == PF_INET)
      		sa->a4.sin_addr.s_addr = htonl(last32h);
      	else
      		sa->a6.sin6_addr.s6_addr32[3] = htonl(last32h);
      }
      
      /* Debug helper: when the global debug flag is set, print
       * "<msg>: <address>" for the IPv4 or IPv6 address held in sa.
       * A placeholder is printed instead of the address when the family is
       * neither PF_INET nor PF_INET6 or when inet_ntop() fails, so the
       * buffer is never read uninitialized.
       */
      static void print_saddr(const sa_u *sa, const char *msg)
      {
      	char buf[64];
      	const char *text = NULL;
      
      	if (!debug)
      		return;
      
      	switch (sa->a46.ss_family) {
      	case PF_INET:
      		text = inet_ntop(PF_INET, &(sa->a4.sin_addr.s_addr), buf,
      				 sizeof(buf));
      		break;
      	case PF_INET6:
      		text = inet_ntop(PF_INET6, &(sa->a6.sin6_addr), buf,
      				 sizeof(buf));
      		break;
      	default:
      		break;
      	}
      
      	printf("%s: %s\n", msg, text ? text : "<unprintable address>");
      }
      
      static int send_packets(const sa_u *sa, size_t num_addrs, int count, int ms=
      g_sz)
      {
      	char *msg =3D malloc(msg_sz);
      	sa_u saddr;
      	u32 start_addr32h, end_addr32h, cur_addr32h;
      	int fd, i, err;
      
      	if (!msg)
      		return -ENOMEM;
      
      	memset(msg, 0, msg_sz);
      
      	memcpy(&saddr, sa, sizeof(saddr));
      	cur_addr32h =3D start_addr32h =3D get_last32h(&saddr);
      	end_addr32h =3D start_addr32h + num_addrs;
      
      	fd =3D socket(saddr.a46.ss_family, SOCK_DGRAM, 0);
      	if (fd < 0) {
      		perror("socket");
      		err =3D fd;
      		goto out_nofd;
      	}
      
      	/* connect to avoid the kernel spending time in figuring
      	 * out the source address (i.e pin the src address)
      	 */
      	err =3D connect(fd, (struct sockaddr *) &saddr, sizeof(saddr));
      	if (err < 0) {
      		perror("connect");
      		goto out;
      	}
      
      	print_saddr(&saddr, "start_addr");
      	for (i =3D 0; i < count; i++) {
      		print_saddr(&saddr, "sendto");
      		err =3D sendto(fd, msg, msg_sz, 0, (struct sockaddr *)&saddr,
      			     sizeof(saddr));
      		if (err < 0) {
      			perror("sendto");
      			goto out;
      		}
      
      		if (++cur_addr32h >=3D end_addr32h)
      			cur_addr32h =3D start_addr32h;
      		set_last32h(&saddr, cur_addr32h);
      	}
      
      	err =3D 0;
      out:
      	close(fd);
      out_nofd:
      	free(msg);
      	return err;
      }
      
      /* Parse the options (-d debug, -l count, -s message size, -p port,
       * -c number of consecutive destination addresses), then flood the
       * IPv4 or IPv6 address given as the first non-option argument.
       * Returns the result of send_packets(), or usage()'s -1 on bad input.
       */
      int main(int argc, char **argv, char **envp)
      {
      	int port, msg_sz, count, num_addrs, ret;
      	sa_u start_addr;
      
      	(void)envp;
      
      	port = 6000;
      	msg_sz = 32;
      	count = 10000000;
      	num_addrs = 1;
      
      	while ((ret = getopt(argc, argv, "dl:s:p:c:")) >= 0) {
      		int *value = NULL;
      
      		switch (ret) {
      		case 'l':
      			value = &count;
      			break;
      		case 's':
      			value = &msg_sz;
      			break;
      		case 'p':
      			value = &port;
      			break;
      		case 'c':
      			value = &num_addrs;
      			break;
      		case 'd':
      			debug = 1;
      			break;
      		case '?':
      		default:
      			return usage();
      		}
      
      		/* reject non-numeric arguments instead of silently keeping
      		 * the default value
      		 */
      		if (value && sscanf(optarg, "%d", value) != 1)
      			return usage();
      	}
      
      	if (num_addrs < 1)
      		return usage();
      
      	if (!argv[optind])
      		return usage();
      
      	/* Zero every field first: for IPv6, sin6_flowinfo and sin6_scope_id
      	 * are never assigned below and must not reach connect()/sendto()
      	 * holding stack garbage.
      	 */
      	memset(&start_addr, 0, sizeof(start_addr));
      
      	if (inet_pton(PF_INET, argv[optind], &start_addr.a4.sin_addr) == 1) {
      		start_addr.a46.ss_family = PF_INET;
      		start_addr.a4.sin_port = htons(port);
      	} else if (inet_pton(PF_INET6, argv[optind],
      			     &start_addr.a6.sin6_addr) == 1) {
      		start_addr.a46.ss_family = PF_INET6;
      		start_addr.a6.sin6_port = htons(port);
      	} else {
      		return usage();
      	}
      
      	return send_packets(&start_addr, num_addrs, count, msg_sz);
      }
      
      /****************** ip6_route_kbench_mod.c ******************/
      
      /* We can't just use "get_cycles()" as on some platforms, such
       * as sparc64, that gives system cycles rather than cpu clock
       * cycles.
       *
       * NOTE(review): this text previously carried three unconditional
       * definitions of get_tick(), which cannot compile together.  The
       * preprocessor lines selecting one of them appear to have been lost;
       * the guards below are a reconstruction -- confirm them against the
       * original module source.
       */
      #if defined(CONFIG_SPARC64)
      /* sparc64: read the %tick register directly */
      static inline unsigned long long get_tick(void)
      {
      	unsigned long long t;
      
      	__asm__ __volatile__("rd %%tick, %0" : "=r" (t));
      	return t;
      }
      #elif defined(CONFIG_X86)
      /* x86: read the time stamp counter */
      static inline unsigned long long get_tick(void)
      {
      	unsigned long long t;
      
      	rdtscll(t);
      
      	return t;
      }
      #else
      /* everything else: fall back to the generic cycle counter */
      static inline unsigned long long get_tick(void)
      {
      	return get_cycles();
      }
      #endif
      
      /* Flow description used by every lookup in this module.  The DEFAULT_*
       * macros are not defined in the visible text (presumably their #define
       * lines were lost together with the #include lines -- TODO confirm).
       */
      static int flow_oif = DEFAULT_OIF;
      static int flow_iif = DEFAULT_IIF;
      static u32 flow_mark = DEFAULT_MARK;
      /* Parsed by flow_setup() from dst_string / src_string. */
      static struct in6_addr flow_dst_ip_addr;
      static struct in6_addr flow_src_ip_addr;
      static int flow_tos = DEFAULT_TOS;
      
      /* Textual IPv6 addresses as given by the "dst" and "src" module parameters. */
      static char dst_string[64];
      static char src_string[64];
      
      module_param_string(dst, dst_string, sizeof(dst_string), 0);
      module_param_string(src, src_string, sizeof(src_string), 0);
      
      /* Convert the "dst" and "src" module parameter strings, when given,
       * into flow_dst_ip_addr / flow_src_ip_addr.  An empty string leaves
       * the corresponding address untouched.
       *
       * Returns 0 on success, -1 if a non-empty string cannot be parsed as
       * an IPv6 address (the offending string is logged).
       */
      static int __init flow_setup(void)
      {
      	if (dst_string[0] &&
      	    !in6_pton(dst_string, -1, &flow_dst_ip_addr.s6_addr[0], -1, NULL)) {
      		pr_info("cannot parse \"%s\"\n", dst_string);
      		return -1;
      	}
      
      	if (src_string[0] &&
      	    !in6_pton(src_string, -1, &flow_src_ip_addr.s6_addr[0], -1, NULL)) {
      		/* report the string that actually failed: src, not dst */
      		pr_info("cannot parse \"%s\"\n", src_string);
      		return -1;
      	}
      
      	return 0;
      }
      
      /* Expose the numeric flow fields as the module parameters
       * "oif", "iif", "mark" and "tos".
       */
      module_param_named(oif, flow_oif, int, 0);
      module_param_named(iif, flow_iif, int, 0);
      module_param_named(mark, flow_mark, uint, 0);
      module_param_named(tos, flow_tos, int, 0);
      
      /* Number of untimed lookups performed before the single timed one;
       * settable through the "count" module parameter.
       */
      static int warmup_count = DEFAULT_WARMUP_COUNT;
      module_param_named(count, warmup_count, int, 0);
      
      /* Reset *fl6 to all zeroes and fill it in from the module-level flow
       * settings (addresses, interfaces, mark, tos); the protocol is always
       * ICMPv6.
       */
      static void flow_init(struct flowi6 *fl6)
      {
      	memset(fl6, 0, sizeof(*fl6));
      
      	/* endpoints */
      	fl6->saddr = flow_src_ip_addr;
      	fl6->daddr = flow_dst_ip_addr;
      
      	/* interfaces */
      	fl6->flowi6_iif = flow_iif;
      	fl6->flowi6_oif = flow_oif;
      
      	/* classification */
      	fl6->flowi6_tos = flow_tos;
      	fl6->flowi6_mark = flow_mark;
      	fl6->flowi6_proto = IPPROTO_ICMPV6;
      }
      
      /* Build the dummy packet used by the input-route benchmark: a 4096-byte
       * skb attached to the device with index flow_iif, carrying an IPv6
       * header filled in from the module-level flow settings.
       *
       * Returns the skb, or NULL (after logging) if the allocation fails or
       * no device with index flow_iif exists in init_net.
       */
      static struct sk_buff * fake_skb_get(void)
      {
      	struct ipv6hdr *hdr;
      	struct sk_buff *skb;
      
      	skb = alloc_skb(4096, GFP_KERNEL);
      	if (!skb) {
      		pr_info("Cannot alloc SKB for test\n");
      		return NULL;
      	}
      	skb->dev = __dev_get_by_index(&init_net, flow_iif);
      	if (skb->dev == NULL) {
      		pr_info("Input device (%d) does not exist\n", flow_iif);
      		goto err;
      	}
      
      	/* NOTE(review): the header offsets are recorded before skb_reserve()
      	 * is called, so ipv6_hdr() below presumably refers to the position
      	 * the data pointer had before the reserve -- confirm this ordering
      	 * is intentional.
      	 */
      	skb_reset_mac_header(skb);
      	skb_reset_network_header(skb);
      	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
      	hdr = ipv6_hdr(skb);
      
      	/* IPv6 header: version 6, no priority/flow label, next header
      	 * ICMPv6, payload length set to the size of one ICMPv6 header
      	 */
      	hdr->priority = 0;
      	hdr->version = 6;
      	memset(hdr->flow_lbl, 0, sizeof(hdr->flow_lbl));
      	hdr->payload_len = htons(sizeof(struct icmp6hdr));
      	hdr->nexthdr = IPPROTO_ICMPV6;
      	hdr->saddr = flow_src_ip_addr;
      	hdr->daddr = flow_dst_ip_addr;
      	skb->protocol = htons(ETH_P_IPV6);
      	skb->mark = flow_mark;
      
      	return skb;
      err:
      	/* error path: release the skb allocated above */
      	kfree_skb(skb);
      	return NULL;
      }
      
      /* Output-path benchmark: perform warmup_count untimed
       * ip6_route_output() lookups for the configured flow, then time one
       * more lookup with get_tick() and log the tick difference.
       */
      static void do_full_output_lookup_bench(void)
      {
      	unsigned long long t1, t2, tdiff;
      	struct rt6_info *rt;
      	struct flowi6 fl6;
      	int i;
      
      	rt = NULL;
      
      	/* warm-up: the flow is rebuilt before every lookup */
      	for (i = 0; i < warmup_count; i++) {
      		flow_init(&fl6);
      
      		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
      		if (IS_ERR(rt))
      			break;
      		ip6_rt_put(rt);
      	}
      	/* NOTE(review): confirm that ip6_route_output() can return an
      	 * ERR_PTR at all; if failures are reported through the returned
      	 * entry instead, this check never fires.
      	 */
      	if (IS_ERR(rt)) {
      		/* name the function that was actually called */
      		pr_info("ip6_route_output: err=%ld\n", PTR_ERR(rt));
      		return;
      	}
      
      	flow_init(&fl6);
      
      	/* the timed lookup: nothing but the call sits between the ticks */
      	t1 = get_tick();
      	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
      	t2 = get_tick();
      	if (!IS_ERR(rt))
      		ip6_rt_put(rt);
      
      	tdiff = t2 - t1;
      	pr_info("ip6_route_output tdiff: %llu\n", tdiff);
      }
      
      /* Inspect the route ip6_route_input() attached to skb, then drop it.
       * Returns non-zero when no route was attached or when it is the
       * namespace's ip6_null_entry.
       */
      static int input_lookup_failed(struct sk_buff *skb)
      {
      	struct rt6_info *route = (struct rt6_info *)skb_dst(skb);
      	int failed = (!route || route == init_net.ipv6.ip6_null_entry);
      
      	skb_dst_drop(skb);
      	return failed;
      }
      
      /* Input-path benchmark: with bottom halves disabled, perform
       * warmup_count untimed ip6_route_input() lookups on a fake skb, then
       * time one more lookup with get_tick() and log the tick difference.
       */
      static void do_full_input_lookup_bench(void)
      {
      	unsigned long long before, after;
      	struct sk_buff *skb;
      	int failed = 0;
      	int round;
      
      	skb = fake_skb_get();
      	if (skb == NULL)
      		goto out_free;
      
      	/* warm-up lookups */
      	local_bh_disable();
      	for (round = 0; round < warmup_count; round++) {
      		ip6_route_input(skb);
      		failed = input_lookup_failed(skb);
      		if (failed)
      			break;
      	}
      	local_bh_enable();
      
      	if (failed) {
      		pr_info("Input route lookup fails\n");
      		goto out_free;
      	}
      
      	/* the timed lookup */
      	local_bh_disable();
      	before = get_tick();
      	ip6_route_input(skb);
      	after = get_tick();
      	local_bh_enable();
      
      	if (input_lookup_failed(skb)) {
      		pr_info("Input route lookup fails\n");
      		goto out_free;
      	}
      
      	pr_info("ip6_route_input tdiff: %llu\n", after - before);
      
      out_free:
      	kfree_skb(skb);
      }
      
      /* Run one benchmark pass: the input-route variant when an input
       * interface index (flow_iif) is configured, the output-route variant
       * otherwise.
       */
      static void do_full_lookup_bench(void)
      {
      	if (flow_iif) {
      		do_full_input_lookup_bench();
      		return;
      	}
      
      	do_full_output_lookup_bench();
      }
      
      /* Run the lookup benchmark four times in a row; each pass logs its
       * own result.
       */
      static void do_bench(void)
      {
      	int pass;
      
      	for (pass = 0; pass < 4; pass++)
      		do_full_lookup_bench();
      }
      
      /* Module entry point: parse the address parameters, log the flow being
       * tested, run the benchmark and then fail the load on purpose.
       *
       * Returns -EINVAL if the parameters cannot be parsed (or, on x86, if
       * no TSC is available); otherwise always -ENODEV, after the benchmark
       * has run.
       */
      static int __init kbench_init(void)
      {
      	if (flow_setup())
      		return -EINVAL;
      
      	pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D("IP6_FMT"),"
      		"S("IP6_FMT"),TOS(0x%02x)]\n",
      		flow_iif, flow_oif, flow_mark,
      		IP6_PRT(flow_dst_ip_addr),
      		IP6_PRT(flow_src_ip_addr),
      		flow_tos);
      
      	/* The TSC requirement only applies to the x86 get_tick(); keep the
      	 * check out of builds for the other architectures get_tick() covers.
      	 * NOTE(review): this guard is presumably what the original source
      	 * had before its preprocessor lines were lost -- confirm.
      	 */
      #ifdef CONFIG_X86
      	if (!cpu_has_tsc) {
      		pr_err("X86 TSC is required, but is unavailable.\n");
      		return -EINVAL;
      	}
      #endif
      
      	pr_info("sizeof(struct rt6_info)==%zu\n", sizeof(struct rt6_info));
      
      	do_bench();
      
      	/* all the work is done above; returning an error here presumably
      	 * keeps the module from staying loaded
      	 */
      	return -ENODEV;
      }
      
      /* Module exit hook: intentionally empty, there is nothing to undo. */
      static void __exit kbench_exit(void)
      {
      }
      
      /* Register the entry/exit hooks and declare the module license. */
      module_init(kbench_init);
      module_exit(kbench_exit);
      MODULE_LICENSE("GPL");
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      fad71e4a
    • M
      ipv6: Avoid redoing fib6_lookup() with reachable = 0 by saving fn · 367efcb9
      Martin KaFai Lau 提交于
      This patch saves the fn before doing rt6_backtrack.
      Hence, without redo-ing the fib6_lookup(), saved_fn can be used
      to redo rt6_select() with RT6_LOOKUP_F_REACHABLE off.
      
      Some minor changes I think make sense to review as a single patch:
      * Remove the 'out:' goto label.
      * Remove the 'reachable' variable. Only use the 'strict' variable instead.
      
      After this patch, "failing ip6_ins_rt()" should be the only case that
      requires a redo of fib6_lookup().
      
      Cc: David Miller <davem@davemloft.net>
      Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
      Signed-off-by: Martin KaFai Lau <kafai@fb.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      367efcb9
    • M
      ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case · 94c77bb4
      Martin KaFai Lau 提交于
      When there is a RTF_CACHE hit, no need to redo fib6_lookup()
      with reachable=0.
      
      Cc: David Miller <davem@davemloft.net>
      Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
      Signed-off-by: Martin KaFai Lau <kafai@fb.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      94c77bb4
    • M
      ipv6: Remove BACKTRACK macro · a3c00e46
      Martin KaFai Lau 提交于
      It is the prep work to reduce the number of calls to fib6_lookup().
      
      The BACKTRACK macro could be hard-to-read and error-prone due to
      its side effects (mainly goto).
      
      This patch is to:
      1. Replace BACKTRACK macro with a function (fib6_backtrack) with the following
         return values:
         * If it is backtrack-able, returns next fn for retry.
         * If it reaches the root, returns NULL.
      2. The caller needs to decide if a backtrack is needed (by testing
         rt == net->ipv6.ip6_null_entry).
      3. Rename the goto labels in ip6_pol_route() to make the next few
         patches easier to read.
      
      Cc: David Miller <davem@davemloft.net>
      Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
      Signed-off-by: Martin KaFai Lau <kafai@fb.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      a3c00e46
    • K
      net: Remove trailing whitespace in tcp.h icmp.c syncookies.c · 105970f6
      Kenjiro Nakayama 提交于
      Remove trailing whitespace in tcp.h icmp.c syncookies.c
      Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      105970f6
  2. 23 10月, 2014 10 次提交
  3. 22 10月, 2014 8 次提交
    • S
      net: sched: initialize bstats syncp · 7c1c97d5
      Sabrina Dubroca 提交于
      Use netdev_alloc_pcpu_stats to allocate percpu stats and initialize syncp.
      
      Fixes: 22e0f8b9 "net: sched: make bstats per cpu and estimator RCU safe"
      Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
      Acked-by: Cong Wang <cwang@twopensource.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      7c1c97d5
    • A
      bpf: fix bug in eBPF verifier · 32bf08a6
      Alexei Starovoitov 提交于
      while comparing for verifier state equivalency the comparison
      was missing a check for uninitialized register.
      Make sure it does so and add a testcase.
      
      Fixes: f1bca824 ("bpf: add search pruning optimization to verifier")
      Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
      Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
      Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      32bf08a6
    • T
      netlink: Re-add locking to netlink_lookup() and seq walker · 78fd1d0a
      Thomas Graf 提交于
      The synchronize_rcu() in netlink_release() introduces unacceptable
      latency. Reintroduce minimal lookup so we can drop the
      synchronize_rcu() until socket destruction has been RCUfied.
      
      Cc: David S. Miller <davem@davemloft.net>
      Cc: Eric Dumazet <eric.dumazet@gmail.com>
      Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
      Reported-and-tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
      Signed-off-by: Thomas Graf <tgraf@suug.ch>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      78fd1d0a
    • Y
      tipc: fix lockdep warning when intra-node messages are delivered · 1a194c2d
      Ying Xue 提交于
      When running tipcTC&tipcTS test suite, below lockdep unsafe locking
      scenario is reported:
      
      [ 1109.997854]
      [ 1109.997988] =================================
      [ 1109.998290] [ INFO: inconsistent lock state ]
      [ 1109.998575] 3.17.0-rc1+ #113 Not tainted
      [ 1109.998762] ---------------------------------
      [ 1109.998762] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
      [ 1109.998762] swapper/7/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
      [ 1109.998762]  (slock-AF_TIPC){+.?...}, at: [<ffffffffa0011969>] tipc_sk_rcv+0x49/0x2b0 [tipc]
      [ 1109.998762] {SOFTIRQ-ON-W} state was registered at:
      [ 1109.998762]   [<ffffffff810a4770>] __lock_acquire+0x6a0/0x1d80
      [ 1109.998762]   [<ffffffff810a6555>] lock_acquire+0x95/0x1e0
      [ 1109.998762]   [<ffffffff81a2d1ce>] _raw_spin_lock+0x3e/0x80
      [ 1109.998762]   [<ffffffffa0011969>] tipc_sk_rcv+0x49/0x2b0 [tipc]
      [ 1109.998762]   [<ffffffffa0004fe8>] tipc_link_xmit+0xa8/0xc0 [tipc]
      [ 1109.998762]   [<ffffffffa000ec6f>] tipc_sendmsg+0x15f/0x550 [tipc]
      [ 1109.998762]   [<ffffffffa000f165>] tipc_connect+0x105/0x140 [tipc]
      [ 1109.998762]   [<ffffffff817676ee>] SYSC_connect+0xae/0xc0
      [ 1109.998762]   [<ffffffff81767b7e>] SyS_connect+0xe/0x10
      [ 1109.998762]   [<ffffffff817a9788>] compat_SyS_socketcall+0xb8/0x200
      [ 1109.998762]   [<ffffffff81a306e5>] sysenter_dispatch+0x7/0x1f
      [ 1109.998762] irq event stamp: 241060
      [ 1109.998762] hardirqs last  enabled at (241060): [<ffffffff8105a4ad>] __local_bh_enable_ip+0x6d/0xd0
      [ 1109.998762] hardirqs last disabled at (241059): [<ffffffff8105a46f>] __local_bh_enable_ip+0x2f/0xd0
      [ 1109.998762] softirqs last  enabled at (241020): [<ffffffff81059a52>] _local_bh_enable+0x22/0x50
      [ 1109.998762] softirqs last disabled at (241021): [<ffffffff8105a626>] irq_exit+0x96/0xc0
      [ 1109.998762]
      [ 1109.998762] other info that might help us debug this:
      [ 1109.998762]  Possible unsafe locking scenario:
      [ 1109.998762]
      [ 1109.998762]        CPU0
      [ 1109.998762]        ----
      [ 1109.998762]   lock(slock-AF_TIPC);
      [ 1109.998762]   <Interrupt>
      [ 1109.998762]     lock(slock-AF_TIPC);
      [ 1109.998762]
      [ 1109.998762]  *** DEADLOCK ***
      [ 1109.998762]
      [ 1109.998762] 2 locks held by swapper/7/0:
      [ 1109.998762]  #0:  (rcu_read_lock){......}, at: [<ffffffff81782dc9>] __netif_receive_skb_core+0x69/0xb70
      [ 1109.998762]  #1:  (rcu_read_lock){......}, at: [<ffffffffa0001c90>] tipc_l2_rcv_msg+0x40/0x260 [tipc]
      [ 1109.998762]
      [ 1109.998762] stack backtrace:
      [ 1109.998762] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 3.17.0-rc1+ #113
      [ 1109.998762] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
      [ 1109.998762]  ffffffff82745830 ffff880016c03828 ffffffff81a209eb 0000000000000007
      [ 1109.998762]  ffff880017b3cac0 ffff880016c03888 ffffffff81a1c5ef 0000000000000001
      [ 1109.998762]  ffff880000000001 ffff880000000000 ffffffff81012d4f 0000000000000000
      [ 1109.998762] Call Trace:
      [ 1109.998762]  <IRQ>  [<ffffffff81a209eb>] dump_stack+0x4e/0x68
      [ 1109.998762]  [<ffffffff81a1c5ef>] print_usage_bug+0x1f1/0x202
      [ 1109.998762]  [<ffffffff81012d4f>] ? save_stack_trace+0x2f/0x50
      [ 1109.998762]  [<ffffffff810a406c>] mark_lock+0x28c/0x2f0
      [ 1109.998762]  [<ffffffff810a3440>] ? print_irq_inversion_bug.part.46+0x1f0/0x1f0
      [ 1109.998762]  [<ffffffff810a467d>] __lock_acquire+0x5ad/0x1d80
      [ 1109.998762]  [<ffffffff810a70dd>] ? trace_hardirqs_on+0xd/0x10
      [ 1109.998762]  [<ffffffff8108ace8>] ? sched_clock_cpu+0x98/0xc0
      [ 1109.998762]  [<ffffffff8108ad2b>] ? local_clock+0x1b/0x30
      [ 1109.998762]  [<ffffffff810a10dc>] ? lock_release_holdtime.part.29+0x1c/0x1a0
      [ 1109.998762]  [<ffffffff8108aa05>] ? sched_clock_local+0x25/0x90
      [ 1109.998762]  [<ffffffffa000dec0>] ? tipc_sk_get+0x60/0x80 [tipc]
      [ 1109.998762]  [<ffffffff810a6555>] lock_acquire+0x95/0x1e0
      [ 1109.998762]  [<ffffffffa0011969>] ? tipc_sk_rcv+0x49/0x2b0 [tipc]
      [ 1109.998762]  [<ffffffff810a6fb6>] ? trace_hardirqs_on_caller+0xa6/0x1c0
      [ 1109.998762]  [<ffffffff81a2d1ce>] _raw_spin_lock+0x3e/0x80
      [ 1109.998762]  [<ffffffffa0011969>] ? tipc_sk_rcv+0x49/0x2b0 [tipc]
      [ 1109.998762]  [<ffffffffa000dec0>] ? tipc_sk_get+0x60/0x80 [tipc]
      [ 1109.998762]  [<ffffffffa0011969>] tipc_sk_rcv+0x49/0x2b0 [tipc]
      [ 1109.998762]  [<ffffffffa00076bd>] tipc_rcv+0x5ed/0x960 [tipc]
      [ 1109.998762]  [<ffffffffa0001d1c>] tipc_l2_rcv_msg+0xcc/0x260 [tipc]
      [ 1109.998762]  [<ffffffffa0001c90>] ? tipc_l2_rcv_msg+0x40/0x260 [tipc]
      [ 1109.998762]  [<ffffffff81783345>] __netif_receive_skb_core+0x5e5/0xb70
      [ 1109.998762]  [<ffffffff81782dc9>] ? __netif_receive_skb_core+0x69/0xb70
      [ 1109.998762]  [<ffffffff81784eb9>] ? dev_gro_receive+0x259/0x4e0
      [ 1109.998762]  [<ffffffff817838f6>] __netif_receive_skb+0x26/0x70
      [ 1109.998762]  [<ffffffff81783acd>] netif_receive_skb_internal+0x2d/0x1f0
      [ 1109.998762]  [<ffffffff81785518>] napi_gro_receive+0xd8/0x240
      [ 1109.998762]  [<ffffffff815bf854>] e1000_clean_rx_irq+0x2c4/0x530
      [ 1109.998762]  [<ffffffff815c1a46>] e1000_clean+0x266/0x9c0
      [ 1109.998762]  [<ffffffff8108ad2b>] ? local_clock+0x1b/0x30
      [ 1109.998762]  [<ffffffff8108aa05>] ? sched_clock_local+0x25/0x90
      [ 1109.998762]  [<ffffffff817842b1>] net_rx_action+0x141/0x310
      [ 1109.998762]  [<ffffffff810bd710>] ? handle_fasteoi_irq+0xe0/0x150
      [ 1109.998762]  [<ffffffff81059fa6>] __do_softirq+0x116/0x4d0
      [ 1109.998762]  [<ffffffff8105a626>] irq_exit+0x96/0xc0
      [ 1109.998762]  [<ffffffff81a30d07>] do_IRQ+0x67/0x110
      [ 1109.998762]  [<ffffffff81a2ee2f>] common_interrupt+0x6f/0x6f
      [ 1109.998762]  <EOI>  [<ffffffff8100d2b7>] ? default_idle+0x37/0x250
      [ 1109.998762]  [<ffffffff8100d2b5>] ? default_idle+0x35/0x250
      [ 1109.998762]  [<ffffffff8100dd1f>] arch_cpu_idle+0xf/0x20
      [ 1109.998762]  [<ffffffff810999fd>] cpu_startup_entry+0x27d/0x4d0
      [ 1109.998762]  [<ffffffff81034c78>] start_secondary+0x188/0x1f0
      
      When intra-node messages are delivered from one process to another
      process, tipc_link_xmit() doesn't disable BH before it directly calls
      tipc_sk_rcv() on process context to forward messages to destination
      socket. Meanwhile, if messages delivered by remote node arrive at the
      node and their destinations are also the same socket, tipc_sk_rcv()
      running on process context might be preempted by tipc_sk_rcv() running
      BH context. As a result, the latter cannot obtain the socket lock as
      the lock was obtained by the former, however, the former has no chance
      to be run as the latter is owning the CPU now, so deadlock happens. To
      avoid it, BH should be always disabled in tipc_sk_rcv().
      Signed-off-by: Ying Xue <ying.xue@windriver.com>
      Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1a194c2d
    • Y
      tipc: fix a potential deadlock · 7b8613e0
      Ying Xue 提交于
      Locking dependency detected below possible unsafe locking scenario:
      
                 CPU0                          CPU1
      T0:  tipc_named_rcv()                tipc_rcv()
      T1:  [grab nametble write lock]*     [grab node lock]*
      T2:  tipc_update_nametbl()           tipc_node_link_up()
      T3:  tipc_nodesub_subscribe()        tipc_nametbl_publish()
      T4:  [grab node lock]*               [grab nametble write lock]*
      
      The opposite order of holding nametbl write lock and node lock on
      above two different paths may result in a deadlock. If we move the
      updating of the name table after link state named out of node
      lock, the reverse order of holding locks will be eliminated and,
      as a result, so will the deadlock risk.
      Signed-off-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      7b8613e0
    • D
      Merge branch 'enic' · 73829bf6
      David S. Miller 提交于
      Govindarajulu Varadarajan says:
      
      ====================
      enic: Bug fixes
      
      This series fixes the following problem.
      
      Please apply this to net.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      73829bf6
    • G
      enic: Do not call napi_disable when preemption is disabled. · 39dc90c1
      Govindarajulu Varadarajan 提交于
      In enic_stop, we disable preemption using local_bh_disable(). We disable
      preemption to wait for busy_poll to finish.
      
      napi_disable should not be called here as it might sleep.
      
      Move the napi_disable() call outside of local_bh_disable.
      
      BUG: sleeping function called from invalid context at include/linux/netdevice.h:477
      in_atomic(): 1, irqs_disabled(): 0, pid: 443, name: ifconfig
      INFO: lockdep is turned off.
      Preemption disabled at:[<ffffffffa029c5c4>] enic_rfs_flw_tbl_free+0x34/0xd0 [enic]
      
      CPU: 31 PID: 443 Comm: ifconfig Not tainted 3.17.0-netnext-05504-g59f35b81 #268
      Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
       ffff8800dac10000 ffff88020b8dfcb8 ffffffff8148a57c 0000000000000000
       ffff88020b8dfcd0 ffffffff8107e253 ffff8800dac12a40 ffff88020b8dfd10
       ffffffffa029305b ffff88020b8dfd48 ffff8800dac10000 ffff88020b8dfd48
      Call Trace:
       [<ffffffff8148a57c>] dump_stack+0x4e/0x7a
       [<ffffffff8107e253>] __might_sleep+0x123/0x1a0
       [<ffffffffa029305b>] enic_stop+0xdb/0x4d0 [enic]
       [<ffffffff8138ed7d>] __dev_close_many+0x9d/0xf0
       [<ffffffff8138ef81>] __dev_close+0x31/0x50
       [<ffffffff813974a8>] __dev_change_flags+0x98/0x160
       [<ffffffff81397594>] dev_change_flags+0x24/0x60
       [<ffffffff814085fd>] devinet_ioctl+0x63d/0x710
       [<ffffffff81139c16>] ? might_fault+0x56/0xc0
       [<ffffffff81409ef5>] inet_ioctl+0x65/0x90
       [<ffffffff813768e0>] sock_do_ioctl+0x20/0x50
       [<ffffffff81376ebb>] sock_ioctl+0x20b/0x2e0
       [<ffffffff81197250>] do_vfs_ioctl+0x2e0/0x500
       [<ffffffff81492619>] ? sysret_check+0x22/0x5d
       [<ffffffff81285f23>] ? __this_cpu_preempt_check+0x13/0x20
       [<ffffffff8109fe19>] ? trace_hardirqs_on_caller+0x119/0x270
       [<ffffffff811974ac>] SyS_ioctl+0x3c/0x80
       [<ffffffff814925ed>] system_call_fastpath+0x1a/0x1f
      Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      39dc90c1
    • G
      enic: fix possible deadlock in enic_stop/ enic_rfs_flw_tbl_free · b6931c9b
      Govindarajulu Varadarajan 提交于
      The following warning is shown when spinlock debug is enabled.
      
      This occurs when enic_flow_may_expire timer function is running and
      enic_stop is called on same CPU.
      
      Fix this by using spin_lock_bh().
      
      =================================
      [ INFO: inconsistent lock state ]
      3.17.0-netnext-05504-g59f35b81 #268 Not tainted
      ---------------------------------
      inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
      ifconfig/443 [HC0[0]:SC0[0]:HE1:SE1] takes:
       (&(&enic->rfs_h.lock)->rlock){+.?...}, at:
      enic_rfs_flw_tbl_free+0x34/0xd0 [enic]
      {IN-SOFTIRQ-W} state was registered at:
        [<ffffffff810a25af>] __lock_acquire+0x83f/0x21c0
        [<ffffffff810a45f2>] lock_acquire+0xa2/0xd0
        [<ffffffff814913fc>] _raw_spin_lock+0x3c/0x80
        [<ffffffffa029c3d5>] enic_flow_may_expire+0x25/0x130[enic]
        [<ffffffff810bcd07>] call_timer_fn+0x77/0x100
        [<ffffffff810bd8e3>] run_timer_softirq+0x1e3/0x270
        [<ffffffff8105f9ae>] __do_softirq+0x14e/0x280
        [<ffffffff8105fdae>] irq_exit+0x8e/0xb0
        [<ffffffff8103da0f>] smp_apic_timer_interrupt+0x3f/0x50
        [<ffffffff81493742>] apic_timer_interrupt+0x72/0x80
        [<ffffffff81018143>] default_idle+0x13/0x20
        [<ffffffff81018a6a>] arch_cpu_idle+0xa/0x10
        [<ffffffff81097676>] cpu_startup_entry+0x2c6/0x330
        [<ffffffff8103b7ad>] start_secondary+0x21d/0x290
      irq event stamp: 2997
      hardirqs last  enabled at (2997): [<ffffffff81491865>] _raw_spin_unlock_irqrestore+0x65/0x90
      hardirqs last disabled at (2996): [<ffffffff814915e6>] _raw_spin_lock_irqsave+0x26/0x90
      softirqs last  enabled at (2968): [<ffffffff813b57a3>] dev_deactivate_many+0x213/0x260
      softirqs last disabled at (2966): [<ffffffff813b5783>] dev_deactivate_many+0x1f3/0x260
      
      other info that might help us debug this:
       Possible unsafe locking scenario:
      
             CPU0
             ----
        lock(&(&enic->rfs_h.lock)->rlock);
        <Interrupt>
          lock(&(&enic->rfs_h.lock)->rlock);
      
       *** DEADLOCK ***
      Reported-by: Jan Stancek <jstancek@redhat.com>
      Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      b6931c9b
  4. 21 10月, 2014 4 次提交
    • D
      Merge branch 'gso_encap_fixes' · d10845fc
      David S. Miller 提交于
      Florian Westphal says:
      
      ====================
      net: minor gso encapsulation fixes
      
      The following series fixes a minor bug in the gso segmentation handlers
      when encapsulation offload is used.
      
      Theoretically this could cause kernel panic when the stack tries
      to software-segment such a GRE offload packet, but it looks like there
      is only one affected call site (tbf scheduler) and it handles NULL
      return value.
      
      I've included a followup patch to add IS_ERR_OR_NULL checks where needed.
      
      While looking into this, I also found that size computation of the individual
      segments is incorrect if skb->encapsulation is set.
      
      Please see individual patches for delta vs. v1.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      d10845fc
    • F
      net: core: handle encapsulation offloads when computing segment lengths · f993bc25
      Florian Westphal 提交于
      if ->encapsulation is set we have to use inner_tcp_hdrlen and add the
      size of the inner network headers too.
      
      This is 'mostly harmless'; tbf might send skb that is slightly over
      quota or drop skb even if it would have fit.
      Signed-off-by: Florian Westphal <fw@strlen.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      f993bc25
    • F
      net: make skb_gso_segment error handling more robust · 330966e5
      Florian Westphal 提交于
      skb_gso_segment has three possible return values:
      1. a pointer to the first segmented skb
      2. an errno value (IS_ERR())
      3. NULL.  This can happen when GSO is used for header verification.
      
      However, several callers currently test IS_ERR instead of IS_ERR_OR_NULL
      and would oops when NULL is returned.
      
      Note that these call sites should never actually see such a NULL return
      value; all callers mask out the GSO bits in the feature argument.
      
      However, there have been issues with some protocol handlers erroneously not
      respecting the specified feature mask in some cases.
      
      It is preferable to get 'have to turn off hw offloading, else slow' reports
      rather than 'kernel crashes'.
      Signed-off-by: Florian Westphal <fw@strlen.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      330966e5
    • F
      net: gso: use feature flag argument in all protocol gso handlers · 1e16aa3d
      Florian Westphal 提交于
      skb_gso_segment() has a 'features' argument representing offload features
      available to the output path.
      
      A few handlers, e.g. GRE, instead re-fetch the features of skb->dev and use
      those instead of the provided ones when handling encapsulation/tunnels.
      
      Depending on dev->hw_enc_features of the output device skb_gso_segment() can
      then return NULL even when the caller has disabled all GSO feature bits,
      as segmentation of inner header thinks device will take care of segmentation.
      
      This e.g. affects the tbf scheduler, which will silently drop GRE-encap GSO skbs
      that did not fit the remaining token quota as the segmentation does not work
      when device supports corresponding hw offload capabilities.
      
      Cc: Pravin B Shelar <pshelar@nicira.com>
      Signed-off-by: Florian Westphal <fw@strlen.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1e16aa3d
  5. 20 10月, 2014 11 次提交
    • D
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf · ce8ec489
      David S. Miller 提交于
      Pablo Neira Ayuso says:
      
      ====================
      netfilter fixes for net
      
      The following patchset contains netfilter fixes for your net tree,
      they are:
      
      1) Fix missing MODULE_LICENSE() in the new nf_reject_ipv{4,6} modules.
      
      2) Restrict nat and masq expressions to the nat chain type. Otherwise,
         users may crash their kernel if they attach a nat/masq rule to a non
         nat chain.
      
      3) Fix hook validation in nft_compat when non-base chains are used.
         Basically, initialize hook_mask to zero.
      
      4) Make sure you use match/targets in nft_compat from the right chain
         type. The existing validation relies on the table name which can be
         avoided by
      
      5) Better netlink attribute validation in nft_nat. This expression has
         to reject the configuration when no address and proto configurations
         are specified.
      
      6) Interpret NFTA_NAT_REG_*_MAX only if NFTA_NAT_REG_*_MIN is set.
         Yet another sanity check to reject incorrect configurations from
         userspace.
      
      7) Conditional NAT attribute dumping depending on the existing
         configuration.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      ce8ec489
    • I
      ax88179_178a: fix bonding failure · 95ff8868
      Ian Morgan 提交于
      The following patch fixes a bug which causes the ax88179_178a driver to be
      incapable of being added to a bond.
      
      When I brought up the issue with the bonding maintainers, they indicated
      that the real problem was with the NIC driver which must return zero for
      success (of setting the MAC address). I see that several other NIC drivers
      follow that pattern by either simply always returning zero, or by passing
      through a negative (error) result while rewriting any positive return code
      to zero. With that same philosophy applied to the ax88179_178a driver, it
      allows it to work correctly with the bonding driver.
      
      I believe this is suitable for queuing in -stable, as it's a small, simple,
      and obvious fix that corrects a defect with no other known workaround.
      
      This patch is against vanilla 3.17(.0).
      Signed-off-by: Ian Morgan <imorgan@primordial.ca>
      
       drivers/net/usb/ax88179_178a.c |    7 ++++++-
       1 file changed, 6 insertions(+), 1 deletion(-)
      Signed-off-by: David S. Miller <davem@davemloft.net>
      95ff8868
    • L
      Merge tag 'ntb-3.18' of git://github.com/jonmason/ntb · 61ed53de
      Linus Torvalds 提交于
      Pull ntb (non-transparent bridge) updates from Jon Mason:
       "Add support for Haswell NTB split BARs, a debugfs entry for basic
        debugging info, and some code clean-ups"
      
      * tag 'ntb-3.18' of git://github.com/jonmason/ntb:
        ntb: Adding split BAR support for Haswell platforms
        ntb: use errata flag set via DID to implement workaround
        ntb: conslidate reading of PPD to move platform detection earlier
        ntb: move platform detection to separate function
        NTB: debugfs device entry
      61ed53de
    • L
      Merge branch 'i2c/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux · 278f1d07
      Linus Torvalds 提交于
      Pull i2c updates from Wolfram Sang:
       "Highlights from the I2C subsystem for 3.18:
      
         - new drivers for Axxia AM55xx, and Hisilicon hix5hd2 SoC.
      
         - designware driver gained AMD support, exynos gained exynos7 support
      
        The rest is usual driver stuff.  Hopefully no lowlights this time"
      
      * 'i2c/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
        i2c: i801: Add Device IDs for Intel Sunrise Point PCH
        i2c: hix5hd2: add i2c controller driver
        i2c-imx: Disable the clock on probe failure
        i2c: designware: Add support for AMD I2C controller
        i2c: designware: Rework probe() to get clock a bit later
        i2c: designware: Default to fast mode in case of ACPI
        i2c: axxia: Add I2C driver for AXM55xx
        i2c: exynos: add support for HSI2C module on Exynos7
        i2c: mxs: detect No Slave Ack on SELECT in PIO mode
        i2c: cros_ec: Remove EC_I2C_FLAG_10BIT
        i2c: cros-ec-tunnel: Add of match table
        i2c: rcar: remove sign-compare flaw
        i2c: ismt: Use minimum descriptor size
        i2c: imx: Add arbitration lost check
        i2c: rk3x: Remove unlikely() annotations
        i2c: rcar: check for no IRQ in rcar_i2c_irq()
        i2c: rcar: make rcar_i2c_prepare_msg() *void*
        i2c: rcar: simplify check for last message
        i2c: designware: add support of platform data to set I2C mode
        i2c: designware: add support of I2C standard mode
      278f1d07
    • L
      Merge tag 'sound-fix-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound · d590c6cd
      Linus Torvalds 提交于
      Pull sound fixes from Takashi Iwai:
       "Here are a collection of small fixes after 3.18 merge.
      
        The urgent one is the fix for kernel panics with linked PCM substream
        triggered by the recent nonatomic PCM ops support.  Other two fixes
        (emu10k1 and bebob) are stable fixes, and one easy PCI ID addition for
        a new Intel HD-audio controller"
      
      * tag 'sound-fix-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound:
        ALSA: hda_intel: Add Device IDs for Intel Sunrise Point PCH
        ALSA: emu10k1: Fix deadlock in synth voice lookup
        ALSA: pcm: Fix referred substream in snd_pcm_action_group() unlock loop
        ALSA: bebob: Fix failure to detect source of clock for Terratec Phase 88
      d590c6cd
    • L
      Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input · fb378df5
      Linus Torvalds 提交于
      Pull second round of input updates from Dmitry Torokhov:
       "Mostly simple bug fixes, although we do have one brand new driver for
        Microchip AR1021 i2c touchscreen.
      
        Also there is the change to stop trying to use i8042 active
        multiplexing by default (it is still possible to activate it via
        i8042.nomux=0 on boxes that implement it)"
      
      * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input:
        Input: xpad - add Thrustmaster as Xbox 360 controller vendor
        Input: xpad - add USB ID for Thrustmaster Ferrari 458 Racing Wheel
        Input: max77693-haptic - fix state check in imax77693_haptic_disable()
        Input: xen-kbdfront - free grant table entry in xenkbd_disconnect_backend
        Input: alps - fix v4 button press recognition
        Input: i8042 - disable active multiplexing by default
        Input: i8042 - add noloop quirk for Asus X750LN
        Input: synaptics - gate forcepad support by DMI check
        Input: Add Microchip AR1021 i2c touchscreen
        Input: cros_ec_keyb - add of match table
        Input: serio - avoid negative serio device numbers
        Input: avoid negative input device numbers
        Input: automatically set EV_ABS bit in input_set_abs_params
        Input: adp5588-keys - cancel workqueue in failure path
        Input: opencores-kbd - switch to using managed resources
        Input: evdev - fix EVIOCG{type} ioctl
      fb378df5
    • L
      Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband · 2eb7f910
      Linus Torvalds 提交于
      Pull infiniband/RDMA updates from Roland Dreier:
       - large set of iSER initiator improvements
       - hardware driver fixes for cxgb4, mlx5 and ocrdma
       - small fixes to core midlayer
      
      * tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (47 commits)
        RDMA/cxgb4: Fix ntuple calculation for ipv6 and remove duplicate line
        RDMA/cxgb4: Add missing neigh_release in find_route
        RDMA/cxgb4: Take IPv6 into account for best_mtu and set_emss
        RDMA/cxgb4: Make c4iw_wr_log_size_order static
        IB/core: Fix XRC race condition in ib_uverbs_open_qp
        IB/core: Clear AH attr variable to prevent garbage data
        RDMA/ocrdma: Save the bit environment, spare unncessary parenthesis
        RDMA/ocrdma: The kernel has a perfectly good BIT() macro - use it
        RDMA/ocrdma: Don't memset() buffers we just allocated with kzalloc()
        RDMA/ocrdma: Remove a unused-label warning
        RDMA/ocrdma: Convert kernel VA to PA for mmap in user
        RDMA/ocrdma: Get vlan tag from ib_qp_attrs
        RDMA/ocrdma: Add default GID at index 0
        IB/mlx5, iser, isert: Add Signature API additions
        Target/iser: Centralize ib_sig_domain setting
        IB/iser: Centralize ib_sig_domain settings
        IB/mlx5: Use extended internal signature layout
        IB/iser: Set IP_CSUM as default guard type
        IB/iser: Remove redundant assignment
        IB/mlx5: Use enumerations for PI copy mask
        ...
      2eb7f910
    • L
      Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip · 1f6075f9
      Linus Torvalds 提交于
      Pull more perf updates from Ingo Molnar:
       "A second (and last) round of late coming fixes and changes, almost all
        of them in perf tooling:
      
        User visible tooling changes:
      
         - Add period data column and make it default in 'perf script' (Jiri
           Olsa)
      
         - Add a visual cue for toggle zeroing of samples in 'perf top'
           (Taeung Song)
      
         - Improve callchains when using libunwind (Namhyung Kim)
      
        Tooling fixes and infrastructure changes:
      
         - Fix for double free in 'perf stat' when using some specific invalid
           command line combo (Yasser Shalabi)
      
         - Fix off-by-one bugs in map->end handling (Stephane Eranian)
      
         - Fix off-by-one bug in maps__find(), also related to map->end
           handling (Namhyung Kim)
      
         - Make struct symbol->end be the first addr after the symbol range,
           to make it match the convention used for struct map->end.  (Arnaldo
           Carvalho de Melo)
      
         - Fix perf_evlist__add_pollfd() error handling in 'perf kvm stat
           live' (Jiri Olsa)
      
         - Fix python test build by moving callchain_param to an object linked
           into the python binding (Jiri Olsa)
      
         - Document sysfs events/ interfaces (Cody P Schafer)
      
         - Fix typos in perf/Documentation (Masanari Iida)
      
         - Add missing 'struct option' forward declaration (Arnaldo Carvalho
           de Melo)
      
         - Add option to copy events when queuing for sorting across cpu
           buffers and enable it for 'perf kvm stat live', to avoid having
           events left in the queue pointing to the ring buffer be rewritten
           in high volume sessions.  (Alexander Yarygin, improving work done
           by David Ahern):
      
         - Do not include a struct hists per perf_evsel, untangling the
           histogram code from perf_evsel, to pave the way for exporting a
           minimalistic tools/lib/api/perf/ library usable by tools/perf and
           initially by the rasd daemon being developed by Borislav Petkov,
           Robert Richter and Jean Pihet.  (Arnaldo Carvalho de Melo)
      
         - Make perf_evlist__open(evlist, NULL, NULL), i.e. without cpu and
           thread maps mean syswide monitoring, reducing the boilerplate for
           tools that only want system wide mode.  (Arnaldo Carvalho de Melo)
      
         - Move exit stuff from perf_evsel__delete to perf_evsel__exit, delete
           should be just a front end for exit + free (Arnaldo Carvalho de
           Melo)
      
         - Add support to new style format of kernel PMU event.  (Kan Liang)
      
        and other misc fixes"
      
      * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
        perf script: Add period as a default output column
        perf script: Add period data column
        perf evsel: No need to drag util/cgroup.h
        perf evlist: Add missing 'struct option' forward declaration
        perf evsel: Move exit stuff from __delete to __exit
        kprobes/x86: Remove stale ARCH_SUPPORTS_KPROBES_ON_FTRACE define
        perf kvm stat live: Enable events copying
        perf session: Add option to copy events when queueing
        perf Documentation: Fix typos in perf/Documentation
        perf trace: Use thread_{,_set}_priv helpers
        perf kvm: Use thread_{,_set}_priv helpers
        perf callchain: Create an address space per thread
        perf report: Set callchain_param.record_mode for future use
        perf evlist: Fix for double free in tools/perf stat
        perf test: Add test case for pmu event new style format
        perf tools: Add support to new style format of kernel PMU event
        perf tools: Parse the pmu event prefix and suffix
        Revert "perf tools: Default to cpu// for events v5"
        perf Documentation: Remove Ruplicated docs for powerpc cpu specific events
        perf Documentation: sysfs events/ interfaces
        ...
      1f6075f9
    • L
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc · 5e2ee7cd
      Linus Torvalds 提交于
      Pull sparc fixes from David Miller:
       "Here we have two bug fixes:
      
        1) The current thread's fault_code is not setup properly upon entry to
           do_sparc64_fault() in some paths, leading to spurious SIGBUS.
      
        2) Don't use a zero length array at the end of thread_info on sparc64,
           otherwise end_of_stack() isn't right"
      
      * git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
        sparc64: Do not define thread fpregs save area as zero-length array.
        sparc64: Fix corrupted thread fault code.
      5e2ee7cd
    • L
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net · e25b4927
      Linus Torvalds 提交于
      Pull networking fixes from David Miller:
       "A quick batch of bug fixes:
      
        1) Fix build with IPV6 disabled, from Eric Dumazet.
      
        2) Several more cases of caching SKB data pointers across calls to
           pskb_may_pull(), thus referencing potentially free'd memory.  From
           Li RongQing.
      
        3) DSA phy code tests operation presence improperly, instead of going:
      
              if (x->ops->foo)
                      r = x->ops->foo(args);
      
           it was going:
      
              if (x->ops->foo(args))
                      r = x->ops->foo(args);
      
         Fix from Andrew Lunn"
      
      * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
        Net: DSA: Fix checking for get_phy_flags function
        ipv6: fix a potential use after free in sit.c
        ipv6: fix a potential use after free in ip6_offload.c
        ipv4: fix a potential use after free in gre_offload.c
        tcp: fix build error if IPv6 is not enabled
      e25b4927
    • A
      Net: DSA: Fix checking for get_phy_flags function · 228b16cb
      Andrew Lunn 提交于
      The check for the presence or not of the optional switch function
      get_phy_flags() called the function, rather than checked to see if it
      is a NULL pointer. This causes a dereference of a NULL pointer on all
      switch chips except the sf2, the only switch to implement this call.
      Signed-off-by: Andrew Lunn <andrew@lunn.ch>
      Fixes: 6819563e ("net: dsa: allow switch drivers to specify phy_device::dev_flags")
      Cc: Florian Fainelli <f.fainelli@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      228b16cb
  6. 19 10月, 2014 2 次提交
    • D
      sparc64: Do not define thread fpregs save area as zero-length array. · e2653143
      David S. Miller 提交于
      This breaks the stack end corruption detection facility.
      
      What that facility does is write a magic value to "end_of_stack()"
      and check to see if it gets overwritten.
      
      "end_of_stack()" is "task_thread_info(p) + 1", which for sparc64 is
      the beginning of the FPU register save area.
      
      So once the user uses the FPU, the magic value is overwritten and the
      debug checks trigger.
      
      Fix this by making the size explicit.
      
      Due to the size we use for the fpsaved[], gsr[], and xfsr[] arrays we
      are limited to 7 levels of FPU state saves.  So each FPU register set
      is 256 bytes, allocate 256 * 7 for the fpregs area.
      Reported-by: Meelis Roos <mroos@linux.ee>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      e2653143
    • D
      sparc64: Fix corrupted thread fault code. · 84bd6d8b
      David S. Miller 提交于
      Every path that ends up at do_sparc64_fault() must install a valid
      FAULT_CODE_* bitmask in the per-thread fault code byte.
      
      Two paths leading to the label winfix_trampoline (which expects the
      FAULT_CODE_* mask in register %g4) were not doing so:
      
      1) For pre-hypervisor TLB protection violation traps, if we took
         the 'winfix_trampoline' path we wouldn't have %g4 initialized
         with the FAULT_CODE_* value yet.  Resulting in using the
         TLB_TAG_ACCESS register address value instead.
      
      2) In the TSB miss path, when we notice that we are going to use a
         hugepage mapping, but we haven't allocated the hugepage TSB yet, we
         still have to take the window fixup case into consideration and
         in that particular path we leave %g4 not setup properly.
      
      Errors of this sort were largely invisible previously, but after
      commit 4ccb9272 ("sparc64: sun4v TLB
      error power off events") we now have a fault_code mask bit
      (FAULT_CODE_BAD_RA) that triggers due to this bug.
      
      FAULT_CODE_BAD_RA triggers because this bit is set in TLB_TAG_ACCESS
      (see #1 above) and thus we get seemingly random bus errors triggered
      for user processes.
      
      Fixes: 4ccb9272 ("sparc64: sun4v TLB error power off events")
      Reported-by: Meelis Roos <mroos@linux.ee>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      84bd6d8b