1. 16 5月, 2014 9 次提交
    • J
      vti6: delete unneeded call to netdev_priv · 112a3513
      Julia Lawall 提交于
      Netdev_priv is an accessor function, and has no purpose if its result is
      not used.
      
      A simplified version of the semantic match that fixes this problem is as
      follows: (http://coccinelle.lip6.fr/)
      
      // <smpl>
      @@ local idexpression x; @@
      -x = netdev_priv(...);
      ... when != x
      // </smpl>
      Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      112a3513
    • J
      ip_tunnel: delete unneeded call to netdev_priv · 4929fd8c
      Julia Lawall 提交于
      Netdev_priv is an accessor function, and has no purpose if its result is
      not used.
      
      A simplified version of the semantic match that fixes this problem is as
      follows: (http://coccinelle.lip6.fr/)
      
      // <smpl>
      @@ local idexpression x; @@
      -x = netdev_priv(...);
      ... when != x
      // </smpl>
      Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      4929fd8c
    • A
      net: filter: x86: internal BPF JIT · 62258278
      Alexei Starovoitov 提交于
      Maps all internal BPF instructions into x86_64 instructions.
      This patch replaces original BPF x64 JIT with internal BPF x64 JIT.
      sysctl net.core.bpf_jit_enable is reused as on/off switch.
      
      Performance:
      
      1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
        No performance difference is observed for filters that were JIT-able before
      
      Example assembler code for BPF filter "tcpdump port 22"
      
      original BPF -> old JIT:            original BPF -> internal BPF -> new JIT:
         0:   push   %rbp                      0:     push   %rbp
         1:   mov    %rsp,%rbp                 1:     mov    %rsp,%rbp
         4:   sub    $0x60,%rsp                4:     sub    $0x228,%rsp
         8:   mov    %rbx,-0x8(%rbp)           b:     mov    %rbx,-0x228(%rbp) // prologue
                                              12:     mov    %r13,-0x220(%rbp)
                                              19:     mov    %r14,-0x218(%rbp)
                                              20:     mov    %r15,-0x210(%rbp)
                                              27:     xor    %eax,%eax         // clear A
         c:   xor    %ebx,%ebx                29:     xor    %r13,%r13         // clear X
         e:   mov    0x68(%rdi),%r9d          2c:     mov    0x68(%rdi),%r9d
        12:   sub    0x6c(%rdi),%r9d          30:     sub    0x6c(%rdi),%r9d
        16:   mov    0xd8(%rdi),%r8           34:     mov    0xd8(%rdi),%r10
                                              3b:     mov    %rdi,%rbx
        1d:   mov    $0xc,%esi                3e:     mov    $0xc,%esi
        22:   callq  0xffffffffe1021e15       43:     callq  0xffffffffe102bd75
        27:   cmp    $0x86dd,%eax             48:     cmp    $0x86dd,%rax
        2c:   jne    0x0000000000000069       4f:     jne    0x000000000000009a
        2e:   mov    $0x14,%esi               51:     mov    $0x14,%esi
        33:   callq  0xffffffffe1021e31       56:     callq  0xffffffffe102bd91
        38:   cmp    $0x84,%eax               5b:     cmp    $0x84,%rax
        3d:   je     0x0000000000000049       62:     je     0x0000000000000074
        3f:   cmp    $0x6,%eax                64:     cmp    $0x6,%rax
        42:   je     0x0000000000000049       68:     je     0x0000000000000074
        44:   cmp    $0x11,%eax               6a:     cmp    $0x11,%rax
        47:   jne    0x00000000000000c6       6e:     jne    0x0000000000000117
        49:   mov    $0x36,%esi               74:     mov    $0x36,%esi
        4e:   callq  0xffffffffe1021e15       79:     callq  0xffffffffe102bd75
        53:   cmp    $0x16,%eax               7e:     cmp    $0x16,%rax
        56:   je     0x00000000000000bf       82:     je     0x0000000000000110
        58:   mov    $0x38,%esi               88:     mov    $0x38,%esi
        5d:   callq  0xffffffffe1021e15       8d:     callq  0xffffffffe102bd75
        62:   cmp    $0x16,%eax               92:     cmp    $0x16,%rax
        65:   je     0x00000000000000bf       96:     je     0x0000000000000110
        67:   jmp    0x00000000000000c6       98:     jmp    0x0000000000000117
        69:   cmp    $0x800,%eax              9a:     cmp    $0x800,%rax
        6e:   jne    0x00000000000000c6       a1:     jne    0x0000000000000117
        70:   mov    $0x17,%esi               a3:     mov    $0x17,%esi
        75:   callq  0xffffffffe1021e31       a8:     callq  0xffffffffe102bd91
        7a:   cmp    $0x84,%eax               ad:     cmp    $0x84,%rax
        7f:   je     0x000000000000008b       b4:     je     0x00000000000000c2
        81:   cmp    $0x6,%eax                b6:     cmp    $0x6,%rax
        84:   je     0x000000000000008b       ba:     je     0x00000000000000c2
        86:   cmp    $0x11,%eax               bc:     cmp    $0x11,%rax
        89:   jne    0x00000000000000c6       c0:     jne    0x0000000000000117
        8b:   mov    $0x14,%esi               c2:     mov    $0x14,%esi
        90:   callq  0xffffffffe1021e15       c7:     callq  0xffffffffe102bd75
        95:   test   $0x1fff,%ax              cc:     test   $0x1fff,%rax
        99:   jne    0x00000000000000c6       d3:     jne    0x0000000000000117
                                              d5:     mov    %rax,%r14
        9b:   mov    $0xe,%esi                d8:     mov    $0xe,%esi
        a0:   callq  0xffffffffe1021e44       dd:     callq  0xffffffffe102bd91 // MSH
                                              e2:     and    $0xf,%eax
                                              e5:     shl    $0x2,%eax
                                              e8:     mov    %rax,%r13
                                              eb:     mov    %r14,%rax
                                              ee:     mov    %r13,%rsi
        a5:   lea    0xe(%rbx),%esi           f1:     add    $0xe,%esi
        a8:   callq  0xffffffffe1021e0d       f4:     callq  0xffffffffe102bd6d
        ad:   cmp    $0x16,%eax               f9:     cmp    $0x16,%rax
        b0:   je     0x00000000000000bf       fd:     je     0x0000000000000110
                                              ff:     mov    %r13,%rsi
        b2:   lea    0x10(%rbx),%esi         102:     add    $0x10,%esi
        b5:   callq  0xffffffffe1021e0d      105:     callq  0xffffffffe102bd6d
        ba:   cmp    $0x16,%eax              10a:     cmp    $0x16,%rax
        bd:   jne    0x00000000000000c6      10e:     jne    0x0000000000000117
        bf:   mov    $0xffff,%eax            110:     mov    $0xffff,%eax
        c4:   jmp    0x00000000000000c8      115:     jmp    0x000000000000011c
        c6:   xor    %eax,%eax               117:     mov    $0x0,%eax
        c8:   mov    -0x8(%rbp),%rbx         11c:     mov    -0x228(%rbp),%rbx // epilogue
        cc:   leaveq                         123:     mov    -0x220(%rbp),%r13
        cd:   retq                           12a:     mov    -0x218(%rbp),%r14
                                             131:     mov    -0x210(%rbp),%r15
                                             138:     leaveq
                                             139:     retq
      
      On fully cached SKBs both JITed functions take 12 nsec to execute.
      BPF interpreter executes the program in 30 nsec.
      
      The difference in generated assembler is due to the following:
      
      Old BPF implements LDX_MSH instruction via sk_load_byte_msh() helper function
      inside bpf_jit.S.
      New JIT removes the helper and does it explicitly, so ldx_msh cost
      is the same for both JITs, but generated code looks longer.
      
      New JIT has 4 registers to save, so prologue/epilogue are larger,
      but the cost is within noise on x64.
      
      Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'.
      New JIT clears %rax unconditionally.
      
      2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
        extensions. New JIT supports all BPF extensions.
        Performance of such filters improves 2-4 times depending on a filter.
        The longer the filter the higher performance gain.
        Synthetic benchmarks with many ancillary loads see 20x speedup
        which seems to be the maximum gain from JIT
      
      Notes:
      
      . net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
        and can be used to see generated assembler
      
      . there are two jit_compile() functions and code flow for classic filters is:
        sk_attach_filter() - load classic BPF
        bpf_jit_compile() - try to JIT from classic BPF
        sk_convert_filter() - convert classic to internal
        bpf_int_jit_compile() - JIT from internal BPF
      
        seccomp and tracing filters will just call bpf_int_jit_compile()
      Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      62258278
    • P
      mac802154: make mac802154_wpan_open static · 6ef0023a
      Phoebe Buckheister 提交于
      This function is only used within the same translation unit, so mark it
      static.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      6ef0023a
    • P
      ieee802154: fix dgram socket sendmsg() · 1cc76e36
      Phoebe Buckheister 提交于
      802.15.4 datagram sockets do not currently have a compliant sendmsg().
      The destination address supplied is always ignored, and in unconnected
      mode, packets are broadcast instead of dropped with -EDESTADDRREQ. This
      patch fixes 802.15.4 dgram sockets to be compliant, i.e.
      
       !conn && !msg_name => -EDESTADDRREQ
       !conn &&  msg_name => send to msg_name
        conn && !msg_name => send to connected
        conn &&  msg_name => -EISCONN
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1cc76e36
    • P
      6lowpan: fix fragmentation · d4b2816d
      Phoebe Buckheister 提交于
      Currently, 6lowpan creates one 802.15.4 MAC header for the original
      packet the device was given by upper layers and reuses this header for
      all fragments, if fragmentation is required. This also reuses frame
      sequence numbers, which must not happen. 6lowpan also has issues with
      fragmentation in the presence of security headers, since those may imply
      the presence of trailing fields that are not accounted for by the
      fragmentation code right now.
      
      Fix both of these issues by properly allocating fragment skbs with
      headroom and tailroom as specified by the underlying device, create one
      header for each skb instead of reusing the original header, let the
      underlying device do the rest.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      d4b2816d
    • P
      ieee802154: change _cb handling slightly · 32edc40a
      Phoebe Buckheister 提交于
      The current mac_cb handling of ieee802154 is rather awkward and limited.
      Decompose the single flags field into multiple fields with the meanings
      of each subfield of the flags field to make future extensions (for
      example, link-layer security) easier. Also don't set the frame sequence
      number in upper layers, since that's a thing the MAC is supposed to set
      on frame transmit - we set it on header creation, but assuming that
      upper layers do not blindly duplicate our headers, this is fine.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      32edc40a
    • P
      mac802154: account for all header parts during wpan header creationg · 8c84296f
      Phoebe Buckheister 提交于
      The current WPAN header creation code checks for EMSGSIZE conditions,
      but does not account for the MIC field that link layer security may add
      at the end of the frame. Now that we can accurately calculate the
      maximum payload size of packets, use that to check for EMSGSIZE
      conditions.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8c84296f
    • P
      ieee802154: add definitions for link-layer security and header functions · c3a6114f
      Phoebe Buckheister 提交于
      When dealing with 802.15.4, one often has to know the maximum payload
      size for a given packet. This depends on many factors, one of which is
      whether or not a security header is present in the frame. These
      definitions and functions provide an easy way for any upper layer to
      calculate the maximum payload size for a packet. The first obvious user
      for this is 6lowpan, which duplicates this calculation and gets it
      partially wrong because it ignores security headers.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c3a6114f
  2. 15 5月, 2014 12 次提交
    • J
      net: Use a more standard macro for INET_ADDR_COOKIE · c7228317
      Joe Perches 提交于
      Missing a colon on definition use is a bit odd so
      change the macro for the 32 bit case to declare an
      __attribute__((unused)) and __deprecated variable.
      
      The __deprecated attribute will cause gcc to emit
      an error if the variable is actually used.
      Signed-off-by: Joe Perches <joe@perches.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c7228317
    • W
      dccp: make the request_retries minimum is 1 · 8ba7e7bf
      wangweidong 提交于
      Documentation/networking/dccp.txt states that request_retries
      should be greater than 0, so set extra1 to &one instead
      of &zero.
      Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
      Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8ba7e7bf
    • W
      snmp: fix some left over of snmp stats · c9f2dba6
      WANG Cong 提交于
      Fengguang reported the following sparse warning:
      
      >> net/ipv6/proc.c:198:41: sparse: incorrect type in argument 1 (different address spaces)
         net/ipv6/proc.c:198:41:    expected void [noderef] <asn:3>*mib
         net/ipv6/proc.c:198:41:    got void [noderef] <asn:3>**pcpumib
      
      Fixes: commit 698365fa (net: clean up snmp stats code)
      Reported-by: Fengguang Wu <fengguang.wu@intel.com>
      Cc: David S. Miller <davem@davemloft.net>
      Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c9f2dba6
    • W
      ipv4: make ip_local_reserved_ports per netns · 122ff243
      WANG Cong 提交于
      ip_local_port_range is already per netns, so should ip_local_reserved_ports
      be. And since it is none by default we don't actually need it when we don't
      enable CONFIG_SYSCTL.
      
      By the way, rename inet_is_reserved_local_port() to inet_is_local_reserved_port()
      
      Cc: "David S. Miller" <davem@davemloft.net>
      Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      122ff243
    • J
      tipc: merge port message reception into socket reception function · 9816f061
      Jon Paul Maloy 提交于
      In order to reduce complexity and save a call level during message
      reception at port/socket level, we remove the function tipc_port_rcv()
      and merge its functionality into tipc_sk_rcv().
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      9816f061
    • J
      tipc: clean up neigbor discovery message reception · c82910e2
      Jon Paul Maloy 提交于
      The function tipc_disc_rcv(), which is handling received neighbor
      discovery messages, is perceived as messy, and it is hard to verify
      its correctness by code inspection. The fact that the task it is set
      to resolve is fairly complex does not make the situation better.
      
      In this commit we try to take a more systematic approach to the
      problem. We define a decision machine which takes three state flags
      as input, and produces three action flags as output. We then walk
      through all permutations of the state flags, and for each of them we
      describe verbally what is going on, plus that we set zero or more of
      the action flags. The action flags indicate what should be done once
      the decision machine has finished its job, while the last part of the
      function deals with performing those actions.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c82910e2
    • J
      tipc: improve and extend media address conversion functions · 38504c28
      Jon Paul Maloy 提交于
      TIPC currently handles two media specific addresses: Ethernet MAC
      addresses and InfiniBand addresses. Those are kept in three different
      formats:
      
      1) A "raw" format as obtained from the device. This format is known
         only by the media specific adapter code in eth_media.c and
         ib_media.c.
      2) A "generic" internal format, in the form of struct tipc_media_addr,
         which can be referenced and passed around by the generic media-
         unaware code.
      3) A serialized version of the latter, to be conveyed in neighbor
         discovery messages.
      
      Conversion between the three formats can only be done by the media
      specific code, so we have function pointers for this purpose in
      struct tipc_media. Here, the media adapters can install their own
      conversion functions at startup.
      
      We now introduce a new such function, 'raw2addr()', whose purpose
      is to convert from format 1 to format 2 above. We also try to as far
      as possible uniform commenting, variable names and usage of these
      functions, with the purpose of making them more comprehensible.
      
      We can now also remove the function tipc_l2_media_addr_set(), whose
      job is done better by the new function.
      
      Finally, we expand the field for serialized addresses (format 3)
      in discovery messages from 20 to 32 bytes. This is permitted
      according to the spec, and reduces the risk of problems when we
      add new media in the future.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      38504c28
    • J
      tipc: rename and move message reassembly function · 37e22164
      Jon Paul Maloy 提交于
      The function tipc_link_frag_rcv() is in reality a re-entrant generic
      message reassembly function that has nothing in particular to do with
      the link, where it is defined now. This becomes obvious when we see
      the need to call the function from other places in the code.
      
      In this commit rename it to tipc_buf_append() and move it to the file
      msg.c. We also simplify its signature by moving the tail pointer to
      the control block of the head buffer, hence making the head buffer
      self-contained.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      37e22164
    • J
      tipc: mark head of reassembly buffer as non-linear · 5074ab89
      Jon Paul Maloy 提交于
      The message reassembly function does not update the 'len' and 'data_len'
      fields of the head skbuff correctly when fragments are chained to it.
      This may sometimes lead to obscure errors, such as fragment reordering
      when we receive fragments which are cloned buffers.
      
      This commit fixes this, by ensuring that the two fields are updated
      correctly.
      Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      5074ab89
    • J
      tipc: don't record link RESET or ACTIVATE messages as traffic · ec37dcd3
      Jon Paul Maloy 提交于
      In the current code, all incoming LINK_PROTOCOL messages, irrespective
      of type, nudge the "last message received" checkpoint, informing the
      link state machine that a message was received from the peer since last
      supervision timeout event. This inhibits the link from starting probing
      the peer unnecessarily.
      
      However, not only STATE messages are recorded as legitimate incoming
      traffic this way, but even RESET and ACTIVATE messages, which in
      reality are there to inform the link that the peer endpoint has been
      reset. At the same time, some RESET messages may be dropped instead
      of causing a link reset. This happens when the link endpoint thinks
      it is fully up and working, and the session number of the RESET is
      lower than or equal to the current link session. In such cases the
      RESET is perceived as a delayed remnant from an earlier session, or
      the current one, and dropped.
      
      Now, if a TIPC module is removed and then immediately reinserted, e.g.
      when using a script, RESET messages may arrive at the peer link endpoint
      before this one has had time to discover the failure. The RESET may be
      dropped because of the session number, but only after it has been
      recorded as a legitimate traffic event. Hence, the receiving link will
      not start probing, and not discover that the peer endpoint is down, at
      the same time ignoring the periodic RESET messages coming from that
      endpoint. We have ended up in a stale state where a failed link cannot
      be re-established.
      
      In this commit, we remedy this by nudging the checkpoint only for
      received STATE messages, not for RESET or ACTIVATE messages.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      ec37dcd3
    • J
      tipc: compensate for double accounting in socket rcv buffer · 4f4482dc
      Jon Paul Maloy 提交于
      The function net/core/sock.c::__release_sock() runs a tight loop
      to move buffers from the socket backlog queue to the receive queue.
      
      As a security measure, sk_backlog.len of the receiving socket
      is not set to zero until after the loop is finished, i.e., until
      the whole backlog queue has been transferred to the receive queue.
      During this transfer, the data that has already been moved is counted
      both in the backlog queue and the receive queue, hence giving an
      incorrect picture of the available queue space for new arriving buffers.
      
      This leads to unnecessary rejection of buffers by sk_add_backlog(),
      which in TIPC leads to unnecessarily broken connections.
      
      In this commit, we compensate for this double accounting by adding
      a counter that keeps track of it. The function socket.c::backlog_rcv()
      receives buffers one by one from __release_sock(), and adds them to the
      socket receive queue. If the transfer is successful, it increases a new
      atomic counter 'tipc_sock::dupl_rcvcnt' with 'truesize' of the
      transferred buffer. If a new buffer arrives during this transfer and
      finds the socket busy (owned), we attempt to add it to the backlog.
      However, when sk_add_backlog() is called, we adjust the 'limit'
      parameter with the value of the new counter, so that the risk of
      inadvertent rejection is eliminated.
      
      It should be noted that this change does not invalidate the original
      purpose of zeroing 'sk_backlog.len' after the full transfer. We set an
      upper limit for dupl_rcvcnt, so that if a 'wild' sender (i.e., one that
      doesn't respect the send window) keeps pumping in buffers to
      sk_add_backlog(), he will eventually reach an upper limit,
      (2 x TIPC_CONN_OVERLOAD_LIMIT). After that, no messages can be added
      to the backlog, and the connection will be broken. Ordinary, well-
      behaved senders will never reach this buffer limit at all.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      4f4482dc
    • J
      tipc: decrease connection flow control window · 6163a194
      Jon Paul Maloy 提交于
      Memory overhead when allocating big buffers for data transfer may
      be quite significant. E.g., truesize of a 64 KB buffer turns out
      to be 132 KB, 2 x the requested size.
      
      This invalidates the "worst case" calculation we have been
      using to determine the default socket receive buffer limit,
      which is based on the assumption that 1024x64KB = 67MB buffers
      may be queued up on a socket.
      
      Since TIPC connections cannot survive hitting the buffer limit,
      we have to compensate for this overhead.
      
      We do that in this commit by reducing the fixed connection flow
      control window from 1024 (2*512) messages to 512 (2*256). Since
      older version nodes send out acks at 512 message intervals,
      compatibility with such nodes is guaranteed, although performance
      may be non-optimal in such cases.
      Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
      Reviewed-by: Ying Xue <ying.xue@windriver.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      6163a194
  3. 14 5月, 2014 11 次提交
  4. 13 5月, 2014 3 次提交
  5. 12 5月, 2014 2 次提交
  6. 10 5月, 2014 3 次提交
    • W
      sctp: add a checking for sctp_sysctl_net_register · f66138c8
      wangweidong 提交于
      When register_net_sysctl fails, we should free the
      sysctl_table.
      Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      f66138c8
    • W
      Revert "sctp: optimize the sctp_sysctl_net_register" · eb9f3705
      wangweidong 提交于
      This reverts commit efb842c4 ("sctp: optimize the sctp_sysctl_net_register").
      That commit doesn't kmemdup a sysctl_table for init_net, so
      init_net->sctp.sysctl_header->ctl_table_arg points to sctp_net_table,
      which is a static array. When sctp_sysctl_net_unregister runs,
      it frees sctp_net_table, and we get a NULL pointer dereference
      like this:
      
      [  262.948220] BUG: unable to handle kernel NULL pointer dereference at 000000000000006c
      [  262.948232] IP: [<ffffffff81144b70>] kfree+0x80/0x420
      [  262.948260] PGD db80a067 PUD dae12067 PMD 0
      [  262.948268] Oops: 0000 [#1] SMP
      [  262.948273] Modules linked in: sctp(-) crc32c_generic libcrc32c
      ...
      [  262.948338] task: ffff8800db830190 ti: ffff8800dad00000 task.ti: ffff8800dad00000
      [  262.948344] RIP: 0010:[<ffffffff81144b70>]  [<ffffffff81144b70>] kfree+0x80/0x420
      [  262.948353] RSP: 0018:ffff8800dad01d88  EFLAGS: 00010046
      [  262.948358] RAX: 0100000000000000 RBX: ffffffffa0227940 RCX: ffffea0000707888
      [  262.948363] RDX: ffffea0000707888 RSI: 0000000000000001 RDI: ffffffffa0227940
      [  262.948369] RBP: ffff8800dad01de8 R08: 0000000000000000 R09: ffff8800d9e983a9
      [  262.948374] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa0227940
      [  262.948380] R13: ffffffff8187cfc0 R14: 0000000000000000 R15: ffffffff8187da10
      [  262.948386] FS:  00007fa2a2658700(0000) GS:ffff880112800000(0000) knlGS:0000000000000000
      [  262.948394] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
      [  262.948400] CR2: 000000000000006c CR3: 00000000cddc0000 CR4: 00000000000006e0
      [  262.948410] Stack:
      [  262.948413]  ffff8800dad01da8 0000000000000286 0000000020227940 ffffffffa0227940
      [  262.948422]  ffff8800dad01dd8 ffffffff811b7fa1 ffffffffa0227940 ffffffffa0227940
      [  262.948431]  ffffffff8187d960 ffffffff8187cfc0 ffffffff8187d960 ffffffff8187da10
      [  262.948440] Call Trace:
      [  262.948457]  [<ffffffff811b7fa1>] ? unregister_sysctl_table+0x51/0xa0
      [  262.948476]  [<ffffffffa020d1a1>] sctp_sysctl_net_unregister+0x21/0x30 [sctp]
      [  262.948490]  [<ffffffffa020ef6d>] sctp_net_exit+0x12d/0x150 [sctp]
      [  262.948512]  [<ffffffff81394f49>] ops_exit_list+0x39/0x60
      [  262.948522]  [<ffffffff813951ed>] unregister_pernet_operations+0x3d/0x70
      [  262.948530]  [<ffffffff81395292>] unregister_pernet_subsys+0x22/0x40
      [  262.948544]  [<ffffffffa020efcc>] sctp_exit+0x3c/0x12d [sctp]
      [  262.948562]  [<ffffffff810c5e04>] SyS_delete_module+0x194/0x210
      [  262.948577]  [<ffffffff81240fde>] ? trace_hardirqs_on_thunk+0x3a/0x3f
      [  262.948587]  [<ffffffff815217a2>] system_call_fastpath+0x16/0x1b
      
      With this revert, the Oops no longer occurs.
      Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      eb9f3705
    • W
      rds: remove the unneed NULL checking · be7faf71
      wangweidong 提交于
      unregister_net_sysctl_table will check the ctl_table_header,
      so remove the unneeded check.
      Signed-off-by: NWang Weidong <wangweidong1@huawei.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      be7faf71