1. 26 7月, 2008 3 次提交
  2. 25 7月, 2008 8 次提交
    • H
      ipsec: ipcomp - Decompress into frags if necessary · 7d7e5a60
      Herbert Xu 提交于
      When decompressing extremely large packets allocating them through
      kmalloc is prone to failure.  Therefore it's better to use page
      frags instead.
      Signed-off-by: NHerbert Xu <herbert@gondor.apana.org.au>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      7d7e5a60
    • H
      ipsec: ipcomp - Merge IPComp implementations · 6fccab67
      Herbert Xu 提交于
      This patch merges the IPv4/IPv6 IPComp implementations since most
      of the code is identical.  As a result future enhancements will no
      longer need to be duplicated.
      Signed-off-by: NHerbert Xu <herbert@gondor.apana.org.au>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      6fccab67
    • D
      pkt_sched: Fix locking in shutdown_scheduler_queue() · cffe1c5d
      David S. Miller 提交于
      Qdisc locks need to be held with BH disabled.
      Tested-by: NIngo Molnar <mingo@elte.hu>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      cffe1c5d
    • U
      flag parameters: check magic constants · e38b36f3
      Ulrich Drepper 提交于
      This patch adds test that ensure the boundary conditions for the various
      constants introduced in the previous patches is met.  No code is generated.
      
      [akpm@linux-foundation.org: fix alpha]
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      e38b36f3
    • U
      flag parameters: NONBLOCK in socket and socketpair · 77d27200
      Ulrich Drepper 提交于
      This patch introduces support for the SOCK_NONBLOCK flag in socket,
      socketpair, and  paccept.  To do this the internal function sock_attach_fd
      gets an additional parameter which it uses to set the appropriate flag for
      the file descriptor.
      
      Given that in modern, scalable programs almost all socket connections are
      non-blocking and the minimal additional cost for the new functionality
      I see no reason not to add this code.
      
      The following test must be adjusted for architectures other than x86 and
      x86-64 and in case the syscall numbers changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <fcntl.h>
      #include <pthread.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <netinet/in.h>
      #include <sys/socket.h>
      #include <sys/syscall.h>
      
      #ifndef __NR_paccept
      # ifdef __x86_64__
      #  define __NR_paccept 288
      # elif defined __i386__
      #  define SYS_PACCEPT 18
      #  define USE_SOCKETCALL 1
      # else
      #  error "need __NR_paccept"
      # endif
      #endif
      
      #ifdef USE_SOCKETCALL
      # define paccept(fd, addr, addrlen, mask, flags) \
        ({ long args[6] = { \
             (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
           syscall (__NR_socketcall, SYS_PACCEPT, args); })
      #else
      # define paccept(fd, addr, addrlen, mask, flags) \
        syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
      #endif
      
      #define PORT 57392
      
      #define SOCK_NONBLOCK O_NONBLOCK
      
      static pthread_barrier_t b;
      
      static void *
      tf (void *arg)
      {
        pthread_barrier_wait (&b);
        int s = socket (AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in sin;
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
        pthread_barrier_wait (&b);
      
        pthread_barrier_wait (&b);
        s = socket (AF_INET, SOCK_STREAM, 0);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
        pthread_barrier_wait (&b);
      
        return NULL;
      }
      
      int
      main (void)
      {
        int fd;
        fd = socket (PF_INET, SOCK_STREAM, 0);
        if (fd == -1)
          {
            puts ("socket(0) failed");
            return 1;
          }
        int fl = fcntl (fd, F_GETFL);
        if (fl == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if (fl & O_NONBLOCK)
          {
            puts ("socket(0) set non-blocking mode");
            return 1;
          }
        close (fd);
      
        fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
        if (fd == -1)
          {
            puts ("socket(SOCK_NONBLOCK) failed");
            return 1;
          }
        fl = fcntl (fd, F_GETFL);
        if (fl == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if ((fl & O_NONBLOCK) == 0)
          {
            puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
            return 1;
          }
        close (fd);
      
        int fds[2];
        if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
          {
            puts ("socketpair(0) failed");
            return 1;
          }
        for (int i = 0; i < 2; ++i)
          {
            fl = fcntl (fds[i], F_GETFL);
            if (fl == -1)
              {
                puts ("fcntl failed");
                return 1;
              }
            if (fl & O_NONBLOCK)
              {
                printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
                return 1;
              }
            close (fds[i]);
          }
      
        if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
          {
            puts ("socketpair(SOCK_NONBLOCK) failed");
            return 1;
          }
        for (int i = 0; i < 2; ++i)
          {
            fl = fcntl (fds[i], F_GETFL);
            if (fl == -1)
              {
                puts ("fcntl failed");
                return 1;
              }
            if ((fl & O_NONBLOCK) == 0)
              {
                printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
                return 1;
              }
            close (fds[i]);
          }
      
        pthread_barrier_init (&b, NULL, 2);
      
        struct sockaddr_in sin;
        pthread_t th;
        if (pthread_create (&th, NULL, tf, NULL) != 0)
          {
            puts ("pthread_create failed");
            return 1;
          }
      
        int s = socket (AF_INET, SOCK_STREAM, 0);
        int reuse = 1;
        setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        bind (s, (struct sockaddr *) &sin, sizeof (sin));
        listen (s, SOMAXCONN);
      
        pthread_barrier_wait (&b);
      
        int s2 = paccept (s, NULL, 0, NULL, 0);
        if (s2 < 0)
          {
            puts ("paccept(0) failed");
            return 1;
          }
      
        fl = fcntl (s2, F_GETFL);
        if (fl & O_NONBLOCK)
          {
            puts ("paccept(0) set non-blocking mode");
            return 1;
          }
        close (s2);
        close (s);
      
        pthread_barrier_wait (&b);
      
        s = socket (AF_INET, SOCK_STREAM, 0);
        sin.sin_port = htons (PORT);
        setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
        bind (s, (struct sockaddr *) &sin, sizeof (sin));
        listen (s, SOMAXCONN);
      
        pthread_barrier_wait (&b);
      
        s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
        if (s2 < 0)
          {
            puts ("paccept(SOCK_NONBLOCK) failed");
            return 1;
          }
      
        fl = fcntl (s2, F_GETFL);
        if ((fl & O_NONBLOCK) == 0)
          {
            puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
            return 1;
          }
        close (s2);
        close (s);
      
        pthread_barrier_wait (&b);
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: "David S. Miller" <davem@davemloft.net>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      77d27200
    • U
      flag parameters: paccept w/out set_restore_sigmask · c019bbc6
      Ulrich Drepper 提交于
      Some platforms do not have support to restore the signal mask in the
      return path from a syscall.  For those platforms syscalls like pselect are
      not defined at all.  This is, I think, not a good choice for paccept()
      since paccept() adds more value on top of accept() than just the signal
      mask handling.
      
      Therefore this patch defines a scaled down version of the sys_paccept
      function for those platforms.  It returns -EINVAL in case the signal mask
      is non-NULL but behaves the same otherwise.
      
      Note that I explicitly included <linux/thread_info.h>.  I saw that it is
      currently included but indirectly two levels down.  There is too much risk
      in relying on this.  The header might change and then suddenly the
      function definition would change without anyone immediately noticing.
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Cc: Davide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      c019bbc6
    • U
      flag parameters: paccept · aaca0bdc
      Ulrich Drepper 提交于
      This patch is by far the most complex in the series.  It adds a new syscall
      paccept.  This syscall differs from accept in that it adds (at the userlevel)
      two additional parameters:
      
      - a signal mask
      - a flags value
      
      The flags parameter can be used to set flag like SOCK_CLOEXEC.  This is
      imlpemented here as well.  Some people argued that this is a property which
      should be inherited from the file desriptor for the server but this is against
      POSIX.  Additionally, we really want the signal mask parameter as well
      (similar to pselect, ppoll, etc).  So an interface change in inevitable.
      
      The flag value is the same as for socket and socketpair.  I think diverging
      here will only create confusion.  Similar to the filesystem interfaces where
      the use of the O_* constants differs, it is acceptable here.
      
      The signal mask is handled as for pselect etc.  The mask is temporarily
      installed for the thread and removed before the call returns.  I modeled the
      code after pselect.  If there is a problem it's likely also in pselect.
      
      For architectures which use socketcall I maintained this interface instead of
      adding a system call.  The symmetry shouldn't be broken.
      
      The following test must be adjusted for architectures other than x86 and
      x86-64 and in case the syscall numbers changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <errno.h>
      #include <fcntl.h>
      #include <pthread.h>
      #include <signal.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <netinet/in.h>
      #include <sys/socket.h>
      #include <sys/syscall.h>
      
      #ifndef __NR_paccept
      # ifdef __x86_64__
      #  define __NR_paccept 288
      # elif defined __i386__
      #  define SYS_PACCEPT 18
      #  define USE_SOCKETCALL 1
      # else
      #  error "need __NR_paccept"
      # endif
      #endif
      
      #ifdef USE_SOCKETCALL
      # define paccept(fd, addr, addrlen, mask, flags) \
        ({ long args[6] = { \
             (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
           syscall (__NR_socketcall, SYS_PACCEPT, args); })
      #else
      # define paccept(fd, addr, addrlen, mask, flags) \
        syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
      #endif
      
      #define PORT 57392
      
      #define SOCK_CLOEXEC O_CLOEXEC
      
      static pthread_barrier_t b;
      
      static void *
      tf (void *arg)
      {
        pthread_barrier_wait (&b);
        int s = socket (AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in sin;
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
      
        pthread_barrier_wait (&b);
        s = socket (AF_INET, SOCK_STREAM, 0);
        sin.sin_port = htons (PORT);
        connect (s, (const struct sockaddr *) &sin, sizeof (sin));
        close (s);
        pthread_barrier_wait (&b);
      
        pthread_barrier_wait (&b);
        sleep (2);
        pthread_kill ((pthread_t) arg, SIGUSR1);
      
        return NULL;
      }
      
      static void
      handler (int s)
      {
      }
      
      int
      main (void)
      {
        pthread_barrier_init (&b, NULL, 2);
      
        struct sockaddr_in sin;
        pthread_t th;
        if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
          {
            puts ("pthread_create failed");
            return 1;
          }
      
        int s = socket (AF_INET, SOCK_STREAM, 0);
        int reuse = 1;
        setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
        sin.sin_port = htons (PORT);
        bind (s, (struct sockaddr *) &sin, sizeof (sin));
        listen (s, SOMAXCONN);
      
        pthread_barrier_wait (&b);
      
        int s2 = paccept (s, NULL, 0, NULL, 0);
        if (s2 < 0)
          {
            puts ("paccept(0) failed");
            return 1;
          }
      
        int coe = fcntl (s2, F_GETFD);
        if (coe & FD_CLOEXEC)
          {
            puts ("paccept(0) set close-on-exec-flag");
            return 1;
          }
        close (s2);
      
        pthread_barrier_wait (&b);
      
        s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
        if (s2 < 0)
          {
            puts ("paccept(SOCK_CLOEXEC) failed");
            return 1;
          }
      
        coe = fcntl (s2, F_GETFD);
        if ((coe & FD_CLOEXEC) == 0)
          {
            puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
            return 1;
          }
        close (s2);
      
        pthread_barrier_wait (&b);
      
        struct sigaction sa;
        sa.sa_handler = handler;
        sa.sa_flags = 0;
        sigemptyset (&sa.sa_mask);
        sigaction (SIGUSR1, &sa, NULL);
      
        sigset_t ss;
        pthread_sigmask (SIG_SETMASK, NULL, &ss);
        sigaddset (&ss, SIGUSR1);
        pthread_sigmask (SIG_SETMASK, &ss, NULL);
      
        sigdelset (&ss, SIGUSR1);
        alarm (4);
        pthread_barrier_wait (&b);
      
        errno = 0 ;
        s2 = paccept (s, NULL, 0, &ss, 0);
        if (s2 != -1 || errno != EINTR)
          {
            puts ("paccept did not fail with EINTR");
            return 1;
          }
      
        close (s);
      
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      
      [akpm@linux-foundation.org: make it compile]
      [akpm@linux-foundation.org: add sys_ni stub]
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: <linux-arch@vger.kernel.org>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: Roland McGrath <roland@redhat.com>
      Cc: Kyle McMartin <kyle@mcmartin.ca>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      aaca0bdc
    • U
      flag parameters: socket and socketpair · a677a039
      Ulrich Drepper 提交于
      This patch adds support for flag values which are ORed to the type passwd
      to socket and socketpair.  The additional code is minimal.  The flag
      values in this implementation can and must match the O_* flags.  This
      avoids overhead in the conversion.
      
      The internal functions sock_alloc_fd and sock_map_fd get a new parameters
      and all callers are changed.
      
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      #include <fcntl.h>
      #include <stdio.h>
      #include <unistd.h>
      #include <netinet/in.h>
      #include <sys/socket.h>
      
      #define PORT 57392
      
      /* For Linux these must be the same.  */
      #define SOCK_CLOEXEC O_CLOEXEC
      
      int
      main (void)
      {
        int fd;
        fd = socket (PF_INET, SOCK_STREAM, 0);
        if (fd == -1)
          {
            puts ("socket(0) failed");
            return 1;
          }
        int coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if (coe & FD_CLOEXEC)
          {
            puts ("socket(0) set close-on-exec flag");
            return 1;
          }
        close (fd);
      
        fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
        if (fd == -1)
          {
            puts ("socket(SOCK_CLOEXEC) failed");
            return 1;
          }
        coe = fcntl (fd, F_GETFD);
        if (coe == -1)
          {
            puts ("fcntl failed");
            return 1;
          }
        if ((coe & FD_CLOEXEC) == 0)
          {
            puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
            return 1;
          }
        close (fd);
      
        int fds[2];
        if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
          {
            puts ("socketpair(0) failed");
            return 1;
          }
        for (int i = 0; i < 2; ++i)
          {
            coe = fcntl (fds[i], F_GETFD);
            if (coe == -1)
              {
                puts ("fcntl failed");
                return 1;
              }
            if (coe & FD_CLOEXEC)
              {
                printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
                return 1;
              }
            close (fds[i]);
          }
      
        if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
          {
            puts ("socketpair(SOCK_CLOEXEC) failed");
            return 1;
          }
        for (int i = 0; i < 2; ++i)
          {
            coe = fcntl (fds[i], F_GETFD);
            if (coe == -1)
              {
                puts ("fcntl failed");
                return 1;
              }
            if ((coe & FD_CLOEXEC) == 0)
              {
                printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
                return 1;
              }
            close (fds[i]);
          }
      
        puts ("OK");
      
        return 0;
      }
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      Signed-off-by: NUlrich Drepper <drepper@redhat.com>
      Acked-by: NDavide Libenzi <davidel@xmailserver.org>
      Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      a677a039
  3. 24 7月, 2008 5 次提交
    • J
      pkt_sched: sch_sfq: dump a real number of flows · f867e6af
      Jarek Poplawski 提交于
      Dump the "flows" number according to the number of active flows
      instead of repeating the "limit".
      Reported-by: NDenys Fedoryshchenko <denys@visp.net.lb>
      Signed-off-by: NJarek Poplawski <jarkao2@gmail.com>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      f867e6af
    • P
    • D
      tcp: Clear probes_out more aggressively in tcp_ack(). · 4b53fb67
      David S. Miller 提交于
      This is based upon an excellent bug report from Eric Dumazet.
      
      tcp_ack() should clear ->icsk_probes_out even if there are packets
      outstanding.  Otherwise if we get a sequence of ACKs while we do have
      packets outstanding over and over again, we'll never clear the
      probes_out value and eventually think the connection is too sick and
      we'll reset it.
      
      This appears to be some "optimization" added to tcp_ack() in the 2.4.x
      timeframe.  In 2.2.x, probes_out is pretty much always cleared by
      tcp_ack().
      
      Here is Eric's original report:
      
      ----------------------------------------
      Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost.
      
      In order to reproduce the problem, please try the following program on linux-2.6.25.*
      
      Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000
      
      iptables -N SLOWLO
      iptables -A SLOWLO -m hashlimit --hashlimit 2 --hashlimit-burst 1 --hashlimit-mode dstip --hashlimit-name slow2 -j ACCEPT
      iptables -A SLOWLO -j DROP
      
      iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO
      
      Then run the attached program and see the output :
      
      # ./loop
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,1)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,3)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,5)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,7)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,9)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,11)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,201ms,13)
      State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
      ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,188ms,15)
      write(): Connection timed out
      wrote 890 bytes but was interrupted after 9 seconds
      ESTAB      0      0                 127.0.0.1:12000            127.0.0.1:54455
      Exiting read() because no data available (4000 ms timeout).
      read 860 bytes
      
      While this tcp session makes progress (sending frames with 50 bytes of payload, every 500ms), linux tcp stack decides to reset it, when tcp_retries 2 is reached (default value : 15)
      
      tcpdump :
      
      15:30:28.856695 IP 127.0.0.1.56554 > 127.0.0.1.12000: S 33788768:33788768(0) win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
      15:30:28.856711 IP 127.0.0.1.12000 > 127.0.0.1.56554: S 33899253:33899253(0) ack 33788769 win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
      15:30:29.356947 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 1:61(60) ack 1 win 257
      15:30:29.356966 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 61 win 257
      15:30:29.866415 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 61:111(50) ack 1 win 257
      15:30:29.866427 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 111 win 257
      15:30:30.366516 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 111:161(50) ack 1 win 257
      15:30:30.366527 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 161 win 257
      15:30:30.876196 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 161:211(50) ack 1 win 257
      15:30:30.876207 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 211 win 257
      15:30:31.376282 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 211:261(50) ack 1 win 257
      15:30:31.376290 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 261 win 257
      15:30:31.885619 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 261:311(50) ack 1 win 257
      15:30:31.885631 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 311 win 257
      15:30:32.385705 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 311:361(50) ack 1 win 257
      15:30:32.385715 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 361 win 257
      15:30:32.895249 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 361:411(50) ack 1 win 257
      15:30:32.895266 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 411 win 257
      15:30:33.395341 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 411:461(50) ack 1 win 257
      15:30:33.395351 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 461 win 257
      15:30:33.918085 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 461:511(50) ack 1 win 257
      15:30:33.918096 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 511 win 257
      15:30:34.418163 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 511:561(50) ack 1 win 257
      15:30:34.418172 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 561 win 257
      15:30:34.927685 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 561:611(50) ack 1 win 257
      15:30:34.927698 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 611 win 257
      15:30:35.427757 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 611:661(50) ack 1 win 257
      15:30:35.427766 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 661 win 257
      15:30:35.937359 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 661:711(50) ack 1 win 257
      15:30:35.937376 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 711 win 257
      15:30:36.437451 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 711:761(50) ack 1 win 257
      15:30:36.437464 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 761 win 257
      15:30:36.947022 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 761:811(50) ack 1 win 257
      15:30:36.947039 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 811 win 257
      15:30:37.447135 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 811:861(50) ack 1 win 257
      15:30:37.447203 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 861 win 257
      15:30:41.448171 IP 127.0.0.1.12000 > 127.0.0.1.56554: F 1:1(0) ack 861 win 257
      15:30:41.448189 IP 127.0.0.1.56554 > 127.0.0.1.12000: R 33789629:33789629(0) win 0
      
      Source of program :
      
      /*
       * small producer/consumer program.
       * setup a listener on 127.0.0.1:12000
       * Forks a child
       *   child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms
       * Father accepts connection, and read all data
       */
      #include <sys/types.h>
      #include <sys/socket.h>
      #include <netinet/in.h>
      #include <unistd.h>
      #include <stdio.h>
      #include <time.h>
      #include <sys/poll.h>
      
      int port = 12000;
      char buffer[4096];
      int main(int argc, char *argv[])
      {
              int lfd = socket(AF_INET, SOCK_STREAM, 0);
              struct sockaddr_in socket_address;
              time_t t0, t1;
              int on = 1, sfd, res;
              unsigned long total = 0;
              socklen_t alen = sizeof(socket_address);
              pid_t pid;
      
              time(&t0);
              socket_address.sin_family = AF_INET;
              socket_address.sin_port = htons(port);
              socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
      
              if (lfd == -1) {
                      perror("socket()");
                      return 1;
              }
              setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int));
              if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                      perror("bind");
                      close(lfd);
                      return 1;
              }
              if (listen(lfd, 1) == -1) {
                      perror("listen()");
                      close(lfd);
                      return 1;
              }
              pid = fork();
              if (pid == 0) {
                      int i, cfd = socket(AF_INET, SOCK_STREAM, 0);
                      close(lfd);
                      if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                              perror("connect()");
                              return 1;
                              }
                      for (i = 0 ; ;) {
                              res = write(cfd, "blablabla\n", 10);
                              if (res > 0) total += res;
                              else if (res == -1) {
                                      perror("write()");
                                      break;
                              } else break;
                              usleep(100000);
                              if (++i == 10) {
                                      system("ss -on dst 127.0.0.1:12000");
                                      i = 0;
                              }
                      }
                      time(&t1);
                      fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0));
                      system("ss -on | grep 127.0.0.1:12000");
                      close(cfd);
                      return 0;
              }
              sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen);
              if (sfd == -1) {
                      perror("accept");
                      return 1;
              }
              close(lfd);
              while (1) {
                      struct pollfd pfd[1];
                      pfd[0].fd = sfd;
                      pfd[0].events = POLLIN;
                      if (poll(pfd, 1, 4000) == 0) {
                              fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n");
                              break;
                      }
                      res = read(sfd, buffer, sizeof(buffer));
                      if (res > 0) total += res;
                      else if (res == 0) break;
                      else perror("read()");
              }
              fprintf(stderr, "read %lu bytes\n", total);
              close(sfd);
              return 0;
      }
      ----------------------------------------
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      4b53fb67
    • O
      net: Update entry in af_family_clock_key_strings · b4942af6
      Oliver Hartkopp 提交于
      In the merge phase of the CAN subsystem the 
      af_family_clock_key_strings[] have been added to sock.c in commit 
      443aef0e 
      (lockdep: fixup sk_callback_lock annotation). This trivial patch adds 
      the missing name for address family 29 (AF_CAN).
      Signed-off-by: NOliver Hartkopp <oliver@hartkopp.net>
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      b4942af6
    • D
      netdev: Remove warning from __netif_schedule(). · 5b3ab1db
      David S. Miller 提交于
      It isn't helping anything and we aren't going to be able to change all
      the drivers that do queue wakeups in strange situations.
      
      Just letting a noop_qdisc get scheduled will work because when
      qdisc_run() executes via net_tx_work() it will simply find no packets
      pending when it makes the ->dequeue() call in qdisc_restart.
      Signed-off-by: NDavid S. Miller <davem@davemloft.net>
      5b3ab1db
  4. 23 7月, 2008 12 次提交
  5. 22 7月, 2008 12 次提交