stream.c 7.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*
 *     SUCS NET3:
 *
 *     Generic stream handling routines. These are generic for most
 *     protocols. Even IP. Tonight 8-).
 *     This is used because TCP, LLC (others too) layer all have mostly
 *     identical sendmsg() and recvmsg() code.
 *     So we (will) share it here.
 *
 *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *                     (from old tcp.c code)
 *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/tcp.h>
#include <linux/wait.h>
#include <net/sock.h>

/**
 * sk_stream_write_space - stream socket write_space callback.
24
 * @sk: socket
L
Linus Torvalds 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37
 *
 * FIXME: write proper description
 */
void sk_stream_write_space(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
		clear_bit(SOCK_NOSPACE, &sock->flags);

		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);
		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38
			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
L
Linus Torvalds 已提交
39 40 41 42 43 44 45
	}
}

EXPORT_SYMBOL(sk_stream_write_space);

/**
 * sk_stream_wait_connect - Wait for a socket to get into the connected state
46 47
 * @sk: sock to wait on
 * @timeo_p: for how long to wait
L
Linus Torvalds 已提交
48 49 50 51 52 53 54
 *
 * Must be called with the socket locked.
 */
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
55
	int done;
L
Linus Torvalds 已提交
56

57
	do {
58 59 60
		int err = sock_error(sk);
		if (err)
			return err;
L
Linus Torvalds 已提交
61 62 63 64 65 66 67 68 69
		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
			return -EPIPE;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(tsk))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		sk->sk_write_pending++;
70
		done = sk_wait_event(sk, timeo_p,
71
				     !sk->sk_err &&
72
				     !((1 << sk->sk_state) &
73
				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
L
Linus Torvalds 已提交
74 75
		finish_wait(sk->sk_sleep, &wait);
		sk->sk_write_pending--;
76
	} while (!done);
L
Linus Torvalds 已提交
77 78 79 80 81 82 83
	return 0;
}

EXPORT_SYMBOL(sk_stream_wait_connect);

/**
 * sk_stream_closing - Return 1 if we still have things to send in our buffers.
84
 * @sk: socket to verify
L
Linus Torvalds 已提交
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
 */
static inline int sk_stream_closing(struct sock *sk)
{
	return (1 << sk->sk_state) &
	       (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
}

void sk_stream_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT(wait);

		do {
			prepare_to_wait(sk->sk_sleep, &wait,
					TASK_INTERRUPTIBLE);
			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
				break;
		} while (!signal_pending(current) && timeout);

		finish_wait(sk->sk_sleep, &wait);
	}
}

EXPORT_SYMBOL(sk_stream_wait_close);

/**
 * sk_stream_wait_memory - Wait for more memory for a socket
112 113
 * @sk: socket to wait for memory
 * @timeo_p: for how long
L
Linus Torvalds 已提交
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
 */
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
	int err = 0;
	long vm_wait = 0;
	long current_timeo = *timeo_p;
	DEFINE_WAIT(wait);

	if (sk_stream_memory_free(sk))
		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;

	while (1) {
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo_p)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;
		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		if (sk_stream_memory_free(sk) && !vm_wait)
			break;

		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk->sk_write_pending++;
142
		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
143 144
						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
						  sk_stream_memory_free(sk) &&
L
Linus Torvalds 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
						  vm_wait);
		sk->sk_write_pending--;

		if (vm_wait) {
			vm_wait -= current_timeo;
			current_timeo = *timeo_p;
			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
			    (current_timeo -= vm_wait) < 0)
				current_timeo = 0;
			vm_wait = 0;
		}
		*timeo_p = current_timeo;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return err;

do_error:
	err = -EPIPE;
	goto out;
do_nonblock:
	err = -EAGAIN;
	goto out;
do_interrupted:
	err = sock_intr_errno(*timeo_p);
	goto out;
}

EXPORT_SYMBOL(sk_stream_wait_memory);

void sk_stream_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

179
	skb_truesize_check(skb);
L
Linus Torvalds 已提交
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
	sk->sk_forward_alloc += skb->truesize;
}

EXPORT_SYMBOL(sk_stream_rfree);

int sk_stream_error(struct sock *sk, int flags, int err)
{
	if (err == -EPIPE)
		err = sock_error(sk) ? : -EPIPE;
	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	return err;
}

EXPORT_SYMBOL(sk_stream_error);

void __sk_stream_mem_reclaim(struct sock *sk)
{
199
	atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT,
200 201 202 203 204 205
		   sk->sk_prot->memory_allocated);
	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
	if (*sk->sk_prot->memory_pressure &&
	    (atomic_read(sk->sk_prot->memory_allocated) <
	     sk->sk_prot->sysctl_mem[0]))
		*sk->sk_prot->memory_pressure = 0;
L
Linus Torvalds 已提交
206 207 208 209 210 211 212
}

EXPORT_SYMBOL(__sk_stream_mem_reclaim);

int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
	int amt = sk_stream_pages(size);
213
	struct proto *prot = sk->sk_prot;
L
Linus Torvalds 已提交
214 215

	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
216
	atomic_add(amt, prot->memory_allocated);
L
Linus Torvalds 已提交
217 218

	/* Under limit. */
219 220 221
	if (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]) {
		if (*prot->memory_pressure)
			*prot->memory_pressure = 0;
L
Linus Torvalds 已提交
222 223 224 225
		return 1;
	}

	/* Over hard limit. */
226 227
	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[2]) {
		prot->enter_memory_pressure();
L
Linus Torvalds 已提交
228 229 230 231
		goto suppress_allocation;
	}

	/* Under pressure. */
232 233
	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[1])
		prot->enter_memory_pressure();
L
Linus Torvalds 已提交
234 235

	if (kind) {
236
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
L
Linus Torvalds 已提交
237
			return 1;
238
	} else if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
L
Linus Torvalds 已提交
239 240
		return 1;

241 242
	if (!*prot->memory_pressure ||
	    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
L
Linus Torvalds 已提交
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
				sk_stream_pages(sk->sk_wmem_queued +
						atomic_read(&sk->sk_rmem_alloc) +
						sk->sk_forward_alloc))
		return 1;

suppress_allocation:

	if (!kind) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
262
	atomic_sub(amt, prot->memory_allocated);
L
Linus Torvalds 已提交
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	return 0;
}

EXPORT_SYMBOL(sk_stream_mem_schedule);

void sk_stream_kill_queues(struct sock *sk)
{
	/* First the read buffer. */
	__skb_queue_purge(&sk->sk_receive_queue);

	/* Next, the error queue. */
	__skb_queue_purge(&sk->sk_error_queue);

	/* Next, the write queue. */
	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));

	/* Account for returned memory. */
	sk_stream_mem_reclaim(sk);

	BUG_TRAP(!sk->sk_wmem_queued);
	BUG_TRAP(!sk->sk_forward_alloc);

	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
	 * have gone away, only the net layer knows can touch it.
	 */
}

EXPORT_SYMBOL(sk_stream_kill_queues);