socket.c 23.3 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Copyright (c) 1995 Danny Gasparovski.
3 4
 *
 * Please read the file COPYRIGHT for the
B
bellard 已提交
5 6 7
 * terms and conditions of the copyright.
 */

P
Peter Maydell 已提交
8
#include "qemu/osdep.h"
9
#include "qemu-common.h"
10
#include "slirp.h"
B
bellard 已提交
11
#include "ip_icmp.h"
B
bellard 已提交
12 13 14
#ifdef __sun__
#include <sys/filio.h>
#endif
B
bellard 已提交
15

16 17 18
static void sofcantrcvmore(struct socket *so);
static void sofcantsendmore(struct socket *so);

19 20
struct socket *solookup(struct socket **last, struct socket *head,
        struct sockaddr_storage *lhost, struct sockaddr_storage *fhost)
B
bellard 已提交
21
{
22
    struct socket *so = *last;
23

24
    /* Optimisation */
25 26
    if (so != head && sockaddr_equal(&(so->lhost.ss), lhost)
            && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
27 28
        return so;
    }
29

30
    for (so = head->so_next; so != head; so = so->so_next) {
31 32
        if (sockaddr_equal(&(so->lhost.ss), lhost)
                && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
33 34 35 36
            *last = so;
            return so;
        }
    }
37

38
    return (struct socket *)NULL;
B
bellard 已提交
39 40 41 42 43 44 45 46
}

/*
 * Create a new socket, initialise the fields
 * It is the responsibility of the caller to
 * insque() it into the correct linked-list
 */
struct socket *
47
socreate(Slirp *slirp)
B
bellard 已提交
48 49
{
  struct socket *so;
50

B
bellard 已提交
51 52 53 54 55
  so = (struct socket *)malloc(sizeof(struct socket));
  if(so) {
    memset(so, 0, sizeof(struct socket));
    so->so_state = SS_NOFDREF;
    so->s = -1;
56
    so->slirp = slirp;
57
    so->pollfds_idx = -1;
B
bellard 已提交
58 59 60 61
  }
  return(so);
}

62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
/*
 * Remove references to so from the given message queue.
 *
 * Every mbuf on qh (walked through ifq_next) whose ifq_so is so gets its
 * ifq_so cleared, and so does every mbuf reachable from it through
 * ifs_next.
 */
static void
soqfree(struct socket *so, struct quehead *qh)
{
    struct mbuf *entry = (struct mbuf *) qh->qh_link;

    while ((struct quehead *) entry != qh) {
        if (entry->ifq_so == so) {
            struct mbuf *chained;

            entry->ifq_so = NULL;
            chained = entry->ifs_next;
            while (chained != entry) {
                chained->ifq_so = NULL;
                chained = chained->ifs_next;
            }
        }
        entry = entry->ifq_next;
    }
}

B
bellard 已提交
83 84 85 86
/*
 * remque and free a socket, clobber cache
 */
void
87
sofree(struct socket *so)
B
bellard 已提交
88
{
89
  Slirp *slirp = so->slirp;
90

91 92
  soqfree(so, &slirp->if_fastq);
  soqfree(so, &slirp->if_batchq);
93

B
bellard 已提交
94 95 96 97
  if (so->so_emu==EMU_RSH && so->extra) {
	sofree(so->extra);
	so->extra=NULL;
  }
98 99 100 101
  if (so == slirp->tcp_last_so) {
      slirp->tcp_last_so = &slirp->tcb;
  } else if (so == slirp->udp_last_so) {
      slirp->udp_last_so = &slirp->udb;
102 103
  } else if (so == slirp->icmp_last_so) {
      slirp->icmp_last_so = &slirp->icmp;
104
  }
B
bellard 已提交
105
  m_free(so->so_m);
106 107

  if(so->so_next && so->so_prev)
B
bellard 已提交
108 109
    remque(so);  /* crashes if so is not in a queue */

M
Marc-André Lureau 已提交
110 111 112
  if (so->so_tcpcb) {
      free(so->so_tcpcb);
  }
B
bellard 已提交
113 114 115
  free(so);
}

116
size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np)
B
bellard 已提交
117
{
118
	int n, lss, total;
B
bellard 已提交
119 120 121
	struct sbuf *sb = &so->so_snd;
	int len = sb->sb_datalen - sb->sb_cc;
	int mss = so->so_tcpcb->t_maxseg;
122

123
	DEBUG_CALL("sopreprbuf");
124
	DEBUG_ARG("so = %p", so);
125

126 127 128
	if (len <= 0)
		return 0;

B
bellard 已提交
129
	iov[0].iov_base = sb->sb_wptr;
130 131
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
	if (sb->sb_wptr < sb->sb_rptr) {
		iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len)
		   iov[0].iov_len = len;
		if (iov[0].iov_len > mss)
		   iov[0].iov_len -= iov[0].iov_len%mss;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_rptr - sb->sb_data;
			if(iov[1].iov_len > len)
			   iov[1].iov_len = len;
			total = iov[0].iov_len + iov[1].iov_len;
			if (total > mss) {
				lss = total%mss;
				if (iov[1].iov_len > lss) {
					iov[1].iov_len -= lss;
					n = 2;
				} else {
					lss -= iov[1].iov_len;
					iov[0].iov_len -= lss;
					n = 1;
				}
			} else
				n = 2;
		} else {
			if (iov[0].iov_len > mss)
			   iov[0].iov_len -= iov[0].iov_len%mss;
			n = 1;
		}
	}
169 170 171 172 173 174 175 176 177 178 179 180
	if (np)
		*np = n;

	return iov[0].iov_len + (n - 1) * iov[1].iov_len;
}

/*
 * Read from so's socket into sb_snd, updating all relevant sbuf fields
 * NOTE: This will only be called if it is select()ed for reading, so
 * a read() of 0 (or less) means it's disconnected
 */
int
181
soread(struct socket *so)
182 183 184 185 186 187
{
	int n, nn;
	struct sbuf *sb = &so->so_snd;
	struct iovec iov[2];

	DEBUG_CALL("soread");
188
	DEBUG_ARG("so = %p", so);
189 190 191 192 193 194

	/*
	 * No need to check if there's enough room to read.
	 * soread wouldn't have been called if there weren't
	 */
	sopreprbuf(so, iov, &n);
195

B
bellard 已提交
196 197 198 199
#ifdef HAVE_READV
	nn = readv(so->s, (struct iovec *)iov, n);
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#else
B
Blue Swirl 已提交
200
	nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
201
#endif
B
bellard 已提交
202 203 204 205
	if (nn <= 0) {
		if (nn < 0 && (errno == EINTR || errno == EAGAIN))
			return 0;
		else {
206
			int err;
207 208 209 210
			socklen_t elen = sizeof err;
			struct sockaddr_storage addr;
			struct sockaddr *paddr = (struct sockaddr *) &addr;
			socklen_t alen = sizeof addr;
211 212 213

			err = errno;
			if (nn == 0) {
214 215 216 217 218 219
				if (getpeername(so->s, paddr, &alen) < 0) {
					err = errno;
				} else {
					getsockopt(so->s, SOL_SOCKET, SO_ERROR,
						&err, &elen);
				}
220 221
			}

B
bellard 已提交
222 223
			DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
			sofcantrcvmore(so);
224

225
			if (err == ECONNRESET || err == ECONNREFUSED
226 227 228 229 230
			    || err == ENOTCONN || err == EPIPE) {
				tcp_drop(sototcpcb(so), err);
			} else {
				tcp_sockclosed(sototcpcb(so));
			}
B
bellard 已提交
231 232 233
			return -1;
		}
	}
234

B
bellard 已提交
235 236 237 238 239 240 241 242
#ifndef HAVE_READV
	/*
	 * If there was no error, try and read the second time round
	 * We read again if n = 2 (ie, there's another part of the buffer)
	 * and we read as much as we could in the first read
	 * We don't test for <= 0 this time, because there legitimately
	 * might not be any more data (since the socket is non-blocking),
	 * a close will be detected on next iteration.
S
Stefan Weil 已提交
243
	 * A return of -1 won't (shouldn't) happen, since it didn't happen above
B
bellard 已提交
244
	 */
B
bellard 已提交
245 246
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
B
Blue Swirl 已提交
247
            ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
248 249 250
            if (ret > 0)
                nn += ret;
        }
251

B
bellard 已提交
252 253
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#endif
254

B
bellard 已提交
255 256 257 258 259 260 261
	/* Update fields */
	sb->sb_cc += nn;
	sb->sb_wptr += nn;
	if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_wptr -= sb->sb_datalen;
	return nn;
}
262

263 264 265 266 267 268 269
/*
 * Copy size bytes from buf into so's so_snd buffer, as if they had been
 * read from the socket, and update the sbuf fields accordingly.
 *
 * Returns size on success.  If so_snd cannot take size bytes, the socket
 * is marked via sofcantrcvmore(), tcp_sockclosed() is called, a message is
 * printed on stderr and -1 is returned.
 */
int soreadbuf(struct socket *so, const char *buf, int size)
{
    int n, nn, copy = size;
    struct sbuf *sb = &so->so_snd;
    struct iovec iov[2];

    DEBUG_CALL("soreadbuf");
    DEBUG_ARG("so = %p", so);

    /*
     * Unlike soread(), the caller decides how much data arrives, so the
     * available room has to be checked against size here.
     */
    if (sopreprbuf(so, iov, &n) < size) {
        goto err;
    }

    /*
     * sopreprbuf() writes nothing to iov[] when it returns 0, which passes
     * the check above for size == 0; only touch iov[] when there is data.
     */
    if (copy > 0) {
        nn = MIN(iov[0].iov_len, copy);
        memcpy(iov[0].iov_base, buf, nn);

        copy -= nn;
        buf += nn;

        /* Whatever did not fit in the first run goes into the second. */
        if (copy != 0) {
            memcpy(iov[1].iov_base, buf, copy);
        }
    }

    /* Update fields, wrapping the write pointer at the end of the buffer */
    sb->sb_cc += size;
    sb->sb_wptr += size;
    if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) {
        sb->sb_wptr -= sb->sb_datalen;
    }
    return size;

err:
    sofcantrcvmore(so);
    tcp_sockclosed(sototcpcb(so));
    fprintf(stderr, "soreadbuf buffer too small\n");
    return -1;
}

B
bellard 已提交
305 306
/*
 * Get urgent data
307
 *
B
bellard 已提交
308 309 310 311
 * When the socket is created, we set it SO_OOBINLINE,
 * so when OOB data arrives, we soread() it and everything
 * in the send buffer is sent as urgent data
 */
312
int
313
sorecvoob(struct socket *so)
B
bellard 已提交
314 315
{
	struct tcpcb *tp = sototcpcb(so);
316
	int ret;
B
bellard 已提交
317 318

	DEBUG_CALL("sorecvoob");
319
	DEBUG_ARG("so = %p", so);
320

B
bellard 已提交
321 322 323 324 325
	/*
	 * We take a guess at how much urgent data has arrived.
	 * In most situations, when urgent data arrives, the next
	 * read() should get all the urgent data.  This guess will
	 * be wrong however if more data arrives just after the
326
	 * urgent data, or the read() doesn't return all the
B
bellard 已提交
327 328
	 * urgent data.
	 */
329 330 331 332 333 334 335 336 337
	ret = soread(so);
	if (ret > 0) {
	    tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
	    tp->t_force = 1;
	    tcp_output(tp);
	    tp->t_force = 0;
	}

	return ret;
B
bellard 已提交
338 339 340 341 342 343 344
}

/*
 * Send urgent data
 * There's a lot duplicated code here, but...
 */
int
345
sosendoob(struct socket *so)
B
bellard 已提交
346 347 348
{
	struct sbuf *sb = &so->so_rcv;
	char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
349

350
	int n;
351

B
bellard 已提交
352
	DEBUG_CALL("sosendoob");
353
	DEBUG_ARG("so = %p", so);
B
bellard 已提交
354
	DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
355

B
bellard 已提交
356 357
	if (so->so_urgc > 2048)
	   so->so_urgc = 2048; /* XXXX */
358

B
bellard 已提交
359 360
	if (sb->sb_rptr < sb->sb_wptr) {
		/* We can send it directly */
361
		n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
B
bellard 已提交
362
	} else {
363
		/*
B
bellard 已提交
364 365 366 367
		 * Since there's no sendv or sendtov like writev,
		 * we must copy all data to a linear buffer then
		 * send it all
		 */
368
		uint32_t urgc = so->so_urgc;
369
		int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
370 371 372
		if (len > urgc) {
			len = urgc;
		}
B
bellard 已提交
373
		memcpy(buff, sb->sb_rptr, len);
374 375
		urgc -= len;
		if (urgc) {
B
bellard 已提交
376
			n = sb->sb_wptr - sb->sb_data;
377 378 379
			if (n > urgc) {
				n = urgc;
			}
B
bellard 已提交
380 381 382
			memcpy((buff + len), sb->sb_data, n);
			len += n;
		}
383
		n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
B
bellard 已提交
384
#ifdef DEBUG
385 386 387
		if (n != len) {
			DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
		}
388
#endif
389 390
	}

391 392
	if (n < 0) {
		return n;
B
bellard 已提交
393
	}
394 395
	so->so_urgc -= n;
	DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
396

B
bellard 已提交
397 398 399 400
	sb->sb_cc -= n;
	sb->sb_rptr += n;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
401

B
bellard 已提交
402 403 404 405
	return n;
}

/*
406
 * Write data from so_rcv to so's socket,
B
bellard 已提交
407 408 409
 * updating all sbuf field as necessary
 */
int
410
sowrite(struct socket *so)
B
bellard 已提交
411 412 413 414 415
{
	int  n,nn;
	struct sbuf *sb = &so->so_rcv;
	int len = sb->sb_cc;
	struct iovec iov[2];
416

B
bellard 已提交
417
	DEBUG_CALL("sowrite");
418
	DEBUG_ARG("so = %p", so);
419

B
bellard 已提交
420
	if (so->so_urgc) {
421 422 423 424 425 426 427 428 429
		uint32_t expected = so->so_urgc;
		if (sosendoob(so) < expected) {
			/* Treat a short write as a fatal error too,
			 * rather than continuing on and sending the urgent
			 * data as if it were non-urgent and leaving the
			 * so_urgc count wrong.
			 */
			goto err_disconnected;
		}
B
bellard 已提交
430 431 432 433 434 435 436 437
		if (sb->sb_cc == 0)
			return 0;
	}

	/*
	 * No need to check if there's something to write,
	 * sowrite wouldn't have been called otherwise
	 */
438

B
bellard 已提交
439
	iov[0].iov_base = sb->sb_rptr;
440 441
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
	if (sb->sb_rptr < sb->sb_wptr) {
		iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_wptr - sb->sb_data;
			if (iov[1].iov_len > len) iov[1].iov_len = len;
			n = 2;
		} else
			n = 1;
	}
	/* Check if there's urgent data to send, and if so, send it */

#ifdef HAVE_READV
	nn = writev(so->s, (const struct iovec *)iov, n);
463

B
bellard 已提交
464 465
	DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#else
466
	nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0);
B
bellard 已提交
467 468 469 470
#endif
	/* This should never happen, but people tell me it does *shrug* */
	if (nn < 0 && (errno == EAGAIN || errno == EINTR))
		return 0;
471

B
bellard 已提交
472
	if (nn <= 0) {
473
		goto err_disconnected;
B
bellard 已提交
474
	}
475

B
bellard 已提交
476
#ifndef HAVE_READV
B
bellard 已提交
477 478
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
479
            ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
480 481 482
            if (ret > 0)
                nn += ret;
        }
B
bellard 已提交
483 484
        DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#endif
485

B
bellard 已提交
486 487 488 489 490
	/* Update sbuf */
	sb->sb_cc -= nn;
	sb->sb_rptr += nn;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
491

B
bellard 已提交
492 493 494 495 496 497
	/*
	 * If in DRAIN mode, and there's no more data, set
	 * it CANTSENDMORE
	 */
	if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
		sofcantsendmore(so);
498

B
bellard 已提交
499
	return nn;
500 501 502 503 504 505 506

err_disconnected:
	DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
		    so->so_state, errno));
	sofcantsendmore(so);
	tcp_sockclosed(sototcpcb(so));
	return -1;
B
bellard 已提交
507 508 509 510 511 512
}

/*
 * recvfrom() a UDP socket
 */
void
513
sorecvfrom(struct socket *so)
B
bellard 已提交
514
{
515
	struct sockaddr_storage addr;
516
	struct sockaddr_storage saddr, daddr;
517
	socklen_t addrlen = sizeof(struct sockaddr_storage);
518

B
bellard 已提交
519
	DEBUG_CALL("sorecvfrom");
520
	DEBUG_ARG("so = %p", so);
521

B
bellard 已提交
522 523 524
	if (so->so_type == IPPROTO_ICMP) {   /* This is a "ping" reply */
	  char buff[256];
	  int len;
525

526
	  len = recvfrom(so->s, buff, 256, 0,
B
bellard 已提交
527 528
			 (struct sockaddr *)&addr, &addrlen);
	  /* XXX Check if reply is "correct"? */
529

B
bellard 已提交
530 531 532 533 534
	  if(len == -1 || len == 0) {
	    u_char code=ICMP_UNREACH_PORT;

	    if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
	    else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
535

B
bellard 已提交
536 537
	    DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
			errno,strerror(errno)));
Y
Yann Bordenave 已提交
538
	    icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
B
bellard 已提交
539 540
	  } else {
	    icmp_reflect(so->so_m);
541
            so->so_m = NULL; /* Don't m_free() it again! */
B
bellard 已提交
542 543 544 545 546
	  }
	  /* No need for this socket anymore, udp_detach it */
	  udp_detach(so);
	} else {                            	/* A "normal" UDP packet */
	  struct mbuf *m;
B
Blue Swirl 已提交
547 548 549 550 551 552
          int len;
#ifdef _WIN32
          unsigned long n;
#else
          int n;
#endif
B
bellard 已提交
553

554 555 556 557
	  m = m_get(so->slirp);
	  if (!m) {
	      return;
	  }
558 559 560 561 562 563 564 565 566 567 568 569
	  switch (so->so_ffamily) {
	  case AF_INET:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr);
	      break;
	  case AF_INET6:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct ip6)
	                                 + sizeof(struct udphdr);
	      break;
	  default:
	      g_assert_not_reached();
	      break;
	  }
570

571
	  /*
B
bellard 已提交
572 573 574 575 576
	   * XXX Shouldn't FIONREAD packets destined for port 53,
	   * but I don't know the max packet size for DNS lookups
	   */
	  len = M_FREEROOM(m);
	  /* if (so->so_fport != htons(53)) { */
B
bellard 已提交
577
	  ioctlsocket(so->s, FIONREAD, &n);
578

B
bellard 已提交
579 580 581 582 583 584
	  if (n > len) {
	    n = (m->m_data - m->m_dat) + m->m_len + n + 1;
	    m_inc(m, n);
	    len = M_FREEROOM(m);
	  }
	  /* } */
585

B
bellard 已提交
586 587
	  m->m_len = recvfrom(so->s, m->m_data, len, 0,
			      (struct sockaddr *)&addr, &addrlen);
588
	  DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
B
bellard 已提交
589 590
		      m->m_len, errno,strerror(errno)));
	  if(m->m_len<0) {
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
	    /* Report error as ICMP */
	    switch (so->so_lfamily) {
	    uint8_t code;
	    case AF_INET:
	      code = ICMP_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP_UNREACH_HOST;
	      } else if (errno == ENETUNREACH) {
		code = ICMP_UNREACH_NET;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp ICMP_UNREACH:%i\n", code));
	      icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
	      break;
	    case AF_INET6:
	      code = ICMP6_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP6_UNREACH_ADDRESS;
	      } else if (errno == ENETUNREACH) {
		code = ICMP6_UNREACH_NO_ROUTE;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp6 ICMP_UNREACH:%i\n", code));
	      icmp6_send_error(so->so_m, ICMP6_UNREACH, code);
	      break;
	    default:
	      g_assert_not_reached();
	      break;
	    }
B
bellard 已提交
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
	    m_free(m);
	  } else {
	  /*
	   * Hack: domain name lookup will be used the most for UDP,
	   * and since they'll only be used once there's no need
	   * for the 4 minute (or whatever) timeout... So we time them
	   * out much quicker (10 seconds  for now...)
	   */
	    if (so->so_expire) {
	      if (so->so_fport == htons(53))
		so->so_expire = curtime + SO_EXPIREFAST;
	      else
		so->so_expire = curtime + SO_EXPIRE;
	    }

637
	    /*
B
bellard 已提交
638
	     * If this packet was destined for CTL_ADDR,
639
	     * make it look like that's where it came from
B
bellard 已提交
640
	     */
641 642 643 644
	    saddr = addr;
	    sotranslate_in(so, &saddr);
	    daddr = so->lhost.ss;

645 646
	    switch (so->so_ffamily) {
	    case AF_INET:
647 648 649
	        udp_output(so, m, (struct sockaddr_in *) &saddr,
	                   (struct sockaddr_in *) &daddr,
	                   so->so_iptos);
650
	        break;
651 652 653 654
	    case AF_INET6:
	        udp6_output(so, m, (struct sockaddr_in6 *) &saddr,
	                    (struct sockaddr_in6 *) &daddr);
	        break;
655
	    default:
656
	        g_assert_not_reached();
657 658
	        break;
	    }
B
bellard 已提交
659 660 661 662 663 664 665 666
	  } /* rx error */
	} /* if ping packet */
}

/*
 * sendto() a socket
 */
int
667
sosendto(struct socket *so, struct mbuf *m)
B
bellard 已提交
668 669
{
	int ret;
670
	struct sockaddr_storage addr;
B
bellard 已提交
671 672

	DEBUG_CALL("sosendto");
673 674
	DEBUG_ARG("so = %p", so);
	DEBUG_ARG("m = %p", m);
675

676 677 678
	addr = so->fhost.ss;
	DEBUG_CALL(" sendto()ing)");
	sotranslate_out(so, &addr);
679

B
bellard 已提交
680 681
	/* Don't care what port we get */
	ret = sendto(so->s, m->m_data, m->m_len, 0,
682
		     (struct sockaddr *)&addr, sockaddr_size(&addr));
B
bellard 已提交
683 684
	if (ret < 0)
		return -1;
685

B
bellard 已提交
686 687 688 689 690 691
	/*
	 * Kill the socket if there's no reply in 4 minutes,
	 * but only if it's an expirable socket
	 */
	if (so->so_expire)
		so->so_expire = curtime + SO_EXPIRE;
692 693
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */
B
bellard 已提交
694 695 696 697
	return 0;
}

/*
698
 * Listen for incoming TCP connections
B
bellard 已提交
699 700
 */
struct socket *
701
tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr,
702
           u_int lport, int flags)
B
bellard 已提交
703 704 705
{
	struct sockaddr_in addr;
	struct socket *so;
706 707
	int s, opt = 1;
	socklen_t addrlen = sizeof(addr);
708
	memset(&addr, 0, addrlen);
B
bellard 已提交
709

710
	DEBUG_CALL("tcp_listen");
711
	DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){.s_addr = haddr}));
712
	DEBUG_ARG("hport = %d", ntohs(hport));
713
	DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){.s_addr = laddr}));
714
	DEBUG_ARG("lport = %d", ntohs(lport));
B
bellard 已提交
715
	DEBUG_ARG("flags = %x", flags);
716

717 718
	so = socreate(slirp);
	if (!so) {
B
bellard 已提交
719 720
	  return NULL;
	}
721

B
bellard 已提交
722 723 724 725 726
	/* Don't tcp_attach... we don't need so_snd nor so_rcv */
	if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
		free(so);
		return NULL;
	}
727
	insque(so, &slirp->tcb);
728 729

	/*
B
bellard 已提交
730 731 732 733
	 * SS_FACCEPTONCE sockets must time out.
	 */
	if (flags & SS_FACCEPTONCE)
	   so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
734

735 736
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= (SS_FACCEPTCONN | flags);
737
	so->so_lfamily = AF_INET;
B
bellard 已提交
738 739
	so->so_lport = lport; /* Kept in network format */
	so->so_laddr.s_addr = laddr; /* Ditto */
740

B
bellard 已提交
741
	addr.sin_family = AF_INET;
742 743
	addr.sin_addr.s_addr = haddr;
	addr.sin_port = hport;
744

K
Kevin Wolf 已提交
745
	if (((s = qemu_socket(AF_INET,SOCK_STREAM,0)) < 0) ||
746
	    (socket_set_fast_reuse(s) < 0) ||
B
bellard 已提交
747 748 749
	    (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
	    (listen(s,1) < 0)) {
		int tmperrno = errno; /* Don't clobber the real reason we failed */
750

751 752 753
                if (s >= 0) {
                    closesocket(s);
                }
B
bellard 已提交
754 755
		sofree(so);
		/* Restore the real errno */
B
bellard 已提交
756 757 758
#ifdef _WIN32
		WSASetLastError(tmperrno);
#else
B
bellard 已提交
759
		errno = tmperrno;
B
bellard 已提交
760
#endif
B
bellard 已提交
761 762
		return NULL;
	}
763
	qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
764 765
	opt = 1;
	qemu_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int));
766

B
bellard 已提交
767
	getsockname(s,(struct sockaddr *)&addr,&addrlen);
768
	so->so_ffamily = AF_INET;
B
bellard 已提交
769 770
	so->so_fport = addr.sin_port;
	if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
771
	   so->so_faddr = slirp->vhost_addr;
B
bellard 已提交
772 773 774 775 776 777 778 779 780 781 782 783 784 785
	else
	   so->so_faddr = addr.sin_addr;

	so->s = s;
	return so;
}

/*
 * Various session state calls
 * XXX Should be #define's
 * The socket state stuff needs work; these often get called 2 or 3
 * times each when only 1 call was needed
 */
void
786
soisfconnecting(struct socket *so)
B
bellard 已提交
787 788 789 790 791 792 793
{
	so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
			  SS_FCANTSENDMORE|SS_FWDRAIN);
	so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
}

void
794
soisfconnected(struct socket *so)
B
bellard 已提交
795 796 797 798 799
{
	so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
	so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
}

800 801
static void
sofcantrcvmore(struct socket *so)
B
bellard 已提交
802 803 804 805 806
{
	if ((so->so_state & SS_NOFDREF) == 0) {
		shutdown(so->s,0);
	}
	so->so_state &= ~(SS_ISFCONNECTING);
807 808 809 810
	if (so->so_state & SS_FCANTSENDMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* Don't select it */
	} else {
B
bellard 已提交
811
	   so->so_state |= SS_FCANTRCVMORE;
812
	}
B
bellard 已提交
813 814
}

815 816
static void
sofcantsendmore(struct socket *so)
B
bellard 已提交
817 818
{
	if ((so->so_state & SS_NOFDREF) == 0) {
B
bellard 已提交
819
            shutdown(so->s,1);           /* send FIN to fhost */
B
bellard 已提交
820 821
	}
	so->so_state &= ~(SS_ISFCONNECTING);
822 823 824 825
	if (so->so_state & SS_FCANTRCVMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* as above */
	} else {
B
bellard 已提交
826
	   so->so_state |= SS_FCANTSENDMORE;
827
	}
B
bellard 已提交
828 829 830 831 832 833 834
}

/*
 * Set write drain mode
 * Set CANTSENDMORE once all data has been write()n
 */
void
835
sofwdrain(struct socket *so)
B
bellard 已提交
836 837 838 839 840 841
{
	if (so->so_rcv.sb_cc)
		so->so_state |= SS_FWDRAIN;
	else
		sofcantsendmore(so);
}
842 843 844 845 846 847 848 849

/*
 * Translate addr in host addr when it is a virtual address
 */
void sotranslate_out(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
850
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
                slirp->vnetwork_addr.s_addr) {
            /* It's an alias */
            if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) {
                if (get_dns_addr(&sin->sin_addr) < 0) {
                    sin->sin_addr = loopback_addr;
                }
            } else {
                sin->sin_addr = loopback_addr;
            }
        }

        DEBUG_MISC((dfd, " addr.sin_port=%d, "
            "addr.sin_addr.s_addr=%.16s\n",
            ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)));
        break;

871 872 873 874
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) {
875 876 877 878
                uint32_t scope_id;
                if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) {
                    sin6->sin6_scope_id = scope_id;
                } else {
879
                    sin6->sin6_addr = in6addr_loopback;
S
Samuel Thibault 已提交
880
                }
881 882 883 884 885 886
            } else {
                sin6->sin6_addr = in6addr_loopback;
            }
        }
        break;

887 888 889 890 891 892 893 894 895
    default:
        break;
    }
}

void sotranslate_in(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
896
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
            slirp->vnetwork_addr.s_addr) {
            uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr;

            if ((so->so_faddr.s_addr & inv_mask) == inv_mask) {
                sin->sin_addr = slirp->vhost_addr;
            } else if (sin->sin_addr.s_addr == loopback_addr.s_addr ||
                       so->so_faddr.s_addr != slirp->vhost_addr.s_addr) {
                sin->sin_addr = so->so_faddr;
            }
        }
        break;

913 914 915 916 917 918 919 920 921 922
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&sin6->sin6_addr, &in6addr_loopback)
                    || !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) {
                sin6->sin6_addr = so->so_faddr6;
            }
        }
        break;

923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943
    default:
        break;
    }
}

/*
 * Translate connections from localhost to the real hostname
 */
void sotranslate_accept(struct socket *so)
{
    Slirp *slirp = so->slirp;

    switch (so->so_ffamily) {
    case AF_INET:
        if (so->so_faddr.s_addr == INADDR_ANY ||
            (so->so_faddr.s_addr & loopback_mask) ==
            (loopback_addr.s_addr & loopback_mask)) {
           so->so_faddr = slirp->vhost_addr;
        }
        break;

944 945 946 947 948 949 950
   case AF_INET6:
        if (in6_equal(&so->so_faddr6, &in6addr_any) ||
                in6_equal(&so->so_faddr6, &in6addr_loopback)) {
           so->so_faddr6 = slirp->vhost_addr6;
        }
        break;

951 952 953 954
    default:
        break;
    }
}