socket.c 22.4 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Copyright (c) 1995 Danny Gasparovski.
3 4
 *
 * Please read the file COPYRIGHT for the
B
bellard 已提交
5 6 7
 * terms and conditions of the copyright.
 */

P
Peter Maydell 已提交
8
#include "qemu/osdep.h"
9
#include "qemu-common.h"
10
#include "slirp.h"
B
bellard 已提交
11
#include "ip_icmp.h"
B
bellard 已提交
12 13 14
#ifdef __sun__
#include <sys/filio.h>
#endif
B
bellard 已提交
15

16 17 18
static void sofcantrcvmore(struct socket *so);
static void sofcantsendmore(struct socket *so);

19 20
struct socket *solookup(struct socket **last, struct socket *head,
        struct sockaddr_storage *lhost, struct sockaddr_storage *fhost)
B
bellard 已提交
21
{
22
    struct socket *so = *last;
23

24
    /* Optimisation */
25 26
    if (so != head && sockaddr_equal(&(so->lhost.ss), lhost)
            && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
27 28
        return so;
    }
29

30
    for (so = head->so_next; so != head; so = so->so_next) {
31 32
        if (sockaddr_equal(&(so->lhost.ss), lhost)
                && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
33 34 35 36
            *last = so;
            return so;
        }
    }
37

38
    return (struct socket *)NULL;
B
bellard 已提交
39 40 41 42 43 44 45 46
}

/*
 * Create a new socket, initialise the fields
 * It is the responsibility of the caller to
 * insque() it into the correct linked-list
 */
struct socket *
47
socreate(Slirp *slirp)
B
bellard 已提交
48 49
{
  struct socket *so;
50

B
bellard 已提交
51 52 53 54 55
  so = (struct socket *)malloc(sizeof(struct socket));
  if(so) {
    memset(so, 0, sizeof(struct socket));
    so->so_state = SS_NOFDREF;
    so->s = -1;
56
    so->slirp = slirp;
57
    so->pollfds_idx = -1;
B
bellard 已提交
58 59 60 61 62 63 64 65
  }
  return(so);
}

/*
 * remque and free a socket, clobber cache
 */
void
66
sofree(struct socket *so)
B
bellard 已提交
67
{
68
  Slirp *slirp = so->slirp;
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
  struct mbuf *ifm;

  for (ifm = (struct mbuf *) slirp->if_fastq.qh_link;
       (struct quehead *) ifm != &slirp->if_fastq;
       ifm = ifm->ifq_next) {
    if (ifm->ifq_so == so) {
      ifm->ifq_so = NULL;
    }
  }

  for (ifm = (struct mbuf *) slirp->if_batchq.qh_link;
       (struct quehead *) ifm != &slirp->if_batchq;
       ifm = ifm->ifq_next) {
    if (ifm->ifq_so == so) {
      ifm->ifq_so = NULL;
    }
  }
86

B
bellard 已提交
87 88 89 90
  if (so->so_emu==EMU_RSH && so->extra) {
	sofree(so->extra);
	so->extra=NULL;
  }
91 92 93 94
  if (so == slirp->tcp_last_so) {
      slirp->tcp_last_so = &slirp->tcb;
  } else if (so == slirp->udp_last_so) {
      slirp->udp_last_so = &slirp->udb;
95 96
  } else if (so == slirp->icmp_last_so) {
      slirp->icmp_last_so = &slirp->icmp;
97
  }
B
bellard 已提交
98
  m_free(so->so_m);
99 100

  if(so->so_next && so->so_prev)
B
bellard 已提交
101 102 103 104 105
    remque(so);  /* crashes if so is not in a queue */

  free(so);
}

106
size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np)
B
bellard 已提交
107
{
108
	int n, lss, total;
B
bellard 已提交
109 110 111
	struct sbuf *sb = &so->so_snd;
	int len = sb->sb_datalen - sb->sb_cc;
	int mss = so->so_tcpcb->t_maxseg;
112

113
	DEBUG_CALL("sopreprbuf");
114
	DEBUG_ARG("so = %p", so);
115

116 117 118
	if (len <= 0)
		return 0;

B
bellard 已提交
119
	iov[0].iov_base = sb->sb_wptr;
120 121
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
	if (sb->sb_wptr < sb->sb_rptr) {
		iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len)
		   iov[0].iov_len = len;
		if (iov[0].iov_len > mss)
		   iov[0].iov_len -= iov[0].iov_len%mss;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_rptr - sb->sb_data;
			if(iov[1].iov_len > len)
			   iov[1].iov_len = len;
			total = iov[0].iov_len + iov[1].iov_len;
			if (total > mss) {
				lss = total%mss;
				if (iov[1].iov_len > lss) {
					iov[1].iov_len -= lss;
					n = 2;
				} else {
					lss -= iov[1].iov_len;
					iov[0].iov_len -= lss;
					n = 1;
				}
			} else
				n = 2;
		} else {
			if (iov[0].iov_len > mss)
			   iov[0].iov_len -= iov[0].iov_len%mss;
			n = 1;
		}
	}
159 160 161 162 163 164 165 166 167 168 169 170
	if (np)
		*np = n;

	return iov[0].iov_len + (n - 1) * iov[1].iov_len;
}

/*
 * Read from so's socket into sb_snd, updating all relevant sbuf fields
 * NOTE: This will only be called if it is select()ed for reading, so
 * a read() of 0 (or less) means it's disconnected
 */
int
171
soread(struct socket *so)
172 173 174 175 176 177
{
	int n, nn;
	struct sbuf *sb = &so->so_snd;
	struct iovec iov[2];

	DEBUG_CALL("soread");
178
	DEBUG_ARG("so = %p", so);
179 180 181 182 183 184

	/*
	 * No need to check if there's enough room to read.
	 * soread wouldn't have been called if there weren't
	 */
	sopreprbuf(so, iov, &n);
185

B
bellard 已提交
186 187 188 189
#ifdef HAVE_READV
	nn = readv(so->s, (struct iovec *)iov, n);
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#else
B
Blue Swirl 已提交
190
	nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
191
#endif
B
bellard 已提交
192 193 194 195
	if (nn <= 0) {
		if (nn < 0 && (errno == EINTR || errno == EAGAIN))
			return 0;
		else {
196 197 198 199 200 201 202 203 204
			int err;
			socklen_t slen = sizeof err;

			err = errno;
			if (nn == 0) {
				getsockopt(so->s, SOL_SOCKET, SO_ERROR,
					   &err, &slen);
			}

B
bellard 已提交
205 206
			DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
			sofcantrcvmore(so);
207

208
			if (err == ECONNRESET || err == ECONNREFUSED
209 210 211 212 213
			    || err == ENOTCONN || err == EPIPE) {
				tcp_drop(sototcpcb(so), err);
			} else {
				tcp_sockclosed(sototcpcb(so));
			}
B
bellard 已提交
214 215 216
			return -1;
		}
	}
217

B
bellard 已提交
218 219 220 221 222 223 224 225
#ifndef HAVE_READV
	/*
	 * If there was no error, try and read the second time round
	 * We read again if n = 2 (ie, there's another part of the buffer)
	 * and we read as much as we could in the first read
	 * We don't test for <= 0 this time, because there legitimately
	 * might not be any more data (since the socket is non-blocking),
	 * a close will be detected on next iteration.
S
Stefan Weil 已提交
226
	 * A return of -1 won't (shouldn't) happen, since it didn't happen above
B
bellard 已提交
227
	 */
B
bellard 已提交
228 229
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
B
Blue Swirl 已提交
230
            ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
231 232 233
            if (ret > 0)
                nn += ret;
        }
234

B
bellard 已提交
235 236
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#endif
237

B
bellard 已提交
238 239 240 241 242 243 244
	/* Update fields */
	sb->sb_cc += nn;
	sb->sb_wptr += nn;
	if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_wptr -= sb->sb_datalen;
	return nn;
}
245

246 247 248 249 250 251 252
/*
 * Append size bytes from buf to so->so_snd, updating the sbuf fields.
 *
 * The data is copied into the free space described by sopreprbuf(),
 * which may be split in two parts when the buffer wraps.
 *
 * Returns size on success (0 for an empty request).  If sopreprbuf()
 * reports less than size bytes of room, or size is negative, the
 * socket is marked as unable to receive more, tcp_sockclosed() is
 * called, and -1 is returned.
 */
int soreadbuf(struct socket *so, const char *buf, int size)
{
    int nn, copy = size;
    struct sbuf *sb = &so->so_snd;
    struct iovec iov[2];

    DEBUG_CALL("soreadbuf");
    DEBUG_ARG("so = %p", so);

    /*
     * Nothing to copy.  Returning here also avoids touching iov[] when
     * sopreprbuf() would report a full buffer and leave it unset.
     */
    if (size == 0) {
        return 0;
    }

    /* Unlike soread(), the caller gives no guarantee that there is room */
    if (size < 0 || sopreprbuf(so, iov, NULL) < (size_t)size) {
        goto err;
    }

    nn = MIN(iov[0].iov_len, copy);
    memcpy(iov[0].iov_base, buf, nn);

    copy -= nn;
    buf += nn;

    if (copy == 0) {
        goto done;
    }

    /* Remainder goes to the wrapped-around part of the buffer */
    memcpy(iov[1].iov_base, buf, copy);

done:
    /* Update fields, wrapping the write pointer at the end of the buffer */
    sb->sb_cc += size;
    sb->sb_wptr += size;
    if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) {
        sb->sb_wptr -= sb->sb_datalen;
    }
    return size;

err:
    sofcantrcvmore(so);
    tcp_sockclosed(sototcpcb(so));
    fprintf(stderr, "soreadbuf buffer too small\n");
    return -1;
}

B
bellard 已提交
288 289
/*
 * Get urgent data
290
 *
B
bellard 已提交
291 292 293 294
 * When the socket is created, we set it SO_OOBINLINE,
 * so when OOB data arrives, we soread() it and everything
 * in the send buffer is sent as urgent data
 */
295
int
296
sorecvoob(struct socket *so)
B
bellard 已提交
297 298
{
	struct tcpcb *tp = sototcpcb(so);
299
	int ret;
B
bellard 已提交
300 301

	DEBUG_CALL("sorecvoob");
302
	DEBUG_ARG("so = %p", so);
303

B
bellard 已提交
304 305 306 307 308
	/*
	 * We take a guess at how much urgent data has arrived.
	 * In most situations, when urgent data arrives, the next
	 * read() should get all the urgent data.  This guess will
	 * be wrong however if more data arrives just after the
309
	 * urgent data, or the read() doesn't return all the
B
bellard 已提交
310 311
	 * urgent data.
	 */
312 313 314 315 316 317 318 319 320
	ret = soread(so);
	if (ret > 0) {
	    tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
	    tp->t_force = 1;
	    tcp_output(tp);
	    tp->t_force = 0;
	}

	return ret;
B
bellard 已提交
321 322 323 324 325 326 327
}

/*
 * Send urgent data
 * There's a lot duplicated code here, but...
 */
int
328
sosendoob(struct socket *so)
B
bellard 已提交
329 330 331
{
	struct sbuf *sb = &so->so_rcv;
	char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
332

B
bellard 已提交
333
	int n, len;
334

B
bellard 已提交
335
	DEBUG_CALL("sosendoob");
336
	DEBUG_ARG("so = %p", so);
B
bellard 已提交
337
	DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
338

B
bellard 已提交
339 340
	if (so->so_urgc > 2048)
	   so->so_urgc = 2048; /* XXXX */
341

B
bellard 已提交
342 343
	if (sb->sb_rptr < sb->sb_wptr) {
		/* We can send it directly */
344
		n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
B
bellard 已提交
345
		so->so_urgc -= n;
346

B
bellard 已提交
347 348
		DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
	} else {
349
		/*
B
bellard 已提交
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
		 * Since there's no sendv or sendtov like writev,
		 * we must copy all data to a linear buffer then
		 * send it all
		 */
		len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
		if (len > so->so_urgc) len = so->so_urgc;
		memcpy(buff, sb->sb_rptr, len);
		so->so_urgc -= len;
		if (so->so_urgc) {
			n = sb->sb_wptr - sb->sb_data;
			if (n > so->so_urgc) n = so->so_urgc;
			memcpy((buff + len), sb->sb_data, n);
			so->so_urgc -= n;
			len += n;
		}
365
		n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
B
bellard 已提交
366 367 368
#ifdef DEBUG
		if (n != len)
		   DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
369
#endif
B
bellard 已提交
370 371
		DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
	}
372

B
bellard 已提交
373 374 375 376
	sb->sb_cc -= n;
	sb->sb_rptr += n;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
377

B
bellard 已提交
378 379 380 381
	return n;
}

/*
382
 * Write data from so_rcv to so's socket,
B
bellard 已提交
383 384 385
 * updating all sbuf field as necessary
 */
int
386
sowrite(struct socket *so)
B
bellard 已提交
387 388 389 390 391
{
	int  n,nn;
	struct sbuf *sb = &so->so_rcv;
	int len = sb->sb_cc;
	struct iovec iov[2];
392

B
bellard 已提交
393
	DEBUG_CALL("sowrite");
394
	DEBUG_ARG("so = %p", so);
395

B
bellard 已提交
396 397 398 399 400 401 402 403 404 405
	if (so->so_urgc) {
		sosendoob(so);
		if (sb->sb_cc == 0)
			return 0;
	}

	/*
	 * No need to check if there's something to write,
	 * sowrite wouldn't have been called otherwise
	 */
406

B
bellard 已提交
407
	iov[0].iov_base = sb->sb_rptr;
408 409
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
	if (sb->sb_rptr < sb->sb_wptr) {
		iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_wptr - sb->sb_data;
			if (iov[1].iov_len > len) iov[1].iov_len = len;
			n = 2;
		} else
			n = 1;
	}
	/* Check if there's urgent data to send, and if so, send it */

#ifdef HAVE_READV
	nn = writev(so->s, (const struct iovec *)iov, n);
431

B
bellard 已提交
432 433
	DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#else
434
	nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0);
B
bellard 已提交
435 436 437 438
#endif
	/* This should never happen, but people tell me it does *shrug* */
	if (nn < 0 && (errno == EAGAIN || errno == EINTR))
		return 0;
439

B
bellard 已提交
440 441 442 443 444 445 446
	if (nn <= 0) {
		DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
			so->so_state, errno));
		sofcantsendmore(so);
		tcp_sockclosed(sototcpcb(so));
		return -1;
	}
447

B
bellard 已提交
448
#ifndef HAVE_READV
B
bellard 已提交
449 450
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
451
            ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
452 453 454
            if (ret > 0)
                nn += ret;
        }
B
bellard 已提交
455 456
        DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#endif
457

B
bellard 已提交
458 459 460 461 462
	/* Update sbuf */
	sb->sb_cc -= nn;
	sb->sb_rptr += nn;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
463

B
bellard 已提交
464 465 466 467 468 469
	/*
	 * If in DRAIN mode, and there's no more data, set
	 * it CANTSENDMORE
	 */
	if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
		sofcantsendmore(so);
470

B
bellard 已提交
471 472 473 474 475 476 477
	return nn;
}

/*
 * recvfrom() a UDP socket
 */
void
478
sorecvfrom(struct socket *so)
B
bellard 已提交
479
{
480
	struct sockaddr_storage addr;
481
	struct sockaddr_storage saddr, daddr;
482
	socklen_t addrlen = sizeof(struct sockaddr_storage);
483

B
bellard 已提交
484
	DEBUG_CALL("sorecvfrom");
485
	DEBUG_ARG("so = %p", so);
486

B
bellard 已提交
487 488 489
	if (so->so_type == IPPROTO_ICMP) {   /* This is a "ping" reply */
	  char buff[256];
	  int len;
490

491
	  len = recvfrom(so->s, buff, 256, 0,
B
bellard 已提交
492 493
			 (struct sockaddr *)&addr, &addrlen);
	  /* XXX Check if reply is "correct"? */
494

B
bellard 已提交
495 496 497 498 499
	  if(len == -1 || len == 0) {
	    u_char code=ICMP_UNREACH_PORT;

	    if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
	    else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
500

B
bellard 已提交
501 502
	    DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
			errno,strerror(errno)));
Y
Yann Bordenave 已提交
503
	    icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
B
bellard 已提交
504 505
	  } else {
	    icmp_reflect(so->so_m);
506
            so->so_m = NULL; /* Don't m_free() it again! */
B
bellard 已提交
507 508 509 510 511
	  }
	  /* No need for this socket anymore, udp_detach it */
	  udp_detach(so);
	} else {                            	/* A "normal" UDP packet */
	  struct mbuf *m;
B
Blue Swirl 已提交
512 513 514 515 516 517
          int len;
#ifdef _WIN32
          unsigned long n;
#else
          int n;
#endif
B
bellard 已提交
518

519 520 521 522
	  m = m_get(so->slirp);
	  if (!m) {
	      return;
	  }
523 524 525 526 527 528 529 530 531 532 533 534
	  switch (so->so_ffamily) {
	  case AF_INET:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr);
	      break;
	  case AF_INET6:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct ip6)
	                                 + sizeof(struct udphdr);
	      break;
	  default:
	      g_assert_not_reached();
	      break;
	  }
535

536
	  /*
B
bellard 已提交
537 538 539 540 541
	   * XXX Shouldn't FIONREAD packets destined for port 53,
	   * but I don't know the max packet size for DNS lookups
	   */
	  len = M_FREEROOM(m);
	  /* if (so->so_fport != htons(53)) { */
B
bellard 已提交
542
	  ioctlsocket(so->s, FIONREAD, &n);
543

B
bellard 已提交
544 545 546 547 548 549
	  if (n > len) {
	    n = (m->m_data - m->m_dat) + m->m_len + n + 1;
	    m_inc(m, n);
	    len = M_FREEROOM(m);
	  }
	  /* } */
550

B
bellard 已提交
551 552
	  m->m_len = recvfrom(so->s, m->m_data, len, 0,
			      (struct sockaddr *)&addr, &addrlen);
553
	  DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
B
bellard 已提交
554 555
		      m->m_len, errno,strerror(errno)));
	  if(m->m_len<0) {
556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
	    /* Report error as ICMP */
	    switch (so->so_lfamily) {
	    uint8_t code;
	    case AF_INET:
	      code = ICMP_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP_UNREACH_HOST;
	      } else if (errno == ENETUNREACH) {
		code = ICMP_UNREACH_NET;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp ICMP_UNREACH:%i\n", code));
	      icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
	      break;
	    case AF_INET6:
	      code = ICMP6_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP6_UNREACH_ADDRESS;
	      } else if (errno == ENETUNREACH) {
		code = ICMP6_UNREACH_NO_ROUTE;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp6 ICMP_UNREACH:%i\n", code));
	      icmp6_send_error(so->so_m, ICMP6_UNREACH, code);
	      break;
	    default:
	      g_assert_not_reached();
	      break;
	    }
B
bellard 已提交
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
	    m_free(m);
	  } else {
	  /*
	   * Hack: domain name lookup will be used the most for UDP,
	   * and since they'll only be used once there's no need
	   * for the 4 minute (or whatever) timeout... So we time them
	   * out much quicker (10 seconds  for now...)
	   */
	    if (so->so_expire) {
	      if (so->so_fport == htons(53))
		so->so_expire = curtime + SO_EXPIREFAST;
	      else
		so->so_expire = curtime + SO_EXPIRE;
	    }

602
	    /*
B
bellard 已提交
603
	     * If this packet was destined for CTL_ADDR,
604
	     * make it look like that's where it came from
B
bellard 已提交
605
	     */
606 607 608 609
	    saddr = addr;
	    sotranslate_in(so, &saddr);
	    daddr = so->lhost.ss;

610 611
	    switch (so->so_ffamily) {
	    case AF_INET:
612 613 614
	        udp_output(so, m, (struct sockaddr_in *) &saddr,
	                   (struct sockaddr_in *) &daddr,
	                   so->so_iptos);
615
	        break;
616 617 618 619
	    case AF_INET6:
	        udp6_output(so, m, (struct sockaddr_in6 *) &saddr,
	                    (struct sockaddr_in6 *) &daddr);
	        break;
620
	    default:
621
	        g_assert_not_reached();
622 623
	        break;
	    }
B
bellard 已提交
624 625 626 627 628 629 630 631
	  } /* rx error */
	} /* if ping packet */
}

/*
 * sendto() a socket
 */
int
632
sosendto(struct socket *so, struct mbuf *m)
B
bellard 已提交
633 634
{
	int ret;
635
	struct sockaddr_storage addr;
B
bellard 已提交
636 637

	DEBUG_CALL("sosendto");
638 639
	DEBUG_ARG("so = %p", so);
	DEBUG_ARG("m = %p", m);
640

641 642 643
	addr = so->fhost.ss;
	DEBUG_CALL(" sendto()ing)");
	sotranslate_out(so, &addr);
644

B
bellard 已提交
645 646
	/* Don't care what port we get */
	ret = sendto(so->s, m->m_data, m->m_len, 0,
647
		     (struct sockaddr *)&addr, sockaddr_size(&addr));
B
bellard 已提交
648 649
	if (ret < 0)
		return -1;
650

B
bellard 已提交
651 652 653 654 655 656
	/*
	 * Kill the socket if there's no reply in 4 minutes,
	 * but only if it's an expirable socket
	 */
	if (so->so_expire)
		so->so_expire = curtime + SO_EXPIRE;
657 658
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */
B
bellard 已提交
659 660 661 662
	return 0;
}

/*
663
 * Listen for incoming TCP connections
B
bellard 已提交
664 665
 */
struct socket *
666
tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr,
667
           u_int lport, int flags)
B
bellard 已提交
668 669 670
{
	struct sockaddr_in addr;
	struct socket *so;
671 672
	int s, opt = 1;
	socklen_t addrlen = sizeof(addr);
673
	memset(&addr, 0, addrlen);
B
bellard 已提交
674

675
	DEBUG_CALL("tcp_listen");
676 677
	DEBUG_ARG("haddr = %x", haddr);
	DEBUG_ARG("hport = %d", hport);
B
bellard 已提交
678 679 680
	DEBUG_ARG("laddr = %x", laddr);
	DEBUG_ARG("lport = %d", lport);
	DEBUG_ARG("flags = %x", flags);
681

682 683
	so = socreate(slirp);
	if (!so) {
B
bellard 已提交
684 685
	  return NULL;
	}
686

B
bellard 已提交
687 688 689 690 691
	/* Don't tcp_attach... we don't need so_snd nor so_rcv */
	if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
		free(so);
		return NULL;
	}
692
	insque(so, &slirp->tcb);
693 694

	/*
B
bellard 已提交
695 696 697 698
	 * SS_FACCEPTONCE sockets must time out.
	 */
	if (flags & SS_FACCEPTONCE)
	   so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
699

700 701
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= (SS_FACCEPTCONN | flags);
702
	so->so_lfamily = AF_INET;
B
bellard 已提交
703 704
	so->so_lport = lport; /* Kept in network format */
	so->so_laddr.s_addr = laddr; /* Ditto */
705

B
bellard 已提交
706
	addr.sin_family = AF_INET;
707 708
	addr.sin_addr.s_addr = haddr;
	addr.sin_port = hport;
709

K
Kevin Wolf 已提交
710
	if (((s = qemu_socket(AF_INET,SOCK_STREAM,0)) < 0) ||
711
	    (socket_set_fast_reuse(s) < 0) ||
B
bellard 已提交
712 713 714
	    (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
	    (listen(s,1) < 0)) {
		int tmperrno = errno; /* Don't clobber the real reason we failed */
715

B
bellard 已提交
716 717 718
		close(s);
		sofree(so);
		/* Restore the real errno */
B
bellard 已提交
719 720 721
#ifdef _WIN32
		WSASetLastError(tmperrno);
#else
B
bellard 已提交
722
		errno = tmperrno;
B
bellard 已提交
723
#endif
B
bellard 已提交
724 725
		return NULL;
	}
726
	qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
727

B
bellard 已提交
728
	getsockname(s,(struct sockaddr *)&addr,&addrlen);
729
	so->so_ffamily = AF_INET;
B
bellard 已提交
730 731
	so->so_fport = addr.sin_port;
	if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
732
	   so->so_faddr = slirp->vhost_addr;
B
bellard 已提交
733 734 735 736 737 738 739 740 741 742 743 744 745 746
	else
	   so->so_faddr = addr.sin_addr;

	so->s = s;
	return so;
}

/*
 * Various session state calls
 * XXX Should be #define's
 * The socket state stuff needs work, these often get called 2 or 3
 * times each when only 1 was needed
 */
void
747
soisfconnecting(struct socket *so)
B
bellard 已提交
748 749 750 751 752 753 754
{
	so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
			  SS_FCANTSENDMORE|SS_FWDRAIN);
	so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
}

void
755
soisfconnected(struct socket *so)
B
bellard 已提交
756 757 758 759 760
{
	so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
	so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
}

761 762
static void
sofcantrcvmore(struct socket *so)
B
bellard 已提交
763 764 765 766 767
{
	if ((so->so_state & SS_NOFDREF) == 0) {
		shutdown(so->s,0);
	}
	so->so_state &= ~(SS_ISFCONNECTING);
768 769 770 771
	if (so->so_state & SS_FCANTSENDMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* Don't select it */
	} else {
B
bellard 已提交
772
	   so->so_state |= SS_FCANTRCVMORE;
773
	}
B
bellard 已提交
774 775
}

776 777
static void
sofcantsendmore(struct socket *so)
B
bellard 已提交
778 779
{
	if ((so->so_state & SS_NOFDREF) == 0) {
B
bellard 已提交
780
            shutdown(so->s,1);           /* send FIN to fhost */
B
bellard 已提交
781 782
	}
	so->so_state &= ~(SS_ISFCONNECTING);
783 784 785 786
	if (so->so_state & SS_FCANTRCVMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* as above */
	} else {
B
bellard 已提交
787
	   so->so_state |= SS_FCANTSENDMORE;
788
	}
B
bellard 已提交
789 790 791 792 793 794 795
}

/*
 * Set write drain mode
 * Set CANTSENDMORE once all data has been write()n
 */
void
796
sofwdrain(struct socket *so)
B
bellard 已提交
797 798 799 800 801 802
{
	if (so->so_rcv.sb_cc)
		so->so_state |= SS_FWDRAIN;
	else
		sofcantsendmore(so);
}
803 804 805 806 807 808 809 810

/*
 * Translate addr in host addr when it is a virtual address
 */
void sotranslate_out(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
811
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
                slirp->vnetwork_addr.s_addr) {
            /* It's an alias */
            if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) {
                if (get_dns_addr(&sin->sin_addr) < 0) {
                    sin->sin_addr = loopback_addr;
                }
            } else {
                sin->sin_addr = loopback_addr;
            }
        }

        DEBUG_MISC((dfd, " addr.sin_port=%d, "
            "addr.sin_addr.s_addr=%.16s\n",
            ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)));
        break;

832 833 834 835
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) {
836 837 838 839
                uint32_t scope_id;
                if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) {
                    sin6->sin6_scope_id = scope_id;
                } else {
840
                    sin6->sin6_addr = in6addr_loopback;
S
Samuel Thibault 已提交
841
                }
842 843 844 845 846 847
            } else {
                sin6->sin6_addr = in6addr_loopback;
            }
        }
        break;

848 849 850 851 852 853 854 855 856
    default:
        break;
    }
}

void sotranslate_in(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
857
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
            slirp->vnetwork_addr.s_addr) {
            uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr;

            if ((so->so_faddr.s_addr & inv_mask) == inv_mask) {
                sin->sin_addr = slirp->vhost_addr;
            } else if (sin->sin_addr.s_addr == loopback_addr.s_addr ||
                       so->so_faddr.s_addr != slirp->vhost_addr.s_addr) {
                sin->sin_addr = so->so_faddr;
            }
        }
        break;

874 875 876 877 878 879 880 881 882 883
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&sin6->sin6_addr, &in6addr_loopback)
                    || !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) {
                sin6->sin6_addr = so->so_faddr6;
            }
        }
        break;

884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904
    default:
        break;
    }
}

/*
 * Translate connections from localhost to the real hostname
 */
void sotranslate_accept(struct socket *so)
{
    Slirp *slirp = so->slirp;

    switch (so->so_ffamily) {
    case AF_INET:
        if (so->so_faddr.s_addr == INADDR_ANY ||
            (so->so_faddr.s_addr & loopback_mask) ==
            (loopback_addr.s_addr & loopback_mask)) {
           so->so_faddr = slirp->vhost_addr;
        }
        break;

905 906 907 908 909 910 911
   case AF_INET6:
        if (in6_equal(&so->so_faddr6, &in6addr_any) ||
                in6_equal(&so->so_faddr6, &in6addr_loopback)) {
           so->so_faddr6 = slirp->vhost_addr6;
        }
        break;

912 913 914 915
    default:
        break;
    }
}