socket.c 22.8 KB
Newer Older
B
bellard 已提交
1 2
/*
 * Copyright (c) 1995 Danny Gasparovski.
3 4
 *
 * Please read the file COPYRIGHT for the
B
bellard 已提交
5 6 7
 * terms and conditions of the copyright.
 */

P
Peter Maydell 已提交
8
#include "qemu/osdep.h"
9
#include "qemu-common.h"
10
#include "slirp.h"
B
bellard 已提交
11
#include "ip_icmp.h"
B
bellard 已提交
12 13 14
#ifdef __sun__
#include <sys/filio.h>
#endif
B
bellard 已提交
15

16 17 18
static void sofcantrcvmore(struct socket *so);
static void sofcantsendmore(struct socket *so);

19 20
struct socket *solookup(struct socket **last, struct socket *head,
        struct sockaddr_storage *lhost, struct sockaddr_storage *fhost)
B
bellard 已提交
21
{
22
    struct socket *so = *last;
23

24
    /* Optimisation */
25 26
    if (so != head && sockaddr_equal(&(so->lhost.ss), lhost)
            && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
27 28
        return so;
    }
29

30
    for (so = head->so_next; so != head; so = so->so_next) {
31 32
        if (sockaddr_equal(&(so->lhost.ss), lhost)
                && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
33 34 35 36
            *last = so;
            return so;
        }
    }
37

38
    return (struct socket *)NULL;
B
bellard 已提交
39 40 41 42 43 44 45 46
}

/*
 * Create a new socket, initialise the fields
 * It is the responsibility of the caller to
 * insque() it into the correct linked-list
 */
struct socket *
47
socreate(Slirp *slirp)
B
bellard 已提交
48 49
{
  struct socket *so;
50

B
bellard 已提交
51 52 53 54 55
  so = (struct socket *)malloc(sizeof(struct socket));
  if(so) {
    memset(so, 0, sizeof(struct socket));
    so->so_state = SS_NOFDREF;
    so->s = -1;
56
    so->slirp = slirp;
57
    so->pollfds_idx = -1;
B
bellard 已提交
58 59 60 61 62 63 64 65
  }
  return(so);
}

/*
 * remque and free a socket, clobber cache
 */
void
66
sofree(struct socket *so)
B
bellard 已提交
67
{
68
  Slirp *slirp = so->slirp;
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
  struct mbuf *ifm;

  for (ifm = (struct mbuf *) slirp->if_fastq.qh_link;
       (struct quehead *) ifm != &slirp->if_fastq;
       ifm = ifm->ifq_next) {
    if (ifm->ifq_so == so) {
      ifm->ifq_so = NULL;
    }
  }

  for (ifm = (struct mbuf *) slirp->if_batchq.qh_link;
       (struct quehead *) ifm != &slirp->if_batchq;
       ifm = ifm->ifq_next) {
    if (ifm->ifq_so == so) {
      ifm->ifq_so = NULL;
    }
  }
86

B
bellard 已提交
87 88 89 90
  if (so->so_emu==EMU_RSH && so->extra) {
	sofree(so->extra);
	so->extra=NULL;
  }
91 92 93 94
  if (so == slirp->tcp_last_so) {
      slirp->tcp_last_so = &slirp->tcb;
  } else if (so == slirp->udp_last_so) {
      slirp->udp_last_so = &slirp->udb;
95 96
  } else if (so == slirp->icmp_last_so) {
      slirp->icmp_last_so = &slirp->icmp;
97
  }
B
bellard 已提交
98
  m_free(so->so_m);
99 100

  if(so->so_next && so->so_prev)
B
bellard 已提交
101 102
    remque(so);  /* crashes if so is not in a queue */

M
Marc-André Lureau 已提交
103 104 105
  if (so->so_tcpcb) {
      free(so->so_tcpcb);
  }
B
bellard 已提交
106 107 108
  free(so);
}

109
size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np)
B
bellard 已提交
110
{
111
	int n, lss, total;
B
bellard 已提交
112 113 114
	struct sbuf *sb = &so->so_snd;
	int len = sb->sb_datalen - sb->sb_cc;
	int mss = so->so_tcpcb->t_maxseg;
115

116
	DEBUG_CALL("sopreprbuf");
117
	DEBUG_ARG("so = %p", so);
118

119 120 121
	if (len <= 0)
		return 0;

B
bellard 已提交
122
	iov[0].iov_base = sb->sb_wptr;
123 124
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
	if (sb->sb_wptr < sb->sb_rptr) {
		iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len)
		   iov[0].iov_len = len;
		if (iov[0].iov_len > mss)
		   iov[0].iov_len -= iov[0].iov_len%mss;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_rptr - sb->sb_data;
			if(iov[1].iov_len > len)
			   iov[1].iov_len = len;
			total = iov[0].iov_len + iov[1].iov_len;
			if (total > mss) {
				lss = total%mss;
				if (iov[1].iov_len > lss) {
					iov[1].iov_len -= lss;
					n = 2;
				} else {
					lss -= iov[1].iov_len;
					iov[0].iov_len -= lss;
					n = 1;
				}
			} else
				n = 2;
		} else {
			if (iov[0].iov_len > mss)
			   iov[0].iov_len -= iov[0].iov_len%mss;
			n = 1;
		}
	}
162 163 164 165 166 167 168 169 170 171 172 173
	if (np)
		*np = n;

	return iov[0].iov_len + (n - 1) * iov[1].iov_len;
}

/*
 * Read from so's socket into sb_snd, updating all relevant sbuf fields
 * NOTE: This will only be called if it is select()ed for reading, so
 * a read() of 0 (or less) means it's disconnected
 */
int
174
soread(struct socket *so)
175 176 177 178 179 180
{
	int n, nn;
	struct sbuf *sb = &so->so_snd;
	struct iovec iov[2];

	DEBUG_CALL("soread");
181
	DEBUG_ARG("so = %p", so);
182 183 184 185 186 187

	/*
	 * No need to check if there's enough room to read.
	 * soread wouldn't have been called if there weren't
	 */
	sopreprbuf(so, iov, &n);
188

B
bellard 已提交
189 190 191 192
#ifdef HAVE_READV
	nn = readv(so->s, (struct iovec *)iov, n);
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#else
B
Blue Swirl 已提交
193
	nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
194
#endif
B
bellard 已提交
195 196 197 198
	if (nn <= 0) {
		if (nn < 0 && (errno == EINTR || errno == EAGAIN))
			return 0;
		else {
199 200 201 202 203 204 205 206 207
			int err;
			socklen_t slen = sizeof err;

			err = errno;
			if (nn == 0) {
				getsockopt(so->s, SOL_SOCKET, SO_ERROR,
					   &err, &slen);
			}

B
bellard 已提交
208 209
			DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
			sofcantrcvmore(so);
210

211
			if (err == ECONNRESET || err == ECONNREFUSED
212 213 214 215 216
			    || err == ENOTCONN || err == EPIPE) {
				tcp_drop(sototcpcb(so), err);
			} else {
				tcp_sockclosed(sototcpcb(so));
			}
B
bellard 已提交
217 218 219
			return -1;
		}
	}
220

B
bellard 已提交
221 222 223 224 225 226 227 228
#ifndef HAVE_READV
	/*
	 * If there was no error, try and read the second time round
	 * We read again if n = 2 (ie, there's another part of the buffer)
	 * and we read as much as we could in the first read
	 * We don't test for <= 0 this time, because there legitimately
	 * might not be any more data (since the socket is non-blocking),
	 * a close will be detected on next iteration.
S
Stefan Weil 已提交
229
	 * A return of -1 won't (shouldn't) happen, since it didn't happen above
B
bellard 已提交
230
	 */
B
bellard 已提交
231 232
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
B
Blue Swirl 已提交
233
            ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
234 235 236
            if (ret > 0)
                nn += ret;
        }
237

B
bellard 已提交
238 239
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#endif
240

B
bellard 已提交
241 242 243 244 245 246 247
	/* Update fields */
	sb->sb_cc += nn;
	sb->sb_wptr += nn;
	if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_wptr -= sb->sb_datalen;
	return nn;
}
248

249 250 251 252 253 254 255
/*
 * Copy size bytes from buf into so's send buffer (so_snd) as if they had
 * been read from the host socket, updating the buffer's fill count and
 * write pointer.
 *
 * The copy is all-or-nothing: if sopreprbuf() reports less than size
 * bytes of usable room (or size is negative), nothing is copied, the
 * socket is marked as unable to receive more, the TCP side is told the
 * socket closed, and -1 is returned.
 *
 * Returns size on success (0 for an empty request).
 */
int soreadbuf(struct socket *so, const char *buf, int size)
{
    struct sbuf *sb = &so->so_snd;
    struct iovec iov[2];
    int first, copy = size;

    DEBUG_CALL("soreadbuf");
    DEBUG_ARG("so = %p", so);

    /*
     * Nothing to copy.  Returning here also avoids touching iov when
     * sopreprbuf() bails out early on a full buffer and leaves it unset.
     */
    if (size == 0) {
        return 0;
    }

    /*
     * Unlike soread(), the caller gives no guarantee that there is room,
     * so the whole request must fit.  A negative size is rejected
     * explicitly rather than through signed-to-unsigned conversion.
     */
    if (size < 0 || sopreprbuf(so, iov, NULL) < (size_t)size) {
        goto err;
    }

    /* First run: up to the end of the ring storage */
    first = MIN(iov[0].iov_len, copy);
    memcpy(iov[0].iov_base, buf, first);

    copy -= first;
    buf += first;

    /* Second run: whatever is left wraps to the start of the storage */
    if (copy != 0) {
        memcpy(iov[1].iov_base, buf, copy);
    }

    /* Update fields */
    sb->sb_cc += size;
    sb->sb_wptr += size;
    if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) {
        sb->sb_wptr -= sb->sb_datalen;
    }
    return size;

err:
    sofcantrcvmore(so);
    tcp_sockclosed(sototcpcb(so));
    fprintf(stderr, "soreadbuf buffer too small\n");
    return -1;
}

B
bellard 已提交
291 292
/*
 * Get urgent data
293
 *
B
bellard 已提交
294 295 296 297
 * When the socket is created, we set it SO_OOBINLINE,
 * so when OOB data arrives, we soread() it and everything
 * in the send buffer is sent as urgent data
 */
298
int
299
sorecvoob(struct socket *so)
B
bellard 已提交
300 301
{
	struct tcpcb *tp = sototcpcb(so);
302
	int ret;
B
bellard 已提交
303 304

	DEBUG_CALL("sorecvoob");
305
	DEBUG_ARG("so = %p", so);
306

B
bellard 已提交
307 308 309 310 311
	/*
	 * We take a guess at how much urgent data has arrived.
	 * In most situations, when urgent data arrives, the next
	 * read() should get all the urgent data.  This guess will
	 * be wrong however if more data arrives just after the
312
	 * urgent data, or the read() doesn't return all the
B
bellard 已提交
313 314
	 * urgent data.
	 */
315 316 317 318 319 320 321 322 323
	ret = soread(so);
	if (ret > 0) {
	    tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
	    tp->t_force = 1;
	    tcp_output(tp);
	    tp->t_force = 0;
	}

	return ret;
B
bellard 已提交
324 325 326 327 328 329 330
}

/*
 * Send urgent data
 * There's a lot duplicated code here, but...
 */
int
331
sosendoob(struct socket *so)
B
bellard 已提交
332 333 334
{
	struct sbuf *sb = &so->so_rcv;
	char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
335

B
bellard 已提交
336
	int n, len;
337

B
bellard 已提交
338
	DEBUG_CALL("sosendoob");
339
	DEBUG_ARG("so = %p", so);
B
bellard 已提交
340
	DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
341

B
bellard 已提交
342 343
	if (so->so_urgc > 2048)
	   so->so_urgc = 2048; /* XXXX */
344

B
bellard 已提交
345 346
	if (sb->sb_rptr < sb->sb_wptr) {
		/* We can send it directly */
347
		n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
B
bellard 已提交
348
	} else {
349
		/*
B
bellard 已提交
350 351 352 353
		 * Since there's no sendv or sendtov like writev,
		 * we must copy all data to a linear buffer then
		 * send it all
		 */
354
		uint32_t urgc = so->so_urgc;
B
bellard 已提交
355
		len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
356 357 358
		if (len > urgc) {
			len = urgc;
		}
B
bellard 已提交
359
		memcpy(buff, sb->sb_rptr, len);
360 361
		urgc -= len;
		if (urgc) {
B
bellard 已提交
362
			n = sb->sb_wptr - sb->sb_data;
363 364 365
			if (n > urgc) {
				n = urgc;
			}
B
bellard 已提交
366 367 368
			memcpy((buff + len), sb->sb_data, n);
			len += n;
		}
369
		n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
370 371
	}

B
bellard 已提交
372
#ifdef DEBUG
373 374 375
	if (n != len) {
		DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
	}
376
#endif
377 378
	if (n < 0) {
		return n;
B
bellard 已提交
379
	}
380 381
	so->so_urgc -= n;
	DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
382

B
bellard 已提交
383 384 385 386
	sb->sb_cc -= n;
	sb->sb_rptr += n;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
387

B
bellard 已提交
388 389 390 391
	return n;
}

/*
392
 * Write data from so_rcv to so's socket,
B
bellard 已提交
393 394 395
 * updating all sbuf field as necessary
 */
int
396
sowrite(struct socket *so)
B
bellard 已提交
397 398 399 400 401
{
	int  n,nn;
	struct sbuf *sb = &so->so_rcv;
	int len = sb->sb_cc;
	struct iovec iov[2];
402

B
bellard 已提交
403
	DEBUG_CALL("sowrite");
404
	DEBUG_ARG("so = %p", so);
405

B
bellard 已提交
406
	if (so->so_urgc) {
407 408 409 410 411 412 413 414 415
		uint32_t expected = so->so_urgc;
		if (sosendoob(so) < expected) {
			/* Treat a short write as a fatal error too,
			 * rather than continuing on and sending the urgent
			 * data as if it were non-urgent and leaving the
			 * so_urgc count wrong.
			 */
			goto err_disconnected;
		}
B
bellard 已提交
416 417 418 419 420 421 422 423
		if (sb->sb_cc == 0)
			return 0;
	}

	/*
	 * No need to check if there's something to write,
	 * sowrite wouldn't have been called otherwise
	 */
424

B
bellard 已提交
425
	iov[0].iov_base = sb->sb_rptr;
426 427
        iov[1].iov_base = NULL;
        iov[1].iov_len = 0;
B
bellard 已提交
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
	if (sb->sb_rptr < sb->sb_wptr) {
		iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		n = 1;
	} else {
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_wptr - sb->sb_data;
			if (iov[1].iov_len > len) iov[1].iov_len = len;
			n = 2;
		} else
			n = 1;
	}
	/* Check if there's urgent data to send, and if so, send it */

#ifdef HAVE_READV
	nn = writev(so->s, (const struct iovec *)iov, n);
449

B
bellard 已提交
450 451
	DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#else
452
	nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0);
B
bellard 已提交
453 454 455 456
#endif
	/* This should never happen, but people tell me it does *shrug* */
	if (nn < 0 && (errno == EAGAIN || errno == EINTR))
		return 0;
457

B
bellard 已提交
458
	if (nn <= 0) {
459
		goto err_disconnected;
B
bellard 已提交
460
	}
461

B
bellard 已提交
462
#ifndef HAVE_READV
B
bellard 已提交
463 464
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
465
            ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0);
B
bellard 已提交
466 467 468
            if (ret > 0)
                nn += ret;
        }
B
bellard 已提交
469 470
        DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#endif
471

B
bellard 已提交
472 473 474 475 476
	/* Update sbuf */
	sb->sb_cc -= nn;
	sb->sb_rptr += nn;
	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_rptr -= sb->sb_datalen;
477

B
bellard 已提交
478 479 480 481 482 483
	/*
	 * If in DRAIN mode, and there's no more data, set
	 * it CANTSENDMORE
	 */
	if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
		sofcantsendmore(so);
484

B
bellard 已提交
485
	return nn;
486 487 488 489 490 491 492

err_disconnected:
	DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
		    so->so_state, errno));
	sofcantsendmore(so);
	tcp_sockclosed(sototcpcb(so));
	return -1;
B
bellard 已提交
493 494 495 496 497 498
}

/*
 * recvfrom() a UDP socket
 */
void
499
sorecvfrom(struct socket *so)
B
bellard 已提交
500
{
501
	struct sockaddr_storage addr;
502
	struct sockaddr_storage saddr, daddr;
503
	socklen_t addrlen = sizeof(struct sockaddr_storage);
504

B
bellard 已提交
505
	DEBUG_CALL("sorecvfrom");
506
	DEBUG_ARG("so = %p", so);
507

B
bellard 已提交
508 509 510
	if (so->so_type == IPPROTO_ICMP) {   /* This is a "ping" reply */
	  char buff[256];
	  int len;
511

512
	  len = recvfrom(so->s, buff, 256, 0,
B
bellard 已提交
513 514
			 (struct sockaddr *)&addr, &addrlen);
	  /* XXX Check if reply is "correct"? */
515

B
bellard 已提交
516 517 518 519 520
	  if(len == -1 || len == 0) {
	    u_char code=ICMP_UNREACH_PORT;

	    if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
	    else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
521

B
bellard 已提交
522 523
	    DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
			errno,strerror(errno)));
Y
Yann Bordenave 已提交
524
	    icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
B
bellard 已提交
525 526
	  } else {
	    icmp_reflect(so->so_m);
527
            so->so_m = NULL; /* Don't m_free() it again! */
B
bellard 已提交
528 529 530 531 532
	  }
	  /* No need for this socket anymore, udp_detach it */
	  udp_detach(so);
	} else {                            	/* A "normal" UDP packet */
	  struct mbuf *m;
B
Blue Swirl 已提交
533 534 535 536 537 538
          int len;
#ifdef _WIN32
          unsigned long n;
#else
          int n;
#endif
B
bellard 已提交
539

540 541 542 543
	  m = m_get(so->slirp);
	  if (!m) {
	      return;
	  }
544 545 546 547 548 549 550 551 552 553 554 555
	  switch (so->so_ffamily) {
	  case AF_INET:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr);
	      break;
	  case AF_INET6:
	      m->m_data += IF_MAXLINKHDR + sizeof(struct ip6)
	                                 + sizeof(struct udphdr);
	      break;
	  default:
	      g_assert_not_reached();
	      break;
	  }
556

557
	  /*
B
bellard 已提交
558 559 560 561 562
	   * XXX Shouldn't FIONREAD packets destined for port 53,
	   * but I don't know the max packet size for DNS lookups
	   */
	  len = M_FREEROOM(m);
	  /* if (so->so_fport != htons(53)) { */
B
bellard 已提交
563
	  ioctlsocket(so->s, FIONREAD, &n);
564

B
bellard 已提交
565 566 567 568 569 570
	  if (n > len) {
	    n = (m->m_data - m->m_dat) + m->m_len + n + 1;
	    m_inc(m, n);
	    len = M_FREEROOM(m);
	  }
	  /* } */
571

B
bellard 已提交
572 573
	  m->m_len = recvfrom(so->s, m->m_data, len, 0,
			      (struct sockaddr *)&addr, &addrlen);
574
	  DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
B
bellard 已提交
575 576
		      m->m_len, errno,strerror(errno)));
	  if(m->m_len<0) {
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
	    /* Report error as ICMP */
	    switch (so->so_lfamily) {
	    uint8_t code;
	    case AF_INET:
	      code = ICMP_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP_UNREACH_HOST;
	      } else if (errno == ENETUNREACH) {
		code = ICMP_UNREACH_NET;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp ICMP_UNREACH:%i\n", code));
	      icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
	      break;
	    case AF_INET6:
	      code = ICMP6_UNREACH_PORT;

	      if (errno == EHOSTUNREACH) {
		code = ICMP6_UNREACH_ADDRESS;
	      } else if (errno == ENETUNREACH) {
		code = ICMP6_UNREACH_NO_ROUTE;
	      }

	      DEBUG_MISC((dfd, " rx error, tx icmp6 ICMP_UNREACH:%i\n", code));
	      icmp6_send_error(so->so_m, ICMP6_UNREACH, code);
	      break;
	    default:
	      g_assert_not_reached();
	      break;
	    }
B
bellard 已提交
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
	    m_free(m);
	  } else {
	  /*
	   * Hack: domain name lookup will be used the most for UDP,
	   * and since they'll only be used once there's no need
	   * for the 4 minute (or whatever) timeout... So we time them
	   * out much quicker (10 seconds  for now...)
	   */
	    if (so->so_expire) {
	      if (so->so_fport == htons(53))
		so->so_expire = curtime + SO_EXPIREFAST;
	      else
		so->so_expire = curtime + SO_EXPIRE;
	    }

623
	    /*
B
bellard 已提交
624
	     * If this packet was destined for CTL_ADDR,
625
	     * make it look like that's where it came from
B
bellard 已提交
626
	     */
627 628 629 630
	    saddr = addr;
	    sotranslate_in(so, &saddr);
	    daddr = so->lhost.ss;

631 632
	    switch (so->so_ffamily) {
	    case AF_INET:
633 634 635
	        udp_output(so, m, (struct sockaddr_in *) &saddr,
	                   (struct sockaddr_in *) &daddr,
	                   so->so_iptos);
636
	        break;
637 638 639 640
	    case AF_INET6:
	        udp6_output(so, m, (struct sockaddr_in6 *) &saddr,
	                    (struct sockaddr_in6 *) &daddr);
	        break;
641
	    default:
642
	        g_assert_not_reached();
643 644
	        break;
	    }
B
bellard 已提交
645 646 647 648 649 650 651 652
	  } /* rx error */
	} /* if ping packet */
}

/*
 * sendto() a socket
 */
int
653
sosendto(struct socket *so, struct mbuf *m)
B
bellard 已提交
654 655
{
	int ret;
656
	struct sockaddr_storage addr;
B
bellard 已提交
657 658

	DEBUG_CALL("sosendto");
659 660
	DEBUG_ARG("so = %p", so);
	DEBUG_ARG("m = %p", m);
661

662 663 664
	addr = so->fhost.ss;
	DEBUG_CALL(" sendto()ing)");
	sotranslate_out(so, &addr);
665

B
bellard 已提交
666 667
	/* Don't care what port we get */
	ret = sendto(so->s, m->m_data, m->m_len, 0,
668
		     (struct sockaddr *)&addr, sockaddr_size(&addr));
B
bellard 已提交
669 670
	if (ret < 0)
		return -1;
671

B
bellard 已提交
672 673 674 675 676 677
	/*
	 * Kill the socket if there's no reply in 4 minutes,
	 * but only if it's an expirable socket
	 */
	if (so->so_expire)
		so->so_expire = curtime + SO_EXPIRE;
678 679
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */
B
bellard 已提交
680 681 682 683
	return 0;
}

/*
684
 * Listen for incoming TCP connections
B
bellard 已提交
685 686
 */
struct socket *
687
tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr,
688
           u_int lport, int flags)
B
bellard 已提交
689 690 691
{
	struct sockaddr_in addr;
	struct socket *so;
692 693
	int s, opt = 1;
	socklen_t addrlen = sizeof(addr);
694
	memset(&addr, 0, addrlen);
B
bellard 已提交
695

696
	DEBUG_CALL("tcp_listen");
697 698
	DEBUG_ARG("haddr = %x", haddr);
	DEBUG_ARG("hport = %d", hport);
B
bellard 已提交
699 700 701
	DEBUG_ARG("laddr = %x", laddr);
	DEBUG_ARG("lport = %d", lport);
	DEBUG_ARG("flags = %x", flags);
702

703 704
	so = socreate(slirp);
	if (!so) {
B
bellard 已提交
705 706
	  return NULL;
	}
707

B
bellard 已提交
708 709 710 711 712
	/* Don't tcp_attach... we don't need so_snd nor so_rcv */
	if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
		free(so);
		return NULL;
	}
713
	insque(so, &slirp->tcb);
714 715

	/*
B
bellard 已提交
716 717 718 719
	 * SS_FACCEPTONCE sockets must time out.
	 */
	if (flags & SS_FACCEPTONCE)
	   so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
720

721 722
	so->so_state &= SS_PERSISTENT_MASK;
	so->so_state |= (SS_FACCEPTCONN | flags);
723
	so->so_lfamily = AF_INET;
B
bellard 已提交
724 725
	so->so_lport = lport; /* Kept in network format */
	so->so_laddr.s_addr = laddr; /* Ditto */
726

B
bellard 已提交
727
	addr.sin_family = AF_INET;
728 729
	addr.sin_addr.s_addr = haddr;
	addr.sin_port = hport;
730

K
Kevin Wolf 已提交
731
	if (((s = qemu_socket(AF_INET,SOCK_STREAM,0)) < 0) ||
732
	    (socket_set_fast_reuse(s) < 0) ||
B
bellard 已提交
733 734 735
	    (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
	    (listen(s,1) < 0)) {
		int tmperrno = errno; /* Don't clobber the real reason we failed */
736

737 738 739
                if (s >= 0) {
                    closesocket(s);
                }
B
bellard 已提交
740 741
		sofree(so);
		/* Restore the real errno */
B
bellard 已提交
742 743 744
#ifdef _WIN32
		WSASetLastError(tmperrno);
#else
B
bellard 已提交
745
		errno = tmperrno;
B
bellard 已提交
746
#endif
B
bellard 已提交
747 748
		return NULL;
	}
749
	qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
750

B
bellard 已提交
751
	getsockname(s,(struct sockaddr *)&addr,&addrlen);
752
	so->so_ffamily = AF_INET;
B
bellard 已提交
753 754
	so->so_fport = addr.sin_port;
	if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
755
	   so->so_faddr = slirp->vhost_addr;
B
bellard 已提交
756 757 758 759 760 761 762 763 764 765 766 767 768 769
	else
	   so->so_faddr = addr.sin_addr;

	so->s = s;
	return so;
}

/*
 * Various session state calls
 * XXX Should be #define's
 * The socket state stuff needs work; these often get called 2 or 3
 * times each when only 1 call was needed
 */
void
770
soisfconnecting(struct socket *so)
B
bellard 已提交
771 772 773 774 775 776 777
{
	so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
			  SS_FCANTSENDMORE|SS_FWDRAIN);
	so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
}

void
778
soisfconnected(struct socket *so)
B
bellard 已提交
779 780 781 782 783
{
	so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
	so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
}

784 785
static void
sofcantrcvmore(struct socket *so)
B
bellard 已提交
786 787 788 789 790
{
	if ((so->so_state & SS_NOFDREF) == 0) {
		shutdown(so->s,0);
	}
	so->so_state &= ~(SS_ISFCONNECTING);
791 792 793 794
	if (so->so_state & SS_FCANTSENDMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* Don't select it */
	} else {
B
bellard 已提交
795
	   so->so_state |= SS_FCANTRCVMORE;
796
	}
B
bellard 已提交
797 798
}

799 800
static void
sofcantsendmore(struct socket *so)
B
bellard 已提交
801 802
{
	if ((so->so_state & SS_NOFDREF) == 0) {
B
bellard 已提交
803
            shutdown(so->s,1);           /* send FIN to fhost */
B
bellard 已提交
804 805
	}
	so->so_state &= ~(SS_ISFCONNECTING);
806 807 808 809
	if (so->so_state & SS_FCANTRCVMORE) {
	   so->so_state &= SS_PERSISTENT_MASK;
	   so->so_state |= SS_NOFDREF; /* as above */
	} else {
B
bellard 已提交
810
	   so->so_state |= SS_FCANTSENDMORE;
811
	}
B
bellard 已提交
812 813 814 815 816 817 818
}

/*
 * Set write drain mode
 * Set CANTSENDMORE once all data has been write()n
 */
void
819
sofwdrain(struct socket *so)
B
bellard 已提交
820 821 822 823 824 825
{
	if (so->so_rcv.sb_cc)
		so->so_state |= SS_FWDRAIN;
	else
		sofcantsendmore(so);
}
826 827 828 829 830 831 832 833

/*
 * Translate addr in host addr when it is a virtual address
 */
void sotranslate_out(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
834
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
                slirp->vnetwork_addr.s_addr) {
            /* It's an alias */
            if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) {
                if (get_dns_addr(&sin->sin_addr) < 0) {
                    sin->sin_addr = loopback_addr;
                }
            } else {
                sin->sin_addr = loopback_addr;
            }
        }

        DEBUG_MISC((dfd, " addr.sin_port=%d, "
            "addr.sin_addr.s_addr=%.16s\n",
            ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)));
        break;

855 856 857 858
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) {
859 860 861 862
                uint32_t scope_id;
                if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) {
                    sin6->sin6_scope_id = scope_id;
                } else {
863
                    sin6->sin6_addr = in6addr_loopback;
S
Samuel Thibault 已提交
864
                }
865 866 867 868 869 870
            } else {
                sin6->sin6_addr = in6addr_loopback;
            }
        }
        break;

871 872 873 874 875 876 877 878 879
    default:
        break;
    }
}

void sotranslate_in(struct socket *so, struct sockaddr_storage *addr)
{
    Slirp *slirp = so->slirp;
    struct sockaddr_in *sin = (struct sockaddr_in *)addr;
880
    struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896

    switch (addr->ss_family) {
    case AF_INET:
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
            slirp->vnetwork_addr.s_addr) {
            uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr;

            if ((so->so_faddr.s_addr & inv_mask) == inv_mask) {
                sin->sin_addr = slirp->vhost_addr;
            } else if (sin->sin_addr.s_addr == loopback_addr.s_addr ||
                       so->so_faddr.s_addr != slirp->vhost_addr.s_addr) {
                sin->sin_addr = so->so_faddr;
            }
        }
        break;

897 898 899 900 901 902 903 904 905 906
    case AF_INET6:
        if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6,
                    slirp->vprefix_len)) {
            if (in6_equal(&sin6->sin6_addr, &in6addr_loopback)
                    || !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) {
                sin6->sin6_addr = so->so_faddr6;
            }
        }
        break;

907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
    default:
        break;
    }
}

/*
 * Translate connections from localhost to the real hostname
 */
void sotranslate_accept(struct socket *so)
{
    Slirp *slirp = so->slirp;

    switch (so->so_ffamily) {
    case AF_INET:
        if (so->so_faddr.s_addr == INADDR_ANY ||
            (so->so_faddr.s_addr & loopback_mask) ==
            (loopback_addr.s_addr & loopback_mask)) {
           so->so_faddr = slirp->vhost_addr;
        }
        break;

928 929 930 931 932 933 934
   case AF_INET6:
        if (in6_equal(&so->so_faddr6, &in6addr_any) ||
                in6_equal(&so->so_faddr6, &in6addr_loopback)) {
           so->so_faddr6 = slirp->vhost_addr6;
        }
        break;

935 936 937 938
    default:
        break;
    }
}