iov.c 11.4 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * Helpers for getting linearized buffers from iov / filling buffers into iovs
 *
 * Copyright IBM, Corp. 2007, 2008
 * Copyright (C) 2010 Red Hat, Inc.
 *
 * Author(s):
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Amit Shah <amit.shah@redhat.com>
 *  Michael Tokarev <mjt@tls.msk.ru>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

19
#include "qemu/iov.h"
20

21 22 23 24 25 26 27 28
#ifdef _WIN32
# include <windows.h>
# include <winsock2.h>
#else
# include <sys/types.h>
# include <sys/socket.h>
#endif

M
Michael S. Tsirkin 已提交
29
/*
 * Scatter the linear buffer `buf' into the vector `iov'.
 *
 * The first `offset' bytes of the vector are skipped, then up to `bytes'
 * bytes are copied from `buf' into consecutive vector elements.  Copying
 * stops early when the vector is exhausted.
 *
 * Returns the number of bytes actually copied.  `offset' must not point
 * past the end of the vector; this is enforced with an assertion.
 */
size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                    size_t offset, const void *buf, size_t bytes)
{
    const char *src = buf;
    size_t copied = 0;
    unsigned int k = 0;

    while (k < iov_cnt && (offset != 0 || copied < bytes)) {
        size_t elem_len = iov[k].iov_len;

        if (offset >= elem_len) {
            /* Still skipping: this element lies entirely before `offset'. */
            offset -= elem_len;
        } else {
            size_t chunk = MIN(elem_len - offset, bytes - copied);

            memcpy((char *)iov[k].iov_base + offset, src + copied, chunk);
            copied += chunk;
            /* Only the first touched element is entered mid-way. */
            offset = 0;
        }
        k++;
    }
    assert(offset == 0);
    return copied;
}
47

M
Michael Tokarev 已提交
48 49
/*
 * Gather data from the vector `iov' into the linear buffer `buf'.
 *
 * The first `offset' bytes of the vector are skipped, then up to `bytes'
 * bytes are copied from consecutive vector elements into `buf'.  Copying
 * stops early when the vector is exhausted.
 *
 * Returns the number of bytes actually copied.  `offset' must not point
 * past the end of the vector; this is enforced with an assertion.
 */
size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, void *buf, size_t bytes)
{
    char *dst = buf;
    size_t copied = 0;
    unsigned int k = 0;

    while (k < iov_cnt && (offset != 0 || copied < bytes)) {
        size_t elem_len = iov[k].iov_len;

        if (offset >= elem_len) {
            /* Still skipping: this element lies entirely before `offset'. */
            offset -= elem_len;
        } else {
            size_t chunk = MIN(elem_len - offset, bytes - copied);

            memcpy(dst + copied, (const char *)iov[k].iov_base + offset, chunk);
            copied += chunk;
            /* Only the first touched element is entered mid-way. */
            offset = 0;
        }
        k++;
    }
    assert(offset == 0);
    return copied;
}

67
/*
 * Fill part of the vector `iov' with the byte value `fillc'.
 *
 * The first `offset' bytes of the vector are skipped, then up to `bytes'
 * bytes in consecutive vector elements are set to `fillc'.  Filling stops
 * early when the vector is exhausted.
 *
 * Returns the number of bytes actually filled.  `offset' must not point
 * past the end of the vector; this is enforced with an assertion.
 */
size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, int fillc, size_t bytes)
{
    size_t filled = 0;
    unsigned int k = 0;

    while (k < iov_cnt && (offset != 0 || filled < bytes)) {
        size_t elem_len = iov[k].iov_len;

        if (offset >= elem_len) {
            /* Still skipping: this element lies entirely before `offset'. */
            offset -= elem_len;
        } else {
            size_t chunk = MIN(elem_len - offset, bytes - filled);

            memset((char *)iov[k].iov_base + offset, fillc, chunk);
            filled += chunk;
            /* Only the first touched element is entered mid-way. */
            offset = 0;
        }
        k++;
    }
    assert(offset == 0);
    return filled;
}

86
/*
 * Return the sum of the lengths of the first `iov_cnt' elements of `iov'.
 */
size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int k = 0;

    while (k < iov_cnt) {
        total += iov[k].iov_len;
        k++;
    }
    return total;
}
G
Gerd Hoffmann 已提交
97

98 99 100 101
/*
 * Helper function for iov_send_recv(): send from or receive into the first
 * `iov_cnt' elements of `iov' on socket `sockfd'.
 *
 * With CONFIG_POSIX a single sendmsg()/recvmsg() is issued (retried on
 * EINTR) and its result is returned unchanged.
 *
 * Otherwise the vector is processed element by element with send()/recv().
 * A short transfer on an element is continued from where it stopped, so
 * the element is completed before the next one is started; advancing
 * early would drop the untransferred tail when sending, or leave a hole
 * in the element when receiving.  A zero return from send()/recv() ends
 * the loop.
 *
 * Returns the number of bytes transferred, or -1 if an error other than
 * EINTR occurred before any data was transferred.
 */
static ssize_t
do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
{
#ifdef CONFIG_POSIX
    ssize_t ret;
    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = iov_cnt;
    do {
        ret = do_send
            ? sendmsg(sockfd, &msg, 0)
            : recvmsg(sockfd, &msg, 0);
    } while (ret < 0 && errno == EINTR);
    return ret;
#else
    /* else send piece-by-piece */
    /*XXX Note: windows has WSASend() and WSARecv() */
    unsigned i = 0;
    ssize_t ret = 0;
    size_t off = 0;     /* bytes of iov[i] already transferred */
    while (i < iov_cnt) {
        char *base = (char *)iov[i].iov_base + off;
        size_t len = iov[i].iov_len - off;
        ssize_t r = do_send
            ? send(sockfd, base, len, 0)
            : recv(sockfd, base, len, 0);
        if (r > 0) {
            ret += r;
            off += (size_t)r;
            if (off < iov[i].iov_len) {
                /* short transfer: finish this element before moving on */
                continue;
            }
        } else if (!r) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* else it is some "other" error,
             * only return if there was no data processed. */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
        off = 0;
        i++;
    }
    return ret;
#endif
}

/*
 * Send from or receive into a byte range of the vector `iov' on `sockfd'.
 *
 * The range starts `offset' bytes into the vector and is `bytes' long.
 * The function keeps calling do_send_recv() until the whole range has been
 * transferred, an error occurs, or (when receiving) do_send_recv() returns
 * 0.  Elements of `iov' are modified temporarily to describe the range and
 * are restored before the result of each transfer is examined.
 *
 * Returns the number of bytes transferred.  On error returns -1, except
 * that EAGAIN after some data has already been transferred returns the
 * count so far.  The range must lie within the vector (asserted).
 */
ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
                      size_t offset, size_t bytes,
                      bool do_send)
{
    ssize_t done = 0;

    while (bytes > 0) {
        unsigned n = 0;
        size_t rest;
        size_t saved_len = 0;
        ssize_t r;

        /* Step over leading elements that lie entirely before `offset'. */
        while (n < iov_cnt && offset >= iov[n].iov_len) {
            offset -= iov[n].iov_len;
            n++;
        }

        /* Running out of elements would only be valid for bytes == 0,
         * which the loop condition has already excluded. */
        assert(n < iov_cnt);
        iov += n;
        iov_cnt -= n;

        /* Temporarily cut the already-consumed head off the first
         * element; this is reverted right after the transfer. */
        if (offset) {
            iov[0].iov_base = (char *)iov[0].iov_base + offset;
            iov[0].iov_len -= offset;
        }

        /* Count the elements that fit entirely into the remaining range. */
        rest = bytes;
        n = 0;
        while (n < iov_cnt && iov[n].iov_len <= rest) {
            rest -= iov[n].iov_len;
            n++;
        }
        if (rest) {
            /* The range ends inside element n: shorten it for now and
             * remember its real length. */
            assert(n < iov_cnt);
            assert(iov[n].iov_len > rest);
            saved_len = iov[n].iov_len;
            iov[n].iov_len = rest;
            n++;
        }

        r = do_send_recv(sockfd, iov, n, do_send);

        /* Restore the vector before looking at the result. */
        if (rest) {
            iov[n - 1].iov_len = saved_len;
        }
        if (offset) {
            iov[0].iov_base = (char *)iov[0].iov_base - offset;
            iov[0].iov_len += offset;
        }

        if (r < 0) {
            assert(errno != EINTR);
            /* Report partial progress rather than EAGAIN if there is any. */
            return (errno == EAGAIN && done > 0) ? done : -1;
        }

        if (!do_send && r == 0) {
            /* recv returns 0 when the peer has performed an orderly
             * shutdown. */
            break;
        }

        /* Advance past what was just transferred. */
        offset += r;
        done += r;
        bytes -= r;
    }

    return done;
}


G
Gerd Hoffmann 已提交
221 222 223
/*
 * Hex-dump the contents of the vector `iov' through qemu_hexdump().
 *
 * At most `limit' bytes, taken from the start of the vector, are dumped.
 * The data is first linearized into a temporary heap buffer, which is
 * released before returning.  `fp' and `prefix' are passed through to
 * qemu_hexdump() unchanged.
 */
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                 FILE *fp, const char *prefix, size_t limit)
{
    /* Unsigned index: `iov_cnt' is unsigned, so a signed counter would
     * be compared after conversion and could overflow for large counts. */
    unsigned int v;
    size_t size = 0;
    char *buf;

    for (v = 0; v < iov_cnt; v++) {
        size += iov[v].iov_len;
    }
    if (size > limit) {
        size = limit;
    }
    buf = g_malloc(size);
    iov_to_buf(iov, iov_cnt, 0, buf, size);
    qemu_hexdump(buf, fp, prefix, size);
    g_free(buf);
}
237

M
Michael S. Tsirkin 已提交
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
/*
 * Build in `dst_iov' a vector describing a byte range of `iov'.
 *
 * The range starts `offset' bytes into `iov' and is up to `bytes' long.
 * Only base pointers and lengths are written to `dst_iov'; no data is
 * copied.  Filling stops when `dst_iov_cnt' entries have been written,
 * when `iov' is exhausted, or when `bytes' bytes are covered.
 *
 * Returns the number of `dst_iov' entries written.  Asserts that `offset'
 * has been fully consumed when the loop ends.
 */
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
                 const struct iovec *iov, unsigned int iov_cnt,
                 size_t offset, size_t bytes)
{
    unsigned int src = 0;
    unsigned int out = 0;

    while (src < iov_cnt && out < dst_iov_cnt && bytes != 0) {
        size_t elem_len = iov[src].iov_len;

        if (offset < elem_len) {
            size_t chunk = MIN(bytes, elem_len - offset);

            dst_iov[out].iov_base = (char *)iov[src].iov_base + offset;
            dst_iov[out].iov_len = chunk;
            out++;
            bytes -= chunk;
            offset = 0;
        } else {
            /* Element lies entirely before the requested range. */
            offset -= elem_len;
        }
        src++;
    }
    assert(offset == 0);
    return out;
}
260

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
/* io vectors */

/*
 * Initialize `qiov' as an empty vector with storage allocated for
 * `alloc_hint' elements.
 */
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
{
    qiov->niov = 0;
    qiov->size = 0;
    qiov->nalloc = alloc_hint;
    qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
}

/*
 * Initialize `qiov' to refer to the caller-provided array `iov' of `niov'
 * elements.  `nalloc' is set to -1, the value the add/reset/destroy
 * helpers assert against, and `size' is set to the sum of the element
 * lengths.
 */
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
{
    int k = 0;

    qiov->iov = iov;
    qiov->niov = niov;
    qiov->nalloc = -1;
    qiov->size = 0;
    while (k < niov) {
        qiov->size += iov[k].iov_len;
        k++;
    }
}

/*
 * Append one element (`base', `len') to `qiov', growing the element array
 * when it is full.  Must not be used on a vector whose `nalloc' is -1
 * (asserted).
 */
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
{
    struct iovec *slot;

    assert(qiov->nalloc != -1);

    if (qiov->niov == qiov->nalloc) {
        /* Full: grow to 2n + 1 so that a capacity of 0 can grow too. */
        qiov->nalloc = 2 * qiov->nalloc + 1;
        qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
    }

    slot = &qiov->iov[qiov->niov];
    slot->iov_base = base;
    slot->iov_len = len;
    ++qiov->niov;
    qiov->size += len;
}

/*
298
 * Concatenates (partial) iovecs from src_iov to the end of dst.
299 300 301
 * It starts copying after skipping `soffset' bytes at the
 * beginning of src and adds individual vectors from src to
 * dst copies up to `sbytes' bytes total, or up to the end
302
 * of src_iov if it comes first.  This way, it is okay to specify
303 304 305 306
 * very large value for `sbytes' to indicate "up to the end
 * of src".
 * Only vector pointers are processed, not the actual data buffers.
 */
307 308 309
void qemu_iovec_concat_iov(QEMUIOVector *dst,
                           struct iovec *src_iov, unsigned int src_cnt,
                           size_t soffset, size_t sbytes)
310 311 312
{
    int i;
    size_t done;
313 314 315 316

    if (!sbytes) {
        return;
    }
317
    assert(dst->nalloc != -1);
318 319 320 321
    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
        if (soffset < src_iov[i].iov_len) {
            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
322 323 324
            done += len;
            soffset = 0;
        } else {
325
            soffset -= src_iov[i].iov_len;
326 327
        }
    }
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
    assert(soffset == 0); /* offset beyond end of src */
}

/*
 * Concatenates (partial) iovecs from src to the end of dst.
 * It starts copying after skipping `soffset' bytes at the beginning
 * of src and adds individual vectors from src to dst.  It copies up
 * to `sbytes' bytes total, or up to the end of src if that comes
 * first.  This way, it is okay to specify a very large value for
 * `sbytes' to indicate "up to the end of src".
 * Only vector pointers are processed, not the actual data buffers.
 *
 * This is qemu_iovec_concat_iov() applied to the element array of `src'.
 */
void qemu_iovec_concat(QEMUIOVector *dst,
                       QEMUIOVector *src, size_t soffset, size_t sbytes)
{
    struct iovec *src_elems = src->iov;

    qemu_iovec_concat_iov(dst, src_elems, src->niov, soffset, sbytes);
}

/*
 * Release the element array of `qiov' and leave it empty, with `iov' set
 * to NULL and `nalloc' set to 0.  Must not be used on a vector whose
 * `nalloc' is -1 (asserted).
 */
void qemu_iovec_destroy(QEMUIOVector *qiov)
{
    void *elems;

    assert(qiov->nalloc != -1);

    qemu_iovec_reset(qiov);

    /* Detach the array from the vector first, then free it. */
    elems = qiov->iov;
    qiov->iov = NULL;
    qiov->nalloc = 0;
    g_free(elems);
}

/*
 * Make `qiov' empty (no elements, total size 0) while keeping its
 * allocated element array.  Must not be used on a vector whose `nalloc'
 * is -1 (asserted).
 */
void qemu_iovec_reset(QEMUIOVector *qiov)
{
    assert(qiov->nalloc != -1);

    qiov->size = 0;
    qiov->niov = 0;
}

/*
 * Copy up to `bytes' bytes, starting `offset' bytes into `qiov', to the
 * linear buffer `buf'.  Forwards to iov_to_buf() on the elements of
 * `qiov' and returns its result.
 */
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
                         void *buf, size_t bytes)
{
    size_t copied = iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);

    return copied;
}

/*
 * Copy up to `bytes' bytes from the linear buffer `buf' into `qiov',
 * starting `offset' bytes into the vector.  Forwards to iov_from_buf() on
 * the elements of `qiov' and returns its result.
 */
size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
                           const void *buf, size_t bytes)
{
    size_t copied = iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);

    return copied;
}

/*
 * Set up to `bytes' bytes of `qiov', starting `offset' bytes into the
 * vector, to the byte value `fillc'.  Forwards to iov_memset() on the
 * elements of `qiov' and returns its result.
 */
size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
                         int fillc, size_t bytes)
{
    size_t filled = iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);

    return filled;
}
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432

/*
 * Remove up to `bytes' bytes from the front of the vector `*iov'.
 *
 * Elements that are consumed completely are dropped by advancing `*iov'
 * and decrementing `*iov_cnt'.  An element that is only partly consumed
 * is modified in place: its base is advanced and its length reduced.
 *
 * Returns the number of bytes removed, which is less than `bytes' when
 * the vector is shorter than that.
 */
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
                         size_t bytes)
{
    struct iovec *head = *iov;
    size_t dropped = 0;

    while (*iov_cnt > 0) {
        size_t elem_len = head->iov_len;

        if (elem_len > bytes) {
            /* The cut ends inside this element: trim it and stop. */
            head->iov_base = (char *)head->iov_base + bytes;
            head->iov_len = elem_len - bytes;
            dropped += bytes;
            break;
        }

        /* Whole element is discarded. */
        bytes -= elem_len;
        dropped += elem_len;
        --*iov_cnt;
        head++;
    }

    *iov = head;
    return dropped;
}

/*
 * Remove up to `bytes' bytes from the back of the vector `iov'.
 *
 * Elements that are consumed completely are dropped by decrementing
 * `*iov_cnt'.  An element that is only partly consumed has its length
 * reduced in place.
 *
 * Returns the number of bytes removed, which is less than `bytes' when
 * the vector is shorter than that.
 */
size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
                        size_t bytes)
{
    size_t dropped = 0;

    while (*iov_cnt > 0) {
        struct iovec *last = &iov[*iov_cnt - 1];
        size_t elem_len = last->iov_len;

        if (elem_len > bytes) {
            /* The cut ends inside this element: shorten it and stop. */
            last->iov_len = elem_len - bytes;
            dropped += bytes;
            break;
        }

        /* Whole element is discarded. */
        bytes -= elem_len;
        dropped += elem_len;
        --*iov_cnt;
    }

    return dropped;
}