networking.c 38.4 KB
Newer Older
1 2 3
#include "redis.h"
#include <sys/uio.h>

4 5
static void setProtocolError(redisClient *c, int pos);

6 7 8 9 10 11 12 13 14 15
void *dupClientReplyValue(void *o) {
    incrRefCount((robj*)o);
    return o;
}

/* List "match" callback: two node values match when equalStringObjects()
 * says they are equal. Its result is returned unchanged. */
int listMatchObjects(void *a, void *b) {
    int matched = equalStringObjects(a,b);

    return matched;
}

redisClient *createClient(int fd) {
16
    redisClient *c = zmalloc(sizeof(redisClient));
17
    c->bufpos = 0;
18

19 20 21 22 23 24 25 26 27 28 29 30 31 32
    /* passing -1 as fd it is possible to create a non connected client.
     * This is useful since all the Redis commands needs to be executed
     * in the context of a client. When commands are executed in other
     * contexts (for instance a Lua script) we need a non connected client. */
    if (fd != -1) {
        anetNonBlock(NULL,fd);
        anetTcpNoDelay(NULL,fd);
        if (aeCreateFileEvent(server.el,fd,AE_READABLE,
            readQueryFromClient, c) == AE_ERR)
        {
            close(fd);
            zfree(c);
            return NULL;
        }
33 34
    }

35 36 37
    selectDb(c,0);
    c->fd = fd;
    c->querybuf = sdsempty();
38
    c->reqtype = 0;
39 40
    c->argc = 0;
    c->argv = NULL;
41
    c->cmd = c->lastcmd = NULL;
42
    c->multibulklen = 0;
43 44 45 46 47 48 49
    c->bulklen = -1;
    c->sentlen = 0;
    c->flags = 0;
    c->lastinteraction = time(NULL);
    c->authenticated = 0;
    c->replstate = REDIS_REPL_NONE;
    c->reply = listCreate();
50
    c->reply_bytes = 0;
51 52
    listSetFreeMethod(c->reply,decrRefCount);
    listSetDupMethod(c->reply,dupClientReplyValue);
53 54 55 56
    c->bpop.keys = NULL;
    c->bpop.count = 0;
    c->bpop.timeout = 0;
    c->bpop.target = NULL;
57 58 59 60 61 62 63
    c->io_keys = listCreate();
    c->watched_keys = listCreate();
    listSetFreeMethod(c->io_keys,decrRefCount);
    c->pubsub_channels = dictCreate(&setDictType,NULL);
    c->pubsub_patterns = listCreate();
    listSetFreeMethod(c->pubsub_patterns,decrRefCount);
    listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
64
    if (fd != -1) listAddNodeTail(server.clients,c);
65 66 67 68
    initClientMultiState(c);
    return c;
}

69 70
/* Set the event loop to listen for write events on the client's socket.
 * Typically gets called every time a reply is built. */
71
int _installWriteEvent(redisClient *c) {
72
    if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK;
73
    if (c->fd <= 0) return REDIS_ERR;
74
    if (c->bufpos == 0 && listLength(c->reply) == 0 &&
75 76 77
        (c->replstate == REDIS_REPL_NONE ||
         c->replstate == REDIS_REPL_ONLINE) &&
        aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
78 79 80 81
        sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
    return REDIS_OK;
}

82 83 84 85 86 87 88 89 90 91 92 93 94 95
/* Create a duplicate of the last object in the reply list when
 * it is not exclusively owned by the reply list.
 *
 * The list must be non-empty (asserted). When the tail object's refcount
 * is greater than one, the tail node is pointed at a copy made with
 * dupStringObject() and the reference held on the old object is dropped.
 * Returns the object stored in the tail node after this step. */
robj *dupLastObjectIfNeeded(list *reply) {
    robj *new, *cur;
    listNode *ln;
    redisAssert(listLength(reply) > 0);
    ln = listLast(reply);
    cur = listNodeValue(ln);
    if (cur->refcount > 1) {
        new = dupStringObject(cur);
        decrRefCount(cur);
        listNodeValue(ln) = new;
    }
    return listNodeValue(ln);
}

98 99 100 101
/* -----------------------------------------------------------------------------
 * Low level functions to add more data to output buffers.
 * -------------------------------------------------------------------------- */

102
/* Try to append 'len' bytes from 's' to the client's static output buffer.
 *
 * Returns REDIS_OK when the bytes were copied, and also when the client is
 * flagged REDIS_CLOSE_AFTER_REPLY (the data is then dropped without being
 * copied). Returns REDIS_ERR, copying nothing, when the reply list is
 * already in use or the buffer has fewer than 'len' bytes free. */
int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
    size_t available = sizeof(c->buf)-c->bufpos;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK;

    /* If there already are entries in the reply list, we cannot
     * add anything more to the static buffer. */
    if (listLength(c->reply) > 0) return REDIS_ERR;

    /* Check that the buffer has enough space available for this string. */
    if (len > available) return REDIS_ERR;

    memcpy(c->buf+c->bufpos,s,len);
    c->bufpos+=len;
    return REDIS_OK;
}

119 120
/* Queue the string object 'o' on the client's reply list.
 *
 * Does nothing when the client is flagged REDIS_CLOSE_AFTER_REPLY.
 * Otherwise the object is either appended as a new node (taking a
 * reference on it) or, when the tail node has a non-NULL payload and the
 * combined length stays within REDIS_REPLY_CHUNK_BYTES, its bytes are
 * concatenated onto the tail object. c->reply_bytes grows by the length
 * of 'o' in both cases. */
void _addReplyObjectToList(redisClient *c, robj *o) {
    robj *tail;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;

    if (listLength(c->reply) == 0) {
        incrRefCount(o);
        listAddNodeTail(c->reply,o);
    } else {
        tail = listNodeValue(listLast(c->reply));

        /* Append to this object when possible. */
        if (tail->ptr != NULL &&
            sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
        {
            /* Never modify an object that is referenced elsewhere. */
            tail = dupLastObjectIfNeeded(c->reply);
            tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
        } else {
            incrRefCount(o);
            listAddNodeTail(c->reply,o);
        }
    }
    c->reply_bytes += sdslen(o->ptr);
}
143

144 145 146 147
/* This method takes responsibility over the sds. When it is no longer
 * needed it will be free'd, otherwise it ends up in a robj.
 *
 * When the client is flagged REDIS_CLOSE_AFTER_REPLY the sds is freed and
 * nothing is queued. Otherwise c->reply_bytes grows by the sds length and
 * the string is either wrapped in a new tail object or concatenated onto
 * the current tail when the result fits in REDIS_REPLY_CHUNK_BYTES. */
void _addReplySdsToList(redisClient *c, sds s) {
    robj *tail;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) {
        sdsfree(s);
        return;
    }

    c->reply_bytes += sdslen(s);
    if (listLength(c->reply) == 0) {
        listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
    } else {
        tail = listNodeValue(listLast(c->reply));

        /* Append to this object when possible. */
        if (tail->ptr != NULL &&
            sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
        {
            tail = dupLastObjectIfNeeded(c->reply);
            tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
            /* The bytes were copied into the tail: 's' is not needed. */
            sdsfree(s);
        } else {
            listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
        }
    }
}

/* Queue 'len' bytes from the C buffer 's' on the client's reply list.
 *
 * Does nothing when the client is flagged REDIS_CLOSE_AFTER_REPLY.
 * Otherwise the bytes are copied either into a new string object appended
 * to the list, or onto the current tail object when it has a non-NULL
 * payload and the result fits in REDIS_REPLY_CHUNK_BYTES. c->reply_bytes
 * grows by 'len'. */
void _addReplyStringToList(redisClient *c, char *s, size_t len) {
    robj *tail;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;

    if (listLength(c->reply) == 0) {
        listAddNodeTail(c->reply,createStringObject(s,len));
    } else {
        tail = listNodeValue(listLast(c->reply));

        /* Append to this object when possible. */
        if (tail->ptr != NULL &&
            sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
        {
            tail = dupLastObjectIfNeeded(c->reply);
            tail->ptr = sdscatlen(tail->ptr,s,len);
        } else {
            listAddNodeTail(c->reply,createStringObject(s,len));
        }
    }
    c->reply_bytes += len;
}
195

196 197 198 199 200
/* -----------------------------------------------------------------------------
 * Higher level functions to queue data on the client output buffer.
 * The following functions are the ones that commands implementations will call.
 * -------------------------------------------------------------------------- */

201
/* Queue the object 'obj' as reply data for client 'c'.
 *
 * Nothing is queued when _installWriteEvent() does not return REDIS_OK.
 * The payload goes to the static buffer when it fits there, otherwise to
 * the reply list. Objects that are not RAW encoded are first converted
 * with getDecodedObject(); the reference obtained that way is released
 * before returning. */
void addReply(redisClient *c, robj *obj) {
    if (_installWriteEvent(c) != REDIS_OK) return;

    /* This is an important place where we can avoid copy-on-write
     * when there is a saving child running, avoiding touching the
     * refcount field of the object if it's not needed.
     *
     * If the encoding is RAW and there is room in the static buffer
     * we'll be able to send the object to the client without
     * messing with its page. */
    if (obj->encoding == REDIS_ENCODING_RAW) {
        if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
            _addReplyObjectToList(c,obj);
    } else {
        /* FIXME: convert the long into string and use _addReplyToBuffer()
         * instead of calling getDecodedObject. As this place in the
         * code is too performance critical. */
        obj = getDecodedObject(obj);
        if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
            _addReplyObjectToList(c,obj);
        decrRefCount(obj);
    }
}

/* Queue the sds string 's' as reply data for client 'c'.
 *
 * Ownership of 's' always passes to this function: it is freed here when
 * the write event cannot be installed or once its bytes have been copied
 * into the static buffer, and handed over to _addReplySdsToList()
 * otherwise. */
void addReplySds(redisClient *c, sds s) {
    if (_installWriteEvent(c) != REDIS_OK) {
        /* The caller expects the sds to be free'd. */
        sdsfree(s);
        return;
    }
    if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) {
        sdsfree(s);
    } else {
        /* This method free's the sds when it is no longer needed. */
        _addReplySdsToList(c,s);
    }
}

239
/* Queue 'len' bytes from the C buffer 's' as reply data for client 'c'.
 * Nothing is queued when _installWriteEvent() does not return REDIS_OK.
 * The static buffer is tried first, then the reply list. */
void addReplyString(redisClient *c, char *s, size_t len) {
    if (_installWriteEvent(c) != REDIS_OK) return;
    if (_addReplyToBuffer(c,s,len) != REDIS_OK)
        _addReplyStringToList(c,s,len);
}
244

245 246 247 248
void _addReplyError(redisClient *c, char *s, size_t len) {
    addReplyString(c,"-ERR ",5);
    addReplyString(c,s,len);
    addReplyString(c,"\r\n",2);
249 250
}

251 252 253
/* Reply to 'c' with an error whose text is the NUL-terminated string
 * 'err'. */
void addReplyError(redisClient *c, char *err) {
    size_t errlen = strlen(err);

    _addReplyError(c,err,errlen);
}
254

255
/* Reply to 'c' with an error whose text is built printf-style from 'fmt'
 * and the following arguments. CR and LF characters in the formatted text
 * are replaced by spaces before it is sent. */
void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
    size_t l, j;
    va_list ap;
    va_start(ap,fmt);
    sds s = sdscatvprintf(sdsempty(),fmt,ap);
    va_end(ap);
    /* Make sure there are no newlines in the string, otherwise invalid protocol
     * is emitted. */
    l = sdslen(s);
    for (j = 0; j < l; j++) {
        if (s[j] == '\r' || s[j] == '\n') s[j] = ' ';
    }
    _addReplyError(c,s,sdslen(s));
    sdsfree(s);
}

/* Emit a status reply: a '+' byte, the 'len' bytes at 's', then CRLF. */
void _addReplyStatus(redisClient *c, char *s, size_t len) {
    char *prefix = "+";
    char *terminator = "\r\n";

    addReplyString(c,prefix,1);
    addReplyString(c,s,len);
    addReplyString(c,terminator,2);
}

/* Reply to 'c' with a status line whose text is the NUL-terminated string
 * 'status'. */
void addReplyStatus(redisClient *c, char *status) {
    size_t statuslen = strlen(status);

    _addReplyStatus(c,status,statuslen);
}

/* Reply to 'c' with a status line whose text is built printf-style from
 * 'fmt' and the following arguments. The temporary string is released
 * before returning. */
void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
    va_list args;
    sds msg;

    va_start(args,fmt);
    msg = sdscatvprintf(sdsempty(),fmt,args);
    va_end(args);

    _addReplyStatus(c,msg,sdslen(msg));
    sdsfree(msg);
}

290 291 292
/* Adds an empty object to the reply list that will contain the multi bulk
 * length, which is not known when this function is called.
 *
 * Returns the list node holding the placeholder (an object whose ptr is
 * NULL), to be passed later to setDeferredMultiBulkLength(), or NULL when
 * _installWriteEvent() does not return REDIS_OK. */
void *addDeferredMultiBulkLength(redisClient *c) {
    /* Note that we install the write event here even if the object is not
     * ready to be sent, since we are sure that before returning to the
     * event loop setDeferredMultiBulkLength() will be called. */
    if (_installWriteEvent(c) != REDIS_OK) return NULL;
    listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL));
    return listLast(c->reply);
}

/* Populate the length object and try glueing it to the next chunk.
 *
 * 'node' is the value returned by addDeferredMultiBulkLength(); a NULL
 * node makes this call a no-op. The placeholder receives the text
 * "*<length>\r\n" and c->reply_bytes grows by its size. When the following
 * node carries a non-NULL payload, that payload is appended to the
 * placeholder and the following node is deleted from the list. */
void setDeferredMultiBulkLength(redisClient *c, void *node, long length) {
    listNode *ln = (listNode*)node;
    robj *len, *next;

    /* Abort when *node is NULL (see addDeferredMultiBulkLength). */
    if (node == NULL) return;

    len = listNodeValue(ln);
    len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
    c->reply_bytes += sdslen(len->ptr);
    if (ln->next != NULL) {
        next = listNodeValue(ln->next);

        /* Only glue when the next node is non-NULL (an sds in this case) */
        if (next->ptr != NULL) {
            len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr));
            listDelNode(c->reply,ln->next);
        }
    }
}

323
/* Add a duble as a bulk reply */
324 325 326 327 328 329
void addReplyDouble(redisClient *c, double d) {
    char dbuf[128], sbuf[128];
    int dlen, slen;
    dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d);
    slen = snprintf(sbuf,sizeof(sbuf),"$%d\r\n%s\r\n",dlen,dbuf);
    addReplyString(c,sbuf,slen);
330 331
}

332 333
/* Add a long long as integer reply or bulk len / multi bulk count.
 * Basically this is used to output <prefix><long long><crlf>. */
334
void _addReplyLongLong(redisClient *c, long long ll, char prefix) {
335
    char buf[128];
336 337
    int len;
    buf[0] = prefix;
338 339 340
    len = ll2string(buf+1,sizeof(buf)-1,ll);
    buf[len+1] = '\r';
    buf[len+2] = '\n';
341
    addReplyString(c,buf,len+3);
342 343
}

344
/* Add 'll' as an integer reply (':' prefix). The values 0 and 1 are sent
 * through the preallocated shared.czero / shared.cone objects instead of
 * being formatted. */
void addReplyLongLong(redisClient *c, long long ll) {
    if (ll == 0)
        addReply(c,shared.czero);
    else if (ll == 1)
        addReply(c,shared.cone);
    else
        _addReplyLongLong(c,ll,':');
}
352

353 354
/* Emit a multi bulk count header: '*' followed by 'length' and CRLF. */
void addReplyMultiBulkLen(redisClient *c, long length) {
    _addReplyLongLong(c,length,'*');
}

357
/* Create the length prefix of a bulk reply, example: $2234 */
358
void addReplyBulkLen(redisClient *c, robj *obj) {
359
    size_t len;
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375

    if (obj->encoding == REDIS_ENCODING_RAW) {
        len = sdslen(obj->ptr);
    } else {
        long n = (long)obj->ptr;

        /* Compute how many bytes will take this integer as a radix 10 string */
        len = 1;
        if (n < 0) {
            len++;
            n = -n;
        }
        while((n = n/10) != 0) {
            len++;
        }
    }
376
    _addReplyLongLong(c,len,'$');
377 378
}

379
/* Add a Redis Object as a bulk reply */
380 381 382 383 384 385
void addReplyBulk(redisClient *c, robj *obj) {
    addReplyBulkLen(c,obj);
    addReply(c,obj);
    addReply(c,shared.crlf);
}

386 387 388 389 390 391 392 393
/* Emit the 'len' bytes starting at 'p' as a bulk reply: a '$' length
 * header, the raw payload, then the shared CRLF terminator. */
void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
    char *payload = p;
    long long header_value = len;

    _addReplyLongLong(c,header_value,'$');
    addReplyString(c,payload,len);
    addReply(c,shared.crlf);
}

/* Add a C nul term string as bulk reply */
394 395 396 397
void addReplyBulkCString(redisClient *c, char *s) {
    if (s == NULL) {
        addReply(c,shared.nullbulk);
    } else {
398
        addReplyBulkCBuffer(c,s,strlen(s));
399 400 401
    }
}

402 403 404 405 406 407 408 409 410
/* Send 'll' as a bulk reply: the number is first rendered to text with
 * ll2string() and the resulting characters are sent as the bulk payload. */
void addReplyBulkLongLong(redisClient *c, long long ll) {
    char digits[64];
    int ndigits = ll2string(digits,sizeof(digits),ll);

    addReplyBulkCBuffer(c,digits,ndigits);
}

411 412 413 414 415 416 417 418
/* Copy 'src' client output buffers into 'dst' client output buffers.
 * The function takes care of freeing the old output buffers of the
 * destination client.
 *
 * Both the reply list (duplicated with listDup()) and the used part of
 * the static buffer are copied, together with bufpos and reply_bytes. */
void copyClientOutputBuffer(redisClient *dst, redisClient *src) {
    listRelease(dst->reply);
    dst->reply = listDup(src->reply);
    memcpy(dst->buf,src->buf,src->bufpos);
    dst->bufpos = src->bufpos;
    dst->reply_bytes = src->reply_bytes;
}

422
static void acceptCommonHandler(int fd) {
423
    redisClient *c;
424
    if ((c = createClient(fd)) == NULL) {
425
        redisLog(REDIS_WARNING,"Error allocating resoures for the client");
426
        close(fd); /* May be already closed, just ingore errors */
427 428 429 430 431 432
        return;
    }
    /* If maxclient directive is set and this is one client more... close the
     * connection. Note that we create the client instead to check before
     * for this condition, since now the socket is already set in nonblocking
     * mode and we can send an error for free using the Kernel I/O */
433
    if (listLength(server.clients) > server.maxclients) {
434 435 436 437 438 439
        char *err = "-ERR max number of clients reached\r\n";

        /* That's a best effort error message, don't check write errors */
        if (write(c->fd,err,strlen(err)) == -1) {
            /* Nothing to do, Just to avoid the warning... */
        }
440
        server.stat_rejected_conn++;
441 442 443 444 445 446
        freeClient(c);
        return;
    }
    server.stat_numconnections++;
}

447 448 449 450 451 452 453 454 455
/* File event handler that accepts one connection from the listening
 * descriptor 'fd' with anetTcpAccept(). Failures are logged at WARNING
 * level using server.neterr; on success the peer address is logged at
 * VERBOSE level and the new descriptor goes to acceptCommonHandler().
 * 'el', 'privdata' and 'mask' are not used. */
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    int cport, cfd;
    char cip[128];
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);
    REDIS_NOTUSED(privdata);

    cfd = anetTcpAccept(server.neterr, fd, cip, &cport);
    if (cfd == AE_ERR) {
        redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr);
        return;
    }
    redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
    acceptCommonHandler(cfd);
}

/* File event handler that accepts one connection from the listening
 * descriptor 'fd' with anetUnixAccept(). Failures are logged at WARNING
 * level using server.neterr; on success the new descriptor goes to
 * acceptCommonHandler(). 'el', 'privdata' and 'mask' are not used. */
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    int cfd;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);
    REDIS_NOTUSED(privdata);

    cfd = anetUnixAccept(server.neterr, fd);
    if (cfd == AE_ERR) {
        redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr);
        return;
    }
    redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket);
    acceptCommonHandler(cfd);
}


479 480 481 482 483
/* Release the references held by the client's argument vector and mark it
 * empty. The argv array itself is not freed here; c->cmd is cleared. */
static void freeClientArgv(redisClient *c) {
    int j;
    for (j = 0; j < c->argc; j++)
        decrRefCount(c->argv[j]);
    c->argc = 0;
    c->cmd = NULL;
}

/* Tear down the client 'c': release every resource it owns, remove it
 * from the server-wide lists it belongs to, close its socket and free the
 * structure itself.
 *
 * When the client is flagged REDIS_MASTER the replication state is reset
 * to REDIS_REPL_CONNECT and, if server.masterhost is still set, every
 * client in server.slaves is freed as well (through recursive calls). */
void freeClient(redisClient *c) {
    listNode *ln;

    /* If this is marked as current client unset it */
    if (server.current_client == c) server.current_client = NULL;

    /* Note that if the client we are freeing is blocked into a blocking
     * call, we have to set querybuf to NULL *before* to call
     * unblockClientWaitingData() to avoid processInputBuffer() will get
     * called. Also it is important to remove the file events after
     * this, because this call adds the READABLE event. */
    sdsfree(c->querybuf);
    c->querybuf = NULL;
    if (c->flags & REDIS_BLOCKED)
        unblockClientWaitingData(c);

    /* UNWATCH all the keys */
    unwatchAllKeys(c);
    listRelease(c->watched_keys);
    /* Unsubscribe from all the pubsub channels */
    pubsubUnsubscribeAllChannels(c,0);
    pubsubUnsubscribeAllPatterns(c,0);
    dictRelease(c->pubsub_channels);
    listRelease(c->pubsub_patterns);
    /* Obvious cleanup */
    aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
    aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
    listRelease(c->reply);
    freeClientArgv(c);
    close(c->fd);
    /* Remove from the list of clients */
    ln = listSearchKey(server.clients,c);
    redisAssert(ln != NULL);
    listDelNode(server.clients,ln);
    /* When client was just unblocked because of a blocking operation,
     * remove it from the list with unblocked clients. */
    if (c->flags & REDIS_UNBLOCKED) {
        ln = listSearchKey(server.unblocked_clients,c);
        redisAssert(ln != NULL);
        listDelNode(server.unblocked_clients,ln);
    }
    listRelease(c->io_keys);
    /* Master/slave cleanup.
     * Case 1: we lost the connection with a slave. */
    if (c->flags & REDIS_SLAVE) {
        if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
            close(c->repldbfd);
        /* Clients flagged as monitors are kept in their own list. */
        list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
        ln = listSearchKey(l,c);
        redisAssert(ln != NULL);
        listDelNode(l,ln);
    }

    /* Case 2: we lost the connection with the master. */
    if (c->flags & REDIS_MASTER) {
        server.master = NULL;
        server.repl_state = REDIS_REPL_CONNECT;
        server.repl_down_since = time(NULL);
        /* Since we lost the connection with the master, we should also
         * close the connection with all our slaves if we have any, so
         * when we'll resync with the master the other slaves will sync again
         * with us as well. Note that also when the slave is not connected
         * to the master it will keep refusing connections by other slaves.
         *
         * We do this only if server.masterhost != NULL. If it is NULL this
         * means the user called SLAVEOF NO ONE and we are freeing our
         * link with the master, so no need to close link with slaves. */
        if (server.masterhost != NULL) {
            while (listLength(server.slaves)) {
                ln = listFirst(server.slaves);
                freeClient((redisClient*)ln->value);
            }
        }
    }
    /* Release memory */
    zfree(c->argv);
    freeClientMultiState(c);
    zfree(c);
}

/* Writable-event handler: push pending reply data to the socket 'fd' of
 * the client passed as 'privdata'.
 *
 * The static buffer is drained first, then the objects in the reply list,
 * one write() per iteration; c->sentlen tracks how much of the current
 * chunk has already gone out. For clients flagged REDIS_MASTER nothing is
 * written and the data is simply accounted as sent. The loop stops when
 * everything was sent, when write() returns <= 0, or once more than
 * REDIS_MAX_WRITE_PER_EVENT bytes went out in this call.
 *
 * A write error other than EAGAIN frees the client. When both buffers end
 * up empty the writable event is removed, and the client is freed if it
 * is flagged REDIS_CLOSE_AFTER_REPLY. 'el' and 'mask' are not used. */
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *c = privdata;
    int nwritten = 0, totwritten = 0, objlen;
    robj *o;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);

    while(c->bufpos > 0 || listLength(c->reply)) {
        if (c->bufpos > 0) {
            if (c->flags & REDIS_MASTER) {
                /* Don't reply to a master */
                nwritten = c->bufpos - c->sentlen;
            } else {
                nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
                if (nwritten <= 0) break;
            }
            c->sentlen += nwritten;
            totwritten += nwritten;

            /* If the buffer was sent, set bufpos to zero to continue with
             * the remainder of the reply. */
            if (c->sentlen == c->bufpos) {
                c->bufpos = 0;
                c->sentlen = 0;
            }
        } else {
            o = listNodeValue(listFirst(c->reply));
            objlen = sdslen(o->ptr);

            /* Empty objects carry nothing to send: just drop them. */
            if (objlen == 0) {
                listDelNode(c->reply,listFirst(c->reply));
                continue;
            }

            if (c->flags & REDIS_MASTER) {
                /* Don't reply to a master */
                nwritten = objlen - c->sentlen;
            } else {
                nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
                if (nwritten <= 0) break;
            }
            c->sentlen += nwritten;
            totwritten += nwritten;

            /* If we fully sent the object on head go to the next one */
            if (c->sentlen == objlen) {
                listDelNode(c->reply,listFirst(c->reply));
                c->sentlen = 0;
                c->reply_bytes -= objlen;
            }
        }
        /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
         * bytes, in a single threaded server it's a good idea to serve
         * other clients as well, even if a very large request comes from
         * super fast link that is always able to accept data (in real world
         * scenario think about 'KEYS *' against the loopback interface) */
        if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
    }
    if (nwritten == -1) {
        if (errno == EAGAIN) {
            nwritten = 0;
        } else {
            redisLog(REDIS_VERBOSE,
                "Error writing to client: %s", strerror(errno));
            freeClient(c);
            return;
        }
    }
    if (totwritten > 0) c->lastinteraction = time(NULL);
    if (c->bufpos == 0 && listLength(c->reply) == 0) {
        c->sentlen = 0;
        aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);

        /* Close connection after entire reply has been sent. */
        if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c);
    }
}

/* resetClient prepare the client to process the next command: the
 * argument vector is released and the request parsing state (reqtype,
 * multibulklen, bulklen) goes back to its initial values. */
void resetClient(redisClient *c) {
    freeClientArgv(c);
    c->reqtype = 0;
    c->multibulklen = 0;
    c->bulklen = -1;
    /* We clear the ASKING flag as well if we are not inside a MULTI. */
    if (!(c->flags & REDIS_MULTI)) c->flags &= (~REDIS_ASKING);
}

void closeTimedoutClients(void) {
    redisClient *c;
    listNode *ln;
    time_t now = time(NULL);
    listIter li;

    listRewind(server.clients,&li);
    while ((ln = listNext(&li)) != NULL) {
        c = listNodeValue(ln);
        if (server.maxidletime &&
            !(c->flags & REDIS_SLAVE) &&    /* no timeout for slaves */
            !(c->flags & REDIS_MASTER) &&   /* no timeout for masters */
667
            !(c->flags & REDIS_BLOCKED) &&  /* no timeout for BLPOP */
668 669 670 671 672 673 674
            dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
            listLength(c->pubsub_patterns) == 0 &&
            (now - c->lastinteraction > server.maxidletime))
        {
            redisLog(REDIS_VERBOSE,"Closing idle client");
            freeClient(c);
        } else if (c->flags & REDIS_BLOCKED) {
675
            if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
676 677 678 679 680 681 682
                addReply(c,shared.nullmultibulk);
                unblockClientWaitingData(c);
            }
        }
    }
}

683 684 685 686 687 688 689
/* Parse one inline (space separated, CRLF terminated) request from the
 * client's query buffer into c->argv / c->argc.
 *
 * Returns REDIS_OK when a full line was consumed; the line and its CRLF
 * are removed from the query buffer and empty tokens are skipped.
 * Returns REDIS_ERR when no CRLF is present yet; if the buffer is already
 * larger than REDIS_INLINE_MAX_SIZE an error reply is queued and the
 * client is flagged through setProtocolError(). */
int processInlineBuffer(redisClient *c) {
    char *newline = strstr(c->querybuf,"\r\n");
    int argc, j;
    sds *argv;
    size_t querylen;

    /* Nothing to do without a \r\n */
    if (newline == NULL) {
        if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
            addReplyError(c,"Protocol error: too big inline request");
            setProtocolError(c,0);
        }
        return REDIS_ERR;
    }

    /* Split the input buffer up to the \r\n */
    querylen = newline-(c->querybuf);
    argv = sdssplitlen(c->querybuf,querylen," ",1,&argc);

    /* Leave data after the first line of the query in the buffer */
    c->querybuf = sdsrange(c->querybuf,querylen+2,-1);

    /* Setup argv array on client structure */
    if (c->argv) zfree(c->argv);
    c->argv = zmalloc(sizeof(robj*)*argc);

    /* Create redis objects for all arguments. */
    for (c->argc = 0, j = 0; j < argc; j++) {
        if (sdslen(argv[j])) {
            /* The object takes over the sds: it must not be freed here. */
            c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
            c->argc++;
        } else {
            sdsfree(argv[j]);
        }
    }
    zfree(argv);
    return REDIS_OK;
}

/* Helper function. Trims query buffer to make the function that processes
 * multi bulk requests idempotent.
 *
 * The client is flagged REDIS_CLOSE_AFTER_REPLY and the first 'pos' bytes
 * of its query buffer are discarded. When the server verbosity is at
 * least REDIS_VERBOSE the client info string is logged as well. */
static void setProtocolError(redisClient *c, int pos) {
    if (server.verbosity >= REDIS_VERBOSE) {
        sds client = getClientInfoString(c);
        redisLog(REDIS_VERBOSE,
            "Protocol error from client: %s", client);
        sdsfree(client);
    }
    c->flags |= REDIS_CLOSE_AFTER_REPLY;
    c->querybuf = sdsrange(c->querybuf,pos,-1);
}

/* Parse a multi bulk request ("*<count>\r\n" followed by <count> items of
 * the form "$<len>\r\n<payload>\r\n") from the client's query buffer into
 * c->argv / c->argc.
 *
 * The function can be called several times for the same request as more
 * data arrives: c->multibulklen holds the number of items still missing
 * and c->bulklen the length of the item being read (-1 when unknown).
 *
 * Returns REDIS_OK when the request is complete (also for a count <= 0,
 * which leaves c->argc at 0). Returns REDIS_ERR when more data is needed
 * or on a protocol error; in the latter case an error reply is queued and
 * the client is flagged through setProtocolError(). */
int processMultibulkBuffer(redisClient *c) {
    char *newline = NULL;
    int pos = 0, ok;
    long long ll;

    if (c->multibulklen == 0) {
        /* The client should have been reset */
        redisAssertWithInfo(c,NULL,c->argc == 0);

        /* Multi bulk length cannot be read without a \r\n */
        newline = strchr(c->querybuf,'\r');
        if (newline == NULL) {
            if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
                addReplyError(c,"Protocol error: too big mbulk count string");
                setProtocolError(c,0);
            }
            return REDIS_ERR;
        }

        /* Buffer should also contain \n */
        if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2))
            return REDIS_ERR;

        /* We know for sure there is a whole line since newline != NULL,
         * so go ahead and find out the multi bulk length. */
        redisAssertWithInfo(c,NULL,c->querybuf[0] == '*');
        ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll);
        if (!ok || ll > 1024*1024) {
            addReplyError(c,"Protocol error: invalid multibulk length");
            setProtocolError(c,pos);
            return REDIS_ERR;
        }

        pos = (newline-c->querybuf)+2;
        if (ll <= 0) {
            c->querybuf = sdsrange(c->querybuf,pos,-1);
            return REDIS_OK;
        }

        c->multibulklen = ll;

        /* Setup argv array on client structure */
        if (c->argv) zfree(c->argv);
        c->argv = zmalloc(sizeof(robj*)*c->multibulklen);
    }

    redisAssertWithInfo(c,NULL,c->multibulklen > 0);
    while(c->multibulklen) {
        /* Read bulk length if unknown */
        if (c->bulklen == -1) {
            newline = strchr(c->querybuf+pos,'\r');
            if (newline == NULL) {
                if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
                    addReplyError(c,"Protocol error: too big bulk count string");
                    setProtocolError(c,0);
                }
                break;
            }

            /* Buffer should also contain \n */
            if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2))
                break;

            if (c->querybuf[pos] != '$') {
                addReplyErrorFormat(c,
                    "Protocol error: expected '$', got '%c'",
                    c->querybuf[pos]);
                setProtocolError(c,pos);
                return REDIS_ERR;
            }

            ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll);
            if (!ok || ll < 0 || ll > 512*1024*1024) {
                addReplyError(c,"Protocol error: invalid bulk length");
                setProtocolError(c,pos);
                return REDIS_ERR;
            }

            pos += newline-(c->querybuf+pos)+2;
            if (ll >= REDIS_MBULK_BIG_ARG) {
                /* If we are going to read a large object from network
                 * try to make it likely that it will start at c->querybuf
                 * boundary so that we can optimized object creation
                 * avoiding a large copy of data. */
                c->querybuf = sdsrange(c->querybuf,pos,-1);
                pos = 0;
                /* Hint the sds library about the amount of bytes this string is
                 * going to contain. */
                c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2);
            }
            c->bulklen = ll;
        }

        /* Read bulk argument */
        if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) {
            /* Not enough data (+2 == trailing \r\n) */
            break;
        } else {
            /* Optimization: if the buffer contains JUST our bulk element
             * instead of creating a new object by *copying* the sds we
             * just use the current sds string. */
            if (pos == 0 &&
                c->bulklen >= REDIS_MBULK_BIG_ARG &&
                (signed) sdslen(c->querybuf) == c->bulklen+2)
            {
                c->argv[c->argc++] = createObject(REDIS_STRING,c->querybuf);
                sdsIncrLen(c->querybuf,-2); /* remove CRLF */
                c->querybuf = sdsempty();
                /* Assume that if we saw a fat argument we'll see another one
                 * likely... */
                c->querybuf = sdsMakeRoomFor(c->querybuf,c->bulklen+2);
                pos = 0;
            } else {
                c->argv[c->argc++] =
                    createStringObject(c->querybuf+pos,c->bulklen);
                pos += c->bulklen+2;
            }
            c->bulklen = -1;
            c->multibulklen--;
        }
    }

    /* Trim to pos */
    if (pos) c->querybuf = sdsrange(c->querybuf,pos,-1);

    /* We're done when c->multibulk == 0 */
    if (c->multibulklen == 0) return REDIS_OK;

    /* Still not ready to process the command */
    return REDIS_ERR;
}

/* Parse and execute as many requests as the client's query buffer holds.
 *
 * Every iteration classifies the pending request (multi bulk when the first
 * byte of the buffer is '*', inline otherwise), hands the buffer to the
 * matching parser and, when the parser returns REDIS_OK, runs the request
 * through processCommand(). The function returns when the buffer is empty,
 * when the parser does not return REDIS_OK, or when the client is flagged
 * REDIS_BLOCKED or REDIS_CLOSE_AFTER_REPLY. */
void processInputBuffer(redisClient *c) {
    int parsed;

    while (sdslen(c->querybuf) != 0) {
        /* Stop immediately if the client is in the middle of something
         * (REDIS_BLOCKED), or if REDIS_CLOSE_AFTER_REPLY is set: the
         * connection is closed once the reply is written, so the reply must
         * not grow any further (i.e. no more commands are processed). */
        if (c->flags & (REDIS_BLOCKED|REDIS_CLOSE_AFTER_REPLY)) return;

        /* The request type is decided by the first byte when still unknown. */
        if (c->reqtype == 0) {
            c->reqtype = (c->querybuf[0] == '*') ?
                REDIS_REQ_MULTIBULK : REDIS_REQ_INLINE;
        }

        parsed = REDIS_OK;
        if (c->reqtype == REDIS_REQ_MULTIBULK) {
            parsed = processMultibulkBuffer(c);
        } else if (c->reqtype == REDIS_REQ_INLINE) {
            parsed = processInlineBuffer(c);
        } else {
            redisPanic("Unknown request type");
        }
        if (parsed != REDIS_OK) return;

        /* A parsed request may carry no arguments (multi bulk processing
         * could see a <= 0 length): there is nothing to run, just reset.
         * Otherwise the client is reset only when the command was executed. */
        if (c->argc == 0 || processCommand(c) == REDIS_OK) resetClient(c);
    }
}

/* Readable-event handler for a client socket: appends the pending bytes to
 * the client's query buffer and feeds them to processInputBuffer().
 *
 * 'el' and 'mask' are unused; 'fd' is the descriptor to read from and
 * 'privdata' is the redisClient the event was registered for.
 *
 * The client is freed when read(2) fails with an error other than EAGAIN,
 * when read(2) returns 0, or when the query buffer grows past
 * server.client_max_querybuf_len. server.current_client points to the client
 * for the duration of the call and is cleared on every return path. */
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *c = (redisClient*) privdata;
    int nread, readlen;
    size_t qblen;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);

    server.current_client = c;
    readlen = REDIS_IOBUF_LEN;
    /* If this is a multi bulk request, and we are processing a bulk argument
     * that is large enough, try to maximize the probability that the query
     * buffer contains exactly the SDS string representing the object, even
     * at the risk of requiring more read(2) calls. This way the function
     * processMultibulkBuffer() can avoid copying buffers to create the
     * Redis Object representing the argument. */
    if (c->reqtype == REDIS_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
        && c->bulklen >= REDIS_MBULK_BIG_ARG)
    {
        size_t wanted = (size_t)c->bulklen+2; /* +2 == trailing \r\n */
        size_t buffered = sdslen(c->querybuf);

        /* Shrink the read only when bytes are actually missing. If the
         * buffer already holds the whole argument the difference would be
         * zero or negative: a zero-length read(2) returns 0 and would be
         * mistaken for a closed connection, a negative one is not a valid
         * length at all. */
        if (buffered < wanted && wanted-buffered < (size_t)readlen)
            readlen = (int)(wanted-buffered);
    }

    qblen = sdslen(c->querybuf);
    c->querybuf = sdsMakeRoomFor(c->querybuf, readlen);
    nread = read(fd, c->querybuf+qblen, readlen);
    if (nread == -1) {
        if (errno == EAGAIN) {
            nread = 0;
        } else {
            redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
            freeClient(c);
            /* Never leave the global pointing at a client just freed. */
            server.current_client = NULL;
            return;
        }
    } else if (nread == 0) {
        redisLog(REDIS_VERBOSE, "Client closed connection");
        freeClient(c);
        server.current_client = NULL;
        return;
    }
    if (nread) {
        sdsIncrLen(c->querybuf,nread);
        c->lastinteraction = time(NULL);
    } else {
        /* EAGAIN: nothing to process for now. */
        server.current_client = NULL;
        return;
    }
    if (sdslen(c->querybuf) > server.client_max_querybuf_len) {
        sds ci = getClientInfoString(c), bytes = sdsempty();

        /* Log the client description and the first 64 bytes of the buffer. */
        bytes = sdscatrepr(bytes,c->querybuf,64);
        redisLog(REDIS_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
        sdsfree(ci);
        sdsfree(bytes);
        freeClient(c);
        server.current_client = NULL;
        return;
    }
    processInputBuffer(c);
    server.current_client = NULL;
}
965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983

/* Scan server.clients and report, through the two output parameters, the
 * largest reply list length and the largest query buffer length found.
 * Both are 0 when the list holds no clients. */
void getClientsMaxBuffers(unsigned long *longest_output_list,
                          unsigned long *biggest_input_buffer) {
    listIter iter;
    listNode *node;
    unsigned long max_reply_items = 0, max_qbuf_len = 0;

    listRewind(server.clients,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        redisClient *cur = listNodeValue(node);
        size_t qbuf_len = sdslen(cur->querybuf);

        if (listLength(cur->reply) > max_reply_items)
            max_reply_items = listLength(cur->reply);
        if (qbuf_len > max_qbuf_len)
            max_qbuf_len = qbuf_len;
    }
    *longest_output_list = max_reply_items;
    *biggest_input_buffer = max_qbuf_len;
}

984 985
/* Return a newly created sds string describing the state of 'client' as a
 * single line of space separated field=value pairs:
 *
 * addr   peer address as ip:port ("?:0" when it cannot be resolved)
 * fd     file descriptor of the client
 * idle   seconds elapsed since client->lastinteraction
 * flags  one letter per set flag: O (slave that is also a monitor), S (slave),
 *        M (master), x (multi), b (blocked), d (dirty CAS),
 *        c (close after reply), u (unblocked), or N when none is set
 * db     id of the selected database
 * sub    number of subscribed channels
 * psub   number of subscribed patterns
 * qbuf   query buffer length
 * obl    client->bufpos
 * oll    number of items in the reply list
 * omem   value of getClientOutputBufferMemoryUsage()
 * events file events of the descriptor: r (readable), w (writable)
 * cmd    name of the last command, or NULL
 *
 * Callers in this file release the returned string with sdsfree(). */
sds getClientInfoString(redisClient *client) {
    char ip[32], flags[16], events[3];
    char *cursor;
    int port, emask;
    time_t now = time(NULL);
    long idle;

    if (anetPeerToString(client->fd,ip,&port) == -1) {
        /* Peer address unavailable: report it as "?:0". */
        ip[0] = '?';
        ip[1] = '\0';
        port = 0;
    }

    /* Flags field. */
    cursor = flags;
    if (client->flags & REDIS_SLAVE)
        *cursor++ = (client->flags & REDIS_MONITOR) ? 'O' : 'S';
    if (client->flags & REDIS_MASTER) *cursor++ = 'M';
    if (client->flags & REDIS_MULTI) *cursor++ = 'x';
    if (client->flags & REDIS_BLOCKED) *cursor++ = 'b';
    if (client->flags & REDIS_DIRTY_CAS) *cursor++ = 'd';
    if (client->flags & REDIS_CLOSE_AFTER_REPLY) *cursor++ = 'c';
    if (client->flags & REDIS_UNBLOCKED) *cursor++ = 'u';
    if (cursor == flags) *cursor++ = 'N';
    *cursor = '\0';

    /* Events field: a client without a descriptor (fd == -1) has none. */
    emask = (client->fd != -1) ? aeGetFileEvents(server.el,client->fd) : 0;
    cursor = events;
    if (emask & AE_READABLE) *cursor++ = 'r';
    if (emask & AE_WRITABLE) *cursor++ = 'w';
    *cursor = '\0';

    idle = (long)(now - client->lastinteraction);
    return sdscatprintf(sdsempty(),
        "addr=%s:%d fd=%d idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s",
        ip,
        port,
        client->fd,
        idle,
        flags,
        client->db->id,
        (int) dictSize(client->pubsub_channels),
        (int) listLength(client->pubsub_patterns),
        (unsigned long) sdslen(client->querybuf),
        (unsigned long) client->bufpos,
        (unsigned long) listLength(client->reply),
        getClientOutputBufferMemoryUsage(client),
        events,
        client->lastcmd ? client->lastcmd->name : "NULL");
}

1033 1034 1035 1036 1037 1038 1039 1040
/* Return an sds string holding one getClientInfoString() line per client in
 * server.clients, each line terminated by a newline. The result is an empty
 * string when the list holds no clients. */
sds getAllClientsInfoString(void) {
    listIter iter;
    listNode *node;
    sds out = sdsempty();

    listRewind(server.clients,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        sds line = getClientInfoString(listNodeValue(node));

        out = sdscatsds(out,line);
        out = sdscatlen(out,"\n",1);
        sdsfree(line);
    }
    return out;
}

A
antirez 已提交
1052
/* Handler of the CLIENT command. Two forms are accepted:
 *
 * CLIENT LIST          replies with the output of getAllClientsInfoString().
 * CLIENT KILL ip:port  looks for the client whose peer address matches the
 *                      argument and closes it, replying OK, or replies with
 *                      an error when no client matches.
 *
 * Anything else gets a syntax error reply. */
void clientCommand(redisClient *c) {
    const char *subcmd = c->argv[1]->ptr;

    if (c->argc == 2 && !strcasecmp(subcmd,"list")) {
        sds info = getAllClientsInfoString();

        addReplyBulkCBuffer(c,info,sdslen(info));
        sdsfree(info);
        return;
    }

    if (c->argc == 3 && !strcasecmp(subcmd,"kill")) {
        listIter iter;
        listNode *node;

        listRewind(server.clients,&iter);
        while ((node = listNext(&iter)) != NULL) {
            redisClient *target = listNodeValue(node);
            char ip[32], addr[64];
            int port;

            /* Clients whose peer address can't be obtained never match. */
            if (anetPeerToString(target->fd,ip,&port) == -1) continue;
            snprintf(addr,sizeof(addr),"%s:%d",ip,port);
            if (strcmp(addr,c->argv[2]->ptr) != 0) continue;

            addReply(c,shared.ok);
            if (target == c) {
                /* The caller is killing itself: it is only flagged, so the
                 * connection is closed after the reply is written. */
                target->flags |= REDIS_CLOSE_AFTER_REPLY;
            } else {
                freeClient(target);
            }
            return;
        }
        addReplyError(c,"No such client");
        return;
    }

    addReplyError(c, "Syntax error, try CLIENT (LIST | KILL ip:port)");
}
1085

1086 1087 1088
/* Rewrite the command vector of the client. All the new objects ref count
 * is incremented. The old command vector is freed, and the old objects
 * ref count is decremented. */
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
void rewriteClientCommandVector(redisClient *c, int argc, ...) {
    va_list ap;
    int j;
    robj **argv; /* The new argument vector */

    argv = zmalloc(sizeof(robj*)*argc);
    va_start(ap,argc);
    for (j = 0; j < argc; j++) {
        robj *a;
        
        a = va_arg(ap, robj*);
        argv[j] = a;
        incrRefCount(a);
    }
    /* We free the objects in the original vector at the end, so we are
     * sure that if the same objects are reused in the new vector the
     * refcount gets incremented before it gets decremented. */
    for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]);
    zfree(c->argv);
    /* Replace argv and argc with our new versions. */
    c->argv = argv;
    c->argc = argc;
1111
    c->cmd = lookupCommand(c->argv[0]->ptr);
1112
    redisAssertWithInfo(c,NULL,c->cmd != NULL);
1113 1114
    va_end(ap);
}
1115 1116 1117 1118 1119 1120

/* Replace the argument at index 'i' of the client's command vector with
 * 'newval'. The reference count of 'newval' is incremented and the one of
 * the replaced object is decremented. The index is asserted to be lower
 * than c->argc. When the command name itself (index 0) is replaced, c->cmd
 * is looked up again and asserted to be non NULL. */
void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
    robj *previous;

    redisAssertWithInfo(c,NULL,i < c->argc);
    previous = c->argv[i];
    /* Take the new reference before dropping the old one. */
    incrRefCount(newval);
    c->argv[i] = newval;
    decrRefCount(previous);

    /* Only a change of the command name requires fixing c->cmd. */
    if (i != 0) return;
    c->cmd = lookupCommand(c->argv[0]->ptr);
    redisAssertWithInfo(c,NULL,c->cmd != NULL);
}
1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151

/* Return the number of bytes Redis is virtually using to store the reply
 * not yet read by the client. The figure is "virtual" because the output
 * list may contain shared objects that do not really use additional memory.
 *
 * The result is c->reply_bytes plus the size of one list node for every
 * item in the output list. The static reply buffer is not counted since it
 * is allocated anyway.
 *
 * Note: the function is very fast, so it can be called as often as the
 * caller wishes. Its main use currently is enforcing the client output
 * length limits. */
unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
    const unsigned long per_node = sizeof(listNode);
    unsigned long nodes_overhead = per_node*listLength(c->reply);

    return c->reply_bytes + nodes_overhead;
}
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166

/* Return the class of a client, used in order to enforce limits to
 * different classes of clients.
 *
 * The returned value is one of:
 * REDIS_CLIENT_LIMIT_CLASS_SLAVE  -> Slave or client executing MONITOR command
 * REDIS_CLIENT_LIMIT_CLASS_PUBSUB -> Client subscribed to Pub/Sub channels
 *                                    or patterns
 * REDIS_CLIENT_LIMIT_CLASS_NORMAL -> Any other client
 *
 * The slave check comes first, so it wins over the Pub/Sub one. */
int getClientLimitClass(redisClient *c) {
    int subscribed;

    if (c->flags & REDIS_SLAVE) return REDIS_CLIENT_LIMIT_CLASS_SLAVE;

    subscribed = dictSize(c->pubsub_channels) != 0 ||
                 listLength(c->pubsub_patterns) != 0;
    return subscribed ? REDIS_CLIENT_LIMIT_CLASS_PUBSUB
                      : REDIS_CLIENT_LIMIT_CLASS_NORMAL;
}