rdb.c 35.8 KB
Newer Older
1
#include <math.h>
2 3 4 5 6
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <arpa/inet.h>
7
#include <sys/stat.h>
8 9
#include "rdb.h"
#include "lzf.h" /* LZF compression library */
10

11
/* Write "len" bytes starting at "p" to the rio stream "rdb".
 *
 * When "rdb" is NULL nothing is written and "len" is returned anyway, so
 * callers can pass a NULL stream just to compute how many bytes would be
 * emitted.
 *
 * Returns -1 when rioWrite() reports failure (returns 0), "len" otherwise. */
static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
    if (rdb != NULL) {
        if (rioWrite(rdb,p,len) == 0) return -1;
    }
    return len;
}

17 18
/* Write the one-byte "type" marker to "rdb".
 * Returns whatever rdbWriteRaw() returns for a one byte write:
 * 1 on success, -1 on write error. */
int rdbSaveType(rio *rdb, unsigned char type) {
    unsigned char byte = type;

    return rdbWriteRaw(rdb,&byte,sizeof(byte));
}

21 22 23 24
/* Read a one-byte type marker from "rdb".
 * Returns the byte value (0..255), or -1 if rioRead() fails. */
int rdbLoadType(rio *rdb) {
    unsigned char byte;

    if (rioRead(rdb,&byte,1) != 0) return byte;
    return -1;
}

27
/* Write the time "t" to "rdb" as a 4 byte signed integer in host byte
 * order. The value is narrowed to int32_t before being written.
 * Returns the rdbWriteRaw() result: 4 on success, -1 on write error. */
int rdbSaveTime(rio *rdb, time_t t) {
    int32_t secs = (int32_t) t;

    return rdbWriteRaw(rdb,&secs,sizeof(secs));
}

32 33 34 35
/* Read a 4 byte signed time value, as written by rdbSaveTime(), from "rdb".
 * Returns the value widened to time_t, or -1 if rioRead() fails. */
time_t rdbLoadTime(rio *rdb) {
    int32_t secs;

    if (rioRead(rdb,&secs,sizeof(secs)) == 0) return -1;
    return (time_t)secs;
}

38 39 40
/* Saves an encoded length. The first two bits in the first byte are used to
 * hold the encoding type. See the REDIS_RDB_* definitions for more information
 * on the types of encoding. */
41
int rdbSaveLen(rio *rdb, uint32_t len) {
42
    unsigned char buf[2];
43
    size_t nwritten;
44 45 46 47

    if (len < (1<<6)) {
        /* Save a 6 bit len */
        buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
48
        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
49
        nwritten = 1;
50 51 52 53
    } else if (len < (1<<14)) {
        /* Save a 14 bit len */
        buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
        buf[1] = len&0xFF;
54
        if (rdbWriteRaw(rdb,buf,2) == -1) return -1;
55
        nwritten = 2;
56 57 58
    } else {
        /* Save a 32 bit len */
        buf[0] = (REDIS_RDB_32BITLEN<<6);
59
        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
60
        len = htonl(len);
61
        if (rdbWriteRaw(rdb,&len,4) == -4) return -1;
62
        nwritten = 1+4;
63
    }
64
    return nwritten;
65 66
}

67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
/* Read a length written by rdbSaveLen(), or a special "encoding type" marker.
 *
 * The two most significant bits of the first byte select the layout. When
 * they equal REDIS_RDB_ENCVAL the remaining six bits are not a length but an
 * encoding type (see the REDIS_RDB_ENC_* definitions in rdb.h); in that case
 * "*isencoded" is set to 1. In every other case "*isencoded" is set to 0.
 * "isencoded" may be NULL if the caller does not care.
 *
 * Returns the decoded value, or REDIS_RDB_LENERR if a read fails. */
uint32_t rdbLoadLen(rio *rdb, int *isencoded) {
    unsigned char buf[2];
    uint32_t len32;
    int kind;

    if (isencoded) *isencoded = 0;
    if (rioRead(rdb,buf,1) == 0) return REDIS_RDB_LENERR;

    /* buf[0] is an unsigned char, so the shift leaves just the top 2 bits. */
    kind = buf[0]>>6;

    if (kind == REDIS_RDB_ENCVAL) {
        /* Six bit encoding type, not a length. */
        if (isencoded) *isencoded = 1;
        return buf[0]&0x3F;
    }
    if (kind == REDIS_RDB_6BITLEN) {
        /* Six bit length stored in the same byte. */
        return buf[0]&0x3F;
    }
    if (kind == REDIS_RDB_14BITLEN) {
        /* Fourteen bit length: six bits here, eight in the next byte. */
        if (rioRead(rdb,buf+1,1) == 0) return REDIS_RDB_LENERR;
        return ((buf[0]&0x3F)<<8)|buf[1];
    }

    /* Anything else: a 32 bit length in network byte order follows. */
    if (rioRead(rdb,&len32,4) == 0) return REDIS_RDB_LENERR;
    return ntohl(len32);
}

/* Encodes the "value" argument as integer when it fits in the supported ranges
 * for encoded types. If the function successfully encodes the integer, the
 * representation is stored in the buffer pointer to by "enc" and the string
 * length is returned. Otherwise 0 is returned. */
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
int rdbEncodeInteger(long long value, unsigned char *enc) {
    if (value >= -(1<<7) && value <= (1<<7)-1) {
        enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
        enc[1] = value&0xFF;
        return 2;
    } else if (value >= -(1<<15) && value <= (1<<15)-1) {
        enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
        enc[1] = value&0xFF;
        enc[2] = (value>>8)&0xFF;
        return 3;
    } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
        enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
        enc[1] = value&0xFF;
        enc[2] = (value>>8)&0xFF;
        enc[3] = (value>>16)&0xFF;
        enc[4] = (value>>24)&0xFF;
        return 5;
    } else {
        return 0;
    }
}

122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
/* Loads an integer-encoded object with the specified encoding type "enctype".
 * If the "encode" argument is set the function may return an integer-encoded
 * string object, otherwise it always returns a raw string object.
 *
 * The payload is 1, 2 or 4 bytes, least significant byte first, and is
 * interpreted as a signed value of that width.
 *
 * Returns NULL if reading the payload fails. An unknown "enctype" triggers
 * redisPanic(). */
robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
    unsigned char enc[4];
    long long val;

    if (enctype == REDIS_RDB_ENC_INT8) {
        if (rioRead(rdb,enc,1) == 0) return NULL;
        val = (signed char)enc[0];
    } else if (enctype == REDIS_RDB_ENC_INT16) {
        uint16_t v;
        if (rioRead(rdb,enc,2) == 0) return NULL;
        v = enc[0]|(enc[1]<<8);
        val = (int16_t)v;
    } else if (enctype == REDIS_RDB_ENC_INT32) {
        uint32_t v;
        if (rioRead(rdb,enc,4) == 0) return NULL;
        /* Widen every byte to uint32_t before shifting: an unsigned char is
         * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
         * would shift into the sign bit, which is undefined behavior. */
        v = (uint32_t)enc[0] |
            ((uint32_t)enc[1]<<8) |
            ((uint32_t)enc[2]<<16) |
            ((uint32_t)enc[3]<<24);
        val = (int32_t)v;
    } else {
        val = 0; /* anti-warning */
        redisPanic("Unknown RDB integer encoding type");
    }
    if (encode)
        return createStringObjectFromLongLong(val);
    else
        return createObject(REDIS_STRING,sdsfromlonglong(val));
}

152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * "s" is a buffer of "len" bytes. It is not required to be NUL terminated:
 * only the first "len" bytes are ever examined.
 *
 * Returns the number of bytes stored in "enc" by rdbEncodeInteger(), or 0 if
 * the string can't be represented as an encoded integer. */
int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long value;
    char *endptr, buf[32], tmp[32];

    /* strtoll() scans until it finds a NUL byte, regardless of "len".
     * Parse a bounded, NUL terminated copy so we never read past the "len"
     * bytes we were given. Anything too long for the copy can't match the
     * decimal representation of a long long (checked against "buf" below,
     * which has the same size) so it is rejected right away. */
    if (len >= sizeof(tmp)) return 0;
    memcpy(tmp,s,len);
    tmp[len] = '\0';

    /* Check if it's possible to encode this value as a number */
    value = strtoll(tmp, &endptr, 10);
    if (endptr[0] != '\0') return 0;
    ll2string(buf,32,value);

    /* If the number converted back into a string is not identical
     * then it's not possible to encode the string as integer */
    if (strlen(buf) != len || memcmp(buf,s,len)) return 0;

    return rdbEncodeInteger(value,enc);
}

171
/* Try to save the "len" bytes at "s" in LZF compressed form.
 *
 * On disk layout: one byte with REDIS_RDB_ENCVAL in the top two bits and
 * REDIS_RDB_ENC_LZF in the low bits, the compressed length, the original
 * length (both written with rdbSaveLen()), then the compressed payload.
 *
 * Returns the number of bytes written, 0 if the data was not compressed
 * (input of four bytes or less, allocation failure, or lzf_compress()
 * returning 0) and -1 on write error. */
int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
    size_t comprlen, outlen;
    unsigned char marker;
    int hdr, clenbytes, lenbytes, payload;
    int total = -1;
    void *out;

    /* We require at least four bytes compression for this to be worth it */
    if (len <= 4) return 0;
    outlen = len-4;
    out = zmalloc(outlen+1);
    if (out == NULL) return 0;

    comprlen = lzf_compress(s, len, out, outlen);
    if (comprlen == 0) {
        zfree(out);
        return 0;
    }

    /* Data compressed! Emit the four parts in order; the && chain stops at
     * the first failing write and leaves "total" set to -1. */
    marker = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
    if ((hdr = rdbWriteRaw(rdb,&marker,1)) != -1 &&
        (clenbytes = rdbSaveLen(rdb,comprlen)) != -1 &&
        (lenbytes = rdbSaveLen(rdb,len)) != -1 &&
        (payload = rdbWriteRaw(rdb,out,comprlen)) != -1)
    {
        total = hdr+clenbytes+lenbytes+payload;
    }

    zfree(out);
    return total;
}

208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
/* Load a string stored in LZF compressed form: the compressed length and the
 * uncompressed length are read with rdbLoadLen(), then the compressed payload
 * is read and decompressed into a new sds string.
 *
 * Returns a REDIS_STRING object holding the decompressed data, or NULL if a
 * length can't be read, an allocation returns NULL, the payload can't be
 * read, or lzf_decompress() returns 0. */
robj *rdbLoadLzfStringObject(rio *rdb) {
    unsigned int rawlen, packedlen;
    unsigned char *packed = NULL;
    sds raw = NULL;
    int ok;

    if ((packedlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
    if ((rawlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;

    /* Each step only runs if the previous one succeeded. */
    ok = (packed = zmalloc(packedlen)) != NULL &&
         (raw = sdsnewlen(NULL,rawlen)) != NULL &&
         rioRead(rdb,packed,packedlen) != 0 &&
         lzf_decompress(packed,packedlen,raw,rawlen) != 0;

    /* The compressed buffer is no longer needed in either case. */
    zfree(packed);
    if (!ok) {
        sdsfree(raw);
        return NULL;
    }
    return createObject(REDIS_STRING,raw);
}

227
/* Save a string objet as [len][data] on disk. If the object is a string
228
 * representation of an integer value we try to save it in a special form */
229
int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
230
    int enclen;
231
    int n, nwritten = 0;
232 233 234 235 236

    /* Try integer encoding */
    if (len <= 11) {
        unsigned char buf[5];
        if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
237
            if (rdbWriteRaw(rdb,buf,enclen) == -1) return -1;
238
            return enclen;
239 240 241 242 243 244
        }
    }

    /* Try LZF compression - under 20 bytes it's unable to compress even
     * aaaaaaaaaaaaaaaaaa so skip it */
    if (server.rdbcompression && len > 20) {
245
        n = rdbSaveLzfStringObject(rdb,s,len);
246 247 248
        if (n == -1) return -1;
        if (n > 0) return n;
        /* Return value of 0 means data can't be compressed, save the old way */
249 250 251
    }

    /* Store verbatim */
252
    if ((n = rdbSaveLen(rdb,len)) == -1) return -1;
253 254
    nwritten += n;
    if (len > 0) {
255
        if (rdbWriteRaw(rdb,s,len) == -1) return -1;
256 257 258
        nwritten += len;
    }
    return nwritten;
259 260 261
}

/* Save a long long value as either an encoded string or a string. */
262
int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
263
    unsigned char buf[32];
264
    int n, nwritten = 0;
265 266
    int enclen = rdbEncodeInteger(value,buf);
    if (enclen > 0) {
267
        return rdbWriteRaw(rdb,buf,enclen);
268 269 270 271
    } else {
        /* Encode as string */
        enclen = ll2string((char*)buf,32,value);
        redisAssert(enclen < 32);
272
        if ((n = rdbSaveLen(rdb,enclen)) == -1) return -1;
273
        nwritten += n;
274
        if ((n = rdbWriteRaw(rdb,buf,enclen)) == -1) return -1;
275
        nwritten += n;
276
    }
277
    return nwritten;
278 279 280
}

/* Like rdbSaveStringObjectRaw() but handle encoded objects */
281
int rdbSaveStringObject(rio *rdb, robj *obj) {
282 283 284
    /* Avoid to decode the object, then encode it again, if the
     * object is alrady integer encoded. */
    if (obj->encoding == REDIS_ENCODING_INT) {
285
        return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
286 287
    } else {
        redisAssert(obj->encoding == REDIS_ENCODING_RAW);
288
        return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
289 290 291
    }
}

292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
/* Load a string object in any of the forms it may have been saved in:
 * integer encoded, LZF compressed, or verbatim [len][data].
 *
 * "encode" is forwarded to rdbLoadIntegerObject() for integer encoded
 * values; it is not used for the other forms.
 *
 * Returns the loaded object, or NULL on read error. An unknown special
 * encoding triggers redisPanic(). */
robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
    int special;
    uint32_t len;
    sds data;

    len = rdbLoadLen(rdb,&special);
    if (special) {
        /* "len" is really an encoding type here. */
        if (len == REDIS_RDB_ENC_INT8 ||
            len == REDIS_RDB_ENC_INT16 ||
            len == REDIS_RDB_ENC_INT32)
        {
            return rdbLoadIntegerObject(rdb,len,encode);
        }
        if (len == REDIS_RDB_ENC_LZF)
            return rdbLoadLzfStringObject(rdb);
        redisPanic("Unknown RDB encoding type");
    }

    if (len == REDIS_RDB_LENERR) return NULL;

    /* Plain string: allocate it, then fill it from the stream. Nothing is
     * read for the empty string. */
    data = sdsnewlen(NULL,len);
    if (len != 0 && rioRead(rdb,data,len) == 0) {
        sdsfree(data);
        return NULL;
    }
    return createObject(REDIS_STRING,data);
}

/* Load a string object passing encode=0 to rdbGenericLoadStringObject(). */
robj *rdbLoadStringObject(rio *rdb) {
    const int encode = 0;

    return rdbGenericLoadStringObject(rdb,encode);
}

/* Load a string object passing encode=1 to rdbGenericLoadStringObject(). */
robj *rdbLoadEncodedStringObject(rio *rdb) {
    const int encode = 1;

    return rdbGenericLoadStringObject(rdb,encode);
}

328 329 330 331 332 333 334 335
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifing the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 */
336
int rdbSaveDoubleValue(rio *rdb, double val) {
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under this assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * integer printing function that is much faster. */
        double min = -4503599627370495; /* (2^52)-1 */
        double max = 4503599627370496; /* -(2^52) */
        if (val > min && val < max && val == ((double)((long long)val)))
            ll2string((char*)buf+1,sizeof(buf),(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
367
    return rdbWriteRaw(rdb,buf,len);
368 369
}

370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
/* Load a double value. For information about double serialization check
 * rdbSaveDoubleValue(): a length byte is read first, where 253, 254 and 255
 * mean NaN, +inf and -inf respectively, and any other value is the length of
 * the textual representation that follows.
 *
 * On success the value is stored in "*val" and 0 is returned. -1 is returned
 * if a read fails or if the text can't be parsed as a number. */
int rdbLoadDoubleValue(rio *rdb, double *val) {
    /* The length byte comes from the file and can be as large as 252, so the
     * buffer must hold that many bytes plus the terminator: a smaller buffer
     * would be overflowed by a corrupted or malicious file. */
    char buf[256];
    unsigned char len;

    if (rioRead(rdb,&len,1) == 0) return -1;
    switch(len) {
    case 255: *val = R_NegInf; return 0;
    case 254: *val = R_PosInf; return 0;
    case 253: *val = R_Nan; return 0;
    default:
        if (rioRead(rdb,buf,len) == 0) return -1;
        buf[len] = '\0';
        /* Report unparsable text instead of returning success with "*val"
         * left unset. */
        if (sscanf(buf, "%lg", val) != 1) return -1;
        return 0;
    }
}

/* Write the RDB type byte for object "o". The byte depends on both the
 * object type and its current encoding, since specially encoded lists, sets,
 * sorted sets and hashes have their own RDB types.
 *
 * Returns the rdbSaveType() result (-1 on write error). An unknown type or
 * encoding triggers redisPanic(). */
int rdbSaveObjectType(rio *rdb, robj *o) {
    if (o->type == REDIS_STRING) {
        return rdbSaveType(rdb,REDIS_RDB_TYPE_STRING);
    } else if (o->type == REDIS_LIST) {
        if (o->encoding == REDIS_ENCODING_ZIPLIST)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_ZIPLIST);
        if (o->encoding == REDIS_ENCODING_LINKEDLIST)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST);
        redisPanic("Unknown list encoding");
    } else if (o->type == REDIS_SET) {
        if (o->encoding == REDIS_ENCODING_INTSET)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_SET_INTSET);
        if (o->encoding == REDIS_ENCODING_HT)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_SET);
        redisPanic("Unknown set encoding");
    } else if (o->type == REDIS_ZSET) {
        if (o->encoding == REDIS_ENCODING_ZIPLIST)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET_ZIPLIST);
        if (o->encoding == REDIS_ENCODING_SKIPLIST)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET);
        redisPanic("Unknown sorted set encoding");
    } else if (o->type == REDIS_HASH) {
        if (o->encoding == REDIS_ENCODING_ZIPMAP)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPMAP);
        if (o->encoding == REDIS_ENCODING_HT)
            return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH);
        redisPanic("Unknown hash encoding");
    } else {
        redisPanic("Unknown object type");
    }
    return -1; /* avoid warning */
}

/* Read a type byte and make sure it is an object type.
 * Returns the type, or -1 if the read fails or rdbIsObjectType() rejects
 * the value that was read. */
int rdbLoadObjectType(rio *rdb) {
    int type = rdbLoadType(rdb);

    if (type == -1 || !rdbIsObjectType(type)) return -1;
    return type;
}

A
antirez 已提交
435
/* Save a Redis object. Returns -1 on error, 0 on success. */
436
int rdbSaveObject(rio *rdb, robj *o) {
437 438
    int n, nwritten = 0;

439 440
    if (o->type == REDIS_STRING) {
        /* Save a string value */
441
        if ((n = rdbSaveStringObject(rdb,o)) == -1) return -1;
442
        nwritten += n;
443 444 445
    } else if (o->type == REDIS_LIST) {
        /* Save a list value */
        if (o->encoding == REDIS_ENCODING_ZIPLIST) {
446
            size_t l = ziplistBlobLen((unsigned char*)o->ptr);
447

448
            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
449
            nwritten += n;
450 451 452 453 454
        } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
            list *list = o->ptr;
            listIter li;
            listNode *ln;

455
            if ((n = rdbSaveLen(rdb,listLength(list))) == -1) return -1;
456 457
            nwritten += n;

458 459 460
            listRewind(list,&li);
            while((ln = listNext(&li))) {
                robj *eleobj = listNodeValue(ln);
461
                if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
462
                nwritten += n;
463 464 465 466 467 468
            }
        } else {
            redisPanic("Unknown list encoding");
        }
    } else if (o->type == REDIS_SET) {
        /* Save a set value */
469 470 471 472
        if (o->encoding == REDIS_ENCODING_HT) {
            dict *set = o->ptr;
            dictIterator *di = dictGetIterator(set);
            dictEntry *de;
473

474
            if ((n = rdbSaveLen(rdb,dictSize(set))) == -1) return -1;
475 476
            nwritten += n;

477 478
            while((de = dictNext(di)) != NULL) {
                robj *eleobj = dictGetEntryKey(de);
479
                if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
480
                nwritten += n;
481 482 483
            }
            dictReleaseIterator(di);
        } else if (o->encoding == REDIS_ENCODING_INTSET) {
484
            size_t l = intsetBlobLen((intset*)o->ptr);
485

486
            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
487
            nwritten += n;
488 489
        } else {
            redisPanic("Unknown set encoding");
490 491
        }
    } else if (o->type == REDIS_ZSET) {
492 493 494
        /* Save a sorted set value */
        if (o->encoding == REDIS_ENCODING_ZIPLIST) {
            size_t l = ziplistBlobLen((unsigned char*)o->ptr);
495

496
            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
497
            nwritten += n;
498
        } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
499 500 501 502
            zset *zs = o->ptr;
            dictIterator *di = dictGetIterator(zs->dict);
            dictEntry *de;

503
            if ((n = rdbSaveLen(rdb,dictSize(zs->dict))) == -1) return -1;
504
            nwritten += n;
505 506 507 508 509

            while((de = dictNext(di)) != NULL) {
                robj *eleobj = dictGetEntryKey(de);
                double *score = dictGetEntryVal(de);

510
                if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
511
                nwritten += n;
512
                if ((n = rdbSaveDoubleValue(rdb,*score)) == -1) return -1;
513 514 515 516
                nwritten += n;
            }
            dictReleaseIterator(di);
        } else {
P
Typo  
Pieter Noordhuis 已提交
517
            redisPanic("Unknown sorted set encoding");
518 519 520 521
        }
    } else if (o->type == REDIS_HASH) {
        /* Save a hash value */
        if (o->encoding == REDIS_ENCODING_ZIPMAP) {
522
            size_t l = zipmapBlobLen((unsigned char*)o->ptr);
523

524
            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
525
            nwritten += n;
526 527 528 529
        } else {
            dictIterator *di = dictGetIterator(o->ptr);
            dictEntry *de;

530
            if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) return -1;
531 532
            nwritten += n;

533 534 535 536
            while((de = dictNext(di)) != NULL) {
                robj *key = dictGetEntryKey(de);
                robj *val = dictGetEntryVal(de);

537
                if ((n = rdbSaveStringObject(rdb,key)) == -1) return -1;
538
                nwritten += n;
539
                if ((n = rdbSaveStringObject(rdb,val)) == -1) return -1;
540
                nwritten += n;
541 542 543 544 545 546
            }
            dictReleaseIterator(di);
        }
    } else {
        redisPanic("Unknown object type");
    }
547
    return nwritten;
548 549 550 551 552 553
}

/* Return the length the object will have on disk if saved with
 * the rdbSaveObject() function. Currently we use a trick to get
 * this length with very little changes to the code. In the future
 * we could switch to a faster solution. */
554 555 556 557
off_t rdbSavedObjectLen(robj *o) {
    int len = rdbSaveObject(NULL,o);
    redisAssert(len != -1);
    return len;
558 559
}

560 561 562 563
/* Save a key-value pair, with expire time, type, key, value.
 * On error -1 is returned.
 * On success if the key was actaully saved 1 is returned, otherwise 0
 * is returned (the key was already expired). */
564
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
565
                        time_t expiretime, time_t now)
566 567 568 569 570
{
    /* Save the expire time */
    if (expiretime != -1) {
        /* If this key is already expired skip it */
        if (expiretime < now) return 0;
571
        if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EXPIRETIME) == -1) return -1;
572
        if (rdbSaveTime(rdb,expiretime) == -1) return -1;
573
    }
574

575
    /* Save type, key, value */
576
    if (rdbSaveObjectType(rdb,val) == -1) return -1;
577 578
    if (rdbSaveStringObject(rdb,key) == -1) return -1;
    if (rdbSaveObject(rdb,val) == -1) return -1;
579 580 581
    return 1;
}

582 583 584 585 586 587 588
/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
int rdbSave(char *filename) {
    dictIterator *di = NULL;
    dictEntry *de;
    char tmpfile[256];
    int j;
    time_t now = time(NULL);
589 590
    FILE *fp;
    rio rdb;
591 592 593 594

    snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
    fp = fopen(tmpfile,"w");
    if (!fp) {
A
antirez 已提交
595 596
        redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",
            strerror(errno));
597 598
        return REDIS_ERR;
    }
599

600
    rioInitWithFile(&rdb,fp);
601 602
    if (rdbWriteRaw(&rdb,"REDIS0002",9) == -1) goto werr;

603 604 605 606
    for (j = 0; j < server.dbnum; j++) {
        redisDb *db = server.db+j;
        dict *d = db->dict;
        if (dictSize(d) == 0) continue;
607
        di = dictGetSafeIterator(d);
608 609 610 611 612 613
        if (!di) {
            fclose(fp);
            return REDIS_ERR;
        }

        /* Write the SELECT DB opcode */
614
        if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
615
        if (rdbSaveLen(&rdb,j) == -1) goto werr;
616 617 618 619 620

        /* Iterate this DB writing every entry */
        while((de = dictNext(di)) != NULL) {
            sds keystr = dictGetEntryKey(de);
            robj key, *o = dictGetEntryVal(de);
621
            time_t expire;
622 623
            
            initStaticStringObject(key,keystr);
624
            expire = getExpire(db,&key);
625
            if (rdbSaveKeyValuePair(&rdb,&key,o,expire,now) == -1) goto werr;
626 627 628 629
        }
        dictReleaseIterator(di);
    }
    /* EOF opcode */
630
    if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658

    /* Make sure data will not remain on the OS's output buffers */
    fflush(fp);
    fsync(fileno(fp));
    fclose(fp);

    /* Use RENAME to make sure the DB file is changed atomically only
     * if the generate DB file is ok. */
    if (rename(tmpfile,filename) == -1) {
        redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
        unlink(tmpfile);
        return REDIS_ERR;
    }
    redisLog(REDIS_NOTICE,"DB saved on disk");
    server.dirty = 0;
    server.lastsave = time(NULL);
    return REDIS_OK;

werr:
    fclose(fp);
    unlink(tmpfile);
    redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
    if (di) dictReleaseIterator(di);
    return REDIS_ERR;
}

int rdbSaveBackground(char *filename) {
    pid_t childpid;
659
    long long start;
660

A
antirez 已提交
661
    if (server.bgsavechildpid != -1) return REDIS_ERR;
A
antirez 已提交
662

663
    server.dirty_before_bgsave = server.dirty;
A
antirez 已提交
664

665
    start = ustime();
666
    if ((childpid = fork()) == 0) {
A
antirez 已提交
667 668
        int retval;

669
        /* Child */
670 671
        if (server.ipfd > 0) close(server.ipfd);
        if (server.sofd > 0) close(server.sofd);
672
        retval = rdbSave(filename);
A
antirez 已提交
673
        _exit((retval == REDIS_OK) ? 0 : 1);
674 675
    } else {
        /* Parent */
676
        server.stat_fork_time = ustime()-start;
677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
        if (childpid == -1) {
            redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
                strerror(errno));
            return REDIS_ERR;
        }
        redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
        server.bgsavechildpid = childpid;
        updateDictResizePolicy();
        return REDIS_OK;
    }
    return REDIS_OK; /* unreached */
}

/* Remove the temporary file "temp-<pid>.rdb" for the given process id.
 * This is the same name rdbSave() builds from getpid(). The unlink() result
 * is ignored. */
void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}

/* Load a Redis object of the specified type from the specified file.
 * On success a newly allocated object is returned, otherwise NULL. */
699
robj *rdbLoadObject(int rdbtype, rio *rdb) {
700 701
    robj *o, *ele, *dec;
    size_t len;
702
    unsigned int i;
703

704 705
    redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",rdbtype,rdb->tell(rdb));
    if (rdbtype == REDIS_RDB_TYPE_STRING) {
706
        /* Read string value */
707
        if ((o = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
708
        o = tryObjectEncoding(o);
709
    } else if (rdbtype == REDIS_RDB_TYPE_LIST) {
710
        /* Read list value */
711
        if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
712 713 714 715 716 717 718 719 720 721

        /* Use a real list when there are too many entries */
        if (len > server.list_max_ziplist_entries) {
            o = createListObject();
        } else {
            o = createZiplistObject();
        }

        /* Load every single element of the list */
        while(len--) {
722
            if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740

            /* If we are using a ziplist and the value is too big, convert
             * the object to a real list. */
            if (o->encoding == REDIS_ENCODING_ZIPLIST &&
                ele->encoding == REDIS_ENCODING_RAW &&
                sdslen(ele->ptr) > server.list_max_ziplist_value)
                    listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);

            if (o->encoding == REDIS_ENCODING_ZIPLIST) {
                dec = getDecodedObject(ele);
                o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
                decrRefCount(dec);
                decrRefCount(ele);
            } else {
                ele = tryObjectEncoding(ele);
                listAddNodeTail(o->ptr,ele);
            }
        }
741
    } else if (rdbtype == REDIS_RDB_TYPE_SET) {
742
        /* Read list/set value */
743
        if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
744 745 746 747 748 749 750 751 752 753 754 755

        /* Use a regular set when there are too many entries. */
        if (len > server.set_max_intset_entries) {
            o = createSetObject();
            /* It's faster to expand the dict to the right size asap in order
             * to avoid rehashing */
            if (len > DICT_HT_INITIAL_SIZE)
                dictExpand(o->ptr,len);
        } else {
            o = createIntsetObject();
        }

756
        /* Load every single element of the list/set */
757 758
        for (i = 0; i < len; i++) {
            long long llval;
759
            if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
760
            ele = tryObjectEncoding(ele);
761 762 763

            if (o->encoding == REDIS_ENCODING_INTSET) {
                /* Fetch integer value from element */
A
antirez 已提交
764
                if (isObjectRepresentableAsLongLong(ele,&llval) == REDIS_OK) {
765 766 767 768 769 770 771 772 773 774 775
                    o->ptr = intsetAdd(o->ptr,llval,NULL);
                } else {
                    setTypeConvert(o,REDIS_ENCODING_HT);
                    dictExpand(o->ptr,len);
                }
            }

            /* This will also be called when the set was just converted
             * to regular hashtable encoded set */
            if (o->encoding == REDIS_ENCODING_HT) {
                dictAdd((dict*)o->ptr,ele,NULL);
776 777
            } else {
                decrRefCount(ele);
778
            }
779
        }
780
    } else if (rdbtype == REDIS_RDB_TYPE_ZSET) {
781 782
        /* Read list/set value */
        size_t zsetlen;
783
        size_t maxelelen = 0;
784 785
        zset *zs;

786
        if ((zsetlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
787 788
        o = createZsetObject();
        zs = o->ptr;
789

790 791 792
        /* Load every single element of the list/set */
        while(zsetlen--) {
            robj *ele;
793 794
            double score;
            zskiplistNode *znode;
795

796
            if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
797
            ele = tryObjectEncoding(ele);
798
            if (rdbLoadDoubleValue(rdb,&score) == -1) return NULL;
799 800 801 802 803 804

            /* Don't care about integer-encoded strings. */
            if (ele->encoding == REDIS_ENCODING_RAW &&
                sdslen(ele->ptr) > maxelelen)
                    maxelelen = sdslen(ele->ptr);

805 806
            znode = zslInsert(zs->zsl,score,ele);
            dictAdd(zs->dict,ele,&znode->score);
807 808
            incrRefCount(ele); /* added to skiplist */
        }
809 810 811 812 813

        /* Convert *after* loading, since sorted sets are not stored ordered. */
        if (zsetLength(o) <= server.zset_max_ziplist_entries &&
            maxelelen <= server.zset_max_ziplist_value)
                zsetConvert(o,REDIS_ENCODING_ZIPLIST);
814
    } else if (rdbtype == REDIS_RDB_TYPE_HASH) {
815 816
        size_t hashlen;

817
        if ((hashlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
818 819 820 821 822 823 824 825 826
        o = createHashObject();
        /* Too many entries? Use an hash table. */
        if (hashlen > server.hash_max_zipmap_entries)
            convertToRealHash(o);
        /* Load every key/value, then set it into the zipmap or hash
         * table, as needed. */
        while(hashlen--) {
            robj *key, *val;

827 828
            if ((key = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
            if ((val = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
            /* If we are using a zipmap and there are too big values
             * the object is converted to real hash table encoding. */
            if (o->encoding != REDIS_ENCODING_HT &&
               ((key->encoding == REDIS_ENCODING_RAW &&
                sdslen(key->ptr) > server.hash_max_zipmap_value) ||
                (val->encoding == REDIS_ENCODING_RAW &&
                sdslen(val->ptr) > server.hash_max_zipmap_value)))
            {
                    convertToRealHash(o);
            }

            if (o->encoding == REDIS_ENCODING_ZIPMAP) {
                unsigned char *zm = o->ptr;
                robj *deckey, *decval;

                /* We need raw string objects to add them to the zipmap */
                deckey = getDecodedObject(key);
                decval = getDecodedObject(val);
                zm = zipmapSet(zm,deckey->ptr,sdslen(deckey->ptr),
                                  decval->ptr,sdslen(decval->ptr),NULL);
                o->ptr = zm;
                decrRefCount(deckey);
                decrRefCount(decval);
                decrRefCount(key);
                decrRefCount(val);
            } else {
                key = tryObjectEncoding(key);
                val = tryObjectEncoding(val);
                dictAdd((dict*)o->ptr,key,val);
            }
        }
860 861 862 863
    } else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP  ||
               rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST ||
               rdbtype == REDIS_RDB_TYPE_SET_INTSET   ||
               rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST)
864
    {
865
        robj *aux = rdbLoadStringObject(rdb);
866 867

        if (aux == NULL) return NULL;
868
        o = createObject(REDIS_STRING,NULL); /* string is just placeholder */
869 870 871
        o->ptr = zmalloc(sdslen(aux->ptr));
        memcpy(o->ptr,aux->ptr,sdslen(aux->ptr));
        decrRefCount(aux);
872 873 874 875 876 877 878

        /* Fix the object encoding, and make sure to convert the encoded
         * data type into the base type if accordingly to the current
         * configuration there are too many elements in the encoded data
         * type. Note that we only check the length and not max element
         * size as this is an O(N) scan. Eventually everything will get
         * converted. */
879 880
        switch(rdbtype) {
            case REDIS_RDB_TYPE_HASH_ZIPMAP:
881 882 883 884 885
                o->type = REDIS_HASH;
                o->encoding = REDIS_ENCODING_ZIPMAP;
                if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries)
                    convertToRealHash(o);
                break;
886
            case REDIS_RDB_TYPE_LIST_ZIPLIST:
887 888 889 890 891
                o->type = REDIS_LIST;
                o->encoding = REDIS_ENCODING_ZIPLIST;
                if (ziplistLen(o->ptr) > server.list_max_ziplist_entries)
                    listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
                break;
892
            case REDIS_RDB_TYPE_SET_INTSET:
893 894 895 896 897
                o->type = REDIS_SET;
                o->encoding = REDIS_ENCODING_INTSET;
                if (intsetLen(o->ptr) > server.set_max_intset_entries)
                    setTypeConvert(o,REDIS_ENCODING_HT);
                break;
898
            case REDIS_RDB_TYPE_ZSET_ZIPLIST:
899 900
                o->type = REDIS_ZSET;
                o->encoding = REDIS_ENCODING_ZIPLIST;
901
                if (zsetLength(o) > server.zset_max_ziplist_entries)
902
                    zsetConvert(o,REDIS_ENCODING_SKIPLIST);
903
                break;
904
            default:
905
                redisPanic("Unknown encoding");
906
                break;
907
        }
908 909 910 911 912 913
    } else {
        redisPanic("Unknown object type");
    }
    return o;
}

914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
/* Flag in the global state that a load is in progress and set up the fields
 * used to report loading stats: the start time and the total number of bytes
 * to load, taken from the size of the file behind "fp". */
void startLoading(FILE *fp) {
    struct stat st;

    /* Load the DB */
    server.loading = 1;
    server.loading_start_time = time(NULL);
    /* If the size can't be obtained use 1, just to avoid division by zero. */
    server.loading_total_bytes = (fstat(fileno(fp), &st) == -1) ? 1 : st.st_size;
}

/* Refresh the loading progress info: store 'pos' in
 * server.loading_loaded_bytes. rdbLoad() calls this periodically with the
 * current offset reported by the rio stream it is reading from. */
void loadingProgress(off_t pos) {
    server.loading_loaded_bytes = pos;
}

/* Loading finished: clear the server.loading flag that startLoading()
 * turned on. The other loading stats fields are left untouched. */
void stopLoading(void) {
    server.loading = 0;
}

939 940
int rdbLoad(char *filename) {
    uint32_t dbid;
941
    int type, rdbver;
942 943 944
    redisDb *db = server.db+0;
    char buf[1024];
    time_t expiretime, now = time(NULL);
945
    long loops = 0;
946 947
    FILE *fp;
    rio rdb;
948 949 950

    fp = fopen(filename,"r");
    if (!fp) return REDIS_ERR;
951
    rioInitWithFile(&rdb,fp);
P
Pieter Noordhuis 已提交
952
    if (rioRead(&rdb,buf,9) == 0) goto eoferr;
953 954 955 956 957 958 959
    buf[9] = '\0';
    if (memcmp(buf,"REDIS",5) != 0) {
        fclose(fp);
        redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
        return REDIS_ERR;
    }
    rdbver = atoi(buf+5);
960
    if (rdbver < 1 || rdbver > 2) {
961 962 963 964
        fclose(fp);
        redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
        return REDIS_ERR;
    }
965 966

    startLoading(fp);
967 968 969
    while(1) {
        robj *key, *val;
        expiretime = -1;
970 971 972

        /* Serve the clients from time to time */
        if (!(loops++ % 1000)) {
973
            loadingProgress(rdb.tell(&rdb));
974 975 976
            aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
        }

977
        /* Read type. */
978
        if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
979
        if (type == REDIS_RDB_OPCODE_EXPIRETIME) {
980
            if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
981
            /* We read the time so we need to read the object type again. */
982
            if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
983
        }
984 985 986 987

        if (type == REDIS_RDB_OPCODE_EOF)
            break;

988
        /* Handle SELECT DB opcode as a special case */
989
        if (type == REDIS_RDB_OPCODE_SELECTDB) {
990
            if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
991 992 993 994 995 996 997 998 999
                goto eoferr;
            if (dbid >= (unsigned)server.dbnum) {
                redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
                exit(1);
            }
            db = server.db+dbid;
            continue;
        }
        /* Read key */
1000
        if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
1001
        /* Read value */
1002
        if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
1003 1004 1005 1006 1007 1008 1009
        /* Check if the key already expired */
        if (expiretime != -1 && expiretime < now) {
            decrRefCount(key);
            decrRefCount(val);
            continue;
        }
        /* Add the new object in the hash table */
1010 1011
        dbAdd(db,key,val);

1012 1013 1014 1015 1016 1017
        /* Set the expire time if needed */
        if (expiretime != -1) setExpire(db,key,expiretime);

        decrRefCount(key);
    }
    fclose(fp);
1018
    stopLoading();
1019 1020 1021 1022 1023 1024 1025 1026 1027
    return REDIS_OK;

eoferr: /* unexpected end of file is handled here with a fatal exit */
    redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
    exit(1);
    return REDIS_ERR; /* Just to avoid warning */
}

/* A background saving child (BGSAVE) terminated its work. Handle this. */
1028
void backgroundSaveDoneHandler(int exitcode, int bysignal) {
1029 1030 1031
    if (!bysignal && exitcode == 0) {
        redisLog(REDIS_NOTICE,
            "Background saving terminated with success");
1032
        server.dirty = server.dirty - server.dirty_before_bgsave;
1033 1034 1035 1036 1037
        server.lastsave = time(NULL);
    } else if (!bysignal && exitcode != 0) {
        redisLog(REDIS_WARNING, "Background saving error");
    } else {
        redisLog(REDIS_WARNING,
1038
            "Background saving terminated by signal %d", bysignal);
1039 1040 1041 1042 1043 1044 1045
        rdbRemoveTempFile(server.bgsavechildpid);
    }
    server.bgsavechildpid = -1;
    /* Possibly there are slaves waiting for a BGSAVE in order to be served
     * (the first stage of SYNC is a bulk transfer of dump.rdb) */
    updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
}
1046 1047

/* SAVE command implementation.
 *
 * If a background save child is active the command is refused with an
 * error. Otherwise rdbSave() is called on server.dbfilename and the client
 * receives shared.ok or shared.err according to its return value. */
void saveCommand(redisClient *c) {
    if (server.bgsavechildpid == -1) {
        int saved = (rdbSave(server.dbfilename) == REDIS_OK);

        addReply(c, saved ? shared.ok : shared.err);
    } else {
        addReplyError(c,"Background save already in progress");
    }
}

/* BGSAVE command implementation.
 *
 * The command is refused with an error when a background save child or an
 * AOF rewrite child is already active. Otherwise rdbSaveBackground() is
 * called on server.dbfilename: the client gets a status reply if it
 * returned REDIS_OK, shared.err otherwise. */
void bgsaveCommand(redisClient *c) {
    /* Only one saving child at a time. */
    if (server.bgsavechildpid != -1) {
        addReplyError(c,"Background save already in progress");
        return;
    }

    /* No BGSAVE while an AOF rewrite child is running. */
    if (server.bgrewritechildpid != -1) {
        addReplyError(c,"Can't BGSAVE while AOF log rewriting is in progress");
        return;
    }

    if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
        addReply(c,shared.err);
        return;
    }
    addReplyStatus(c,"Background saving started");
}