提交 d0b58d53 编写于 作者: P Pieter Noordhuis

intset encoding for sets, refactored set tests to test both encodings

上级 e24d9376
......@@ -15,7 +15,7 @@ endif
CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF)
DEBUG?= -g -rdynamic -ggdb
OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o
OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o intset.o
BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o
CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o
CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o
......@@ -54,6 +54,7 @@ redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \
sds.o: sds.c sds.h zmalloc.h
zipmap.o: zipmap.c zmalloc.h
ziplist.o: ziplist.c zmalloc.h
intset.o: intset.c zmalloc.h
zmalloc.o: zmalloc.c config.h
redis-server: $(OBJ)
......
......@@ -60,8 +60,8 @@ static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t
* the value is not present in the intset and sets "pos" to the position
* where "value" can be inserted. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
int min = 0, max = is->length-1, mid;
int64_t cur;
int min = 0, max = is->length-1, mid = -1;
int64_t cur = -1;
/* The value can never be found when the set is empty */
if (is->length == 0) {
......@@ -179,6 +179,21 @@ int64_t intsetRandom(intset *is) {
return INTSET_GET(is,rand()%is->length);
}
/* Sets the value to the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
if (pos < is->length) {
*value = INTSET_GET(is,pos);
return 1;
}
return 0;
}
/* Return intset length */
uint32_t intsetLen(intset *is) {
return is->length;
}
#ifdef INTSET_TEST_MAIN
#include <sys/time.h>
......
......@@ -13,5 +13,7 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, uint8_t *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(intset *is);
#endif // __INTSET_H
......@@ -76,6 +76,7 @@
#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
#include "zipmap.h" /* Compact dictionary-alike data structure */
#include "ziplist.h" /* Compact list data structure */
#include "intset.h" /* Compact integer set structure */
#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
#include "release.h" /* Release and/or git repository information */
......@@ -132,9 +133,10 @@
#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
#define REDIS_ENCODING_LIST 4 /* Encoded as zipmap */
#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
static char* strencoding[] = {
"raw", "int", "hashtable", "zipmap", "list", "ziplist"
"raw", "int", "hashtable", "zipmap", "list", "ziplist", "intset"
};
/* Object types only used for dumping to disk */
......@@ -651,6 +653,7 @@ static void call(redisClient *c, struct redisCommand *cmd);
static void resetClient(redisClient *c);
static void convertToRealHash(robj *o);
static void listTypeConvert(robj *o, int enc);
static void setTypeConvert(robj *o, int enc);
static int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
static void freePubsubPattern(void *p);
......@@ -3069,6 +3072,13 @@ static robj *createSetObject(void) {
return o;
}
static robj *createIntsetObject(void) {
intset *is = intsetNew();
robj *o = createObject(REDIS_SET,is);
o->encoding = REDIS_ENCODING_INTSET;
return o;
}
static robj *createHashObject(void) {
/* All the Hashes start as zipmaps. Will be automatically converted
* into hash tables if there are enough elements or big elements
......@@ -3107,7 +3117,16 @@ static void freeListObject(robj *o) {
}
static void freeSetObject(robj *o) {
switch (o->encoding) {
case REDIS_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
case REDIS_ENCODING_INTSET:
zfree(o->ptr);
break;
default:
redisPanic("Unknown set encoding type");
}
}
static void freeZsetObject(robj *o) {
......@@ -3371,7 +3390,7 @@ static int getLongLongFromObject(robj *o, long long *target) {
}
}
*target = value;
if (target) *target = value;
return REDIS_OK;
}
......@@ -3789,6 +3808,7 @@ static int rdbSaveObject(FILE *fp, robj *o) {
}
} else if (o->type == REDIS_SET) {
/* Save a set value */
if (o->encoding == REDIS_ENCODING_HT) {
dict *set = o->ptr;
dictIterator *di = dictGetIterator(set);
dictEntry *de;
......@@ -3796,10 +3816,21 @@ static int rdbSaveObject(FILE *fp, robj *o) {
if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
while((de = dictNext(di)) != NULL) {
robj *eleobj = dictGetEntryKey(de);
if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
}
dictReleaseIterator(di);
} else if (o->encoding == REDIS_ENCODING_INTSET) {
intset *is = o->ptr;
long long llval;
int i = 0;
if (rdbSaveLen(fp,intsetLen(is)) == -1) return -1;
while(intsetGet(is,i++,&llval)) {
if (rdbSaveLongLongAsStringObject(fp,llval) == -1) return -1;
}
} else {
redisPanic("Unknown set encoding");
}
} else if (o->type == REDIS_ZSET) {
/* Save a set value */
zset *zs = o->ptr;
......@@ -5459,12 +5490,37 @@ static void rpoplpushcommand(redisClient *c) {
/* ==================================== Sets ================================ */
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */
static robj *setTypeCreate(robj *value) {
if (getLongLongFromObject(value,NULL) == REDIS_OK)
return createIntsetObject();
return createSetObject();
}
static int setTypeAdd(robj *subject, robj *value) {
long long llval;
if (subject->encoding == REDIS_ENCODING_HT) {
if (dictAdd(subject->ptr,value,NULL) == DICT_OK) {
incrRefCount(value);
return 1;
}
} else if (subject->encoding == REDIS_ENCODING_INTSET) {
if (getLongLongFromObject(value,&llval) == REDIS_OK) {
uint8_t success;
subject->ptr = intsetAdd(subject->ptr,llval,&success);
if (success) return 1;
} else {
/* Failed to get integer from object, convert to regular set. */
setTypeConvert(subject,REDIS_ENCODING_HT);
/* The set *was* an intset and this value is not integer
* encodable, so dictAdd should always work. */
redisAssert(dictAdd(subject->ptr,value,NULL) == DICT_OK);
incrRefCount(value);
return 1;
}
} else {
redisPanic("Unknown set encoding");
}
......@@ -5472,11 +5528,18 @@ static int setTypeAdd(robj *subject, robj *value) {
}
static int setTypeRemove(robj *subject, robj *value) {
long long llval;
if (subject->encoding == REDIS_ENCODING_HT) {
if (dictDelete(subject->ptr,value) == DICT_OK) {
if (htNeedsResize(subject->ptr)) dictResize(subject->ptr);
return 1;
}
} else if (subject->encoding == REDIS_ENCODING_INTSET) {
if (getLongLongFromObject(value,&llval) == REDIS_OK) {
uint8_t success;
subject->ptr = intsetRemove(subject->ptr,llval,&success);
if (success) return 1;
}
} else {
redisPanic("Unknown set encoding");
}
......@@ -5484,24 +5547,35 @@ static int setTypeRemove(robj *subject, robj *value) {
}
static int setTypeIsMember(robj *subject, robj *value) {
long long llval;
if (subject->encoding == REDIS_ENCODING_HT) {
return dictFind((dict*)subject->ptr,value) != NULL;
} else if (subject->encoding == REDIS_ENCODING_INTSET) {
if (getLongLongFromObject(value,&llval) == REDIS_OK) {
return intsetFind((intset*)subject->ptr,llval);
}
} else {
redisPanic("Unknown set encoding");
}
return 0;
}
/* Structure to hold set iteration abstraction. */
typedef struct {
robj *subject;
int encoding;
int ii; /* intset iterator */
dictIterator *di;
} setIterator;
static setIterator *setTypeInitIterator(robj *subject) {
setIterator *si = zmalloc(sizeof(setIterator));
si->subject = subject;
si->encoding = subject->encoding;
if (si->encoding == REDIS_ENCODING_HT) {
si->di = dictGetIterator(subject->ptr);
} else if (si->encoding == REDIS_ENCODING_INTSET) {
si->ii = 0;
} else {
redisPanic("Unknown set encoding");
}
......@@ -5525,6 +5599,10 @@ static robj *setTypeNext(setIterator *si) {
ret = dictGetEntryKey(de);
incrRefCount(ret);
}
} else if (si->encoding == REDIS_ENCODING_INTSET) {
long long llval;
if (intsetGet(si->subject->ptr,si->ii++,&llval))
ret = createStringObjectFromLongLong(llval);
}
return ret;
}
......@@ -5538,6 +5616,9 @@ robj *setTypeRandomElement(robj *subject) {
dictEntry *de = dictGetRandomKey(subject->ptr);
ret = dictGetEntryKey(de);
incrRefCount(ret);
} else if (subject->encoding == REDIS_ENCODING_INTSET) {
long long llval = intsetRandom(subject->ptr);
ret = createStringObjectFromLongLong(llval);
} else {
redisPanic("Unknown set encoding");
}
......@@ -5547,17 +5628,41 @@ robj *setTypeRandomElement(robj *subject) {
static unsigned long setTypeSize(robj *subject) {
if (subject->encoding == REDIS_ENCODING_HT) {
return dictSize((dict*)subject->ptr);
} else if (subject->encoding == REDIS_ENCODING_INTSET) {
return intsetLen((intset*)subject->ptr);
} else {
redisPanic("Unknown set encoding");
}
}
static void setTypeConvert(robj *subject, int enc) {
setIterator *si;
robj *element;
redisAssert(subject->type == REDIS_SET);
if (enc == REDIS_ENCODING_HT) {
dict *d = dictCreate(&setDictType,NULL);
/* setTypeGet returns a robj with incremented refcount */
si = setTypeInitIterator(subject);
while ((element = setTypeNext(si)) != NULL)
redisAssert(dictAdd(d,element,NULL) == DICT_OK);
setTypeReleaseIterator(si);
subject->encoding = REDIS_ENCODING_HT;
zfree(subject->ptr);
subject->ptr = d;
} else {
redisPanic("Unsupported set conversion");
}
}
static void saddCommand(redisClient *c) {
robj *set;
set = lookupKeyWrite(c->db,c->argv[1]);
if (set == NULL) {
set = createSetObject();
set = setTypeCreate(c->argv[2]);
dbAdd(c->db,c->argv[1],set);
} else {
if (set->type != REDIS_SET) {
......@@ -5616,7 +5721,7 @@ static void smoveCommand(redisClient *c) {
server.dirty++;
/* Add the element to the destination set */
if (!dstset) {
dstset = createSetObject();
dstset = setTypeCreate(c->argv[3]);
dbAdd(c->db,c->argv[2],dstset);
}
setTypeAdd(dstset,c->argv[3]);
......@@ -5724,7 +5829,7 @@ static void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long s
} else {
/* If we have a target key where to store the resulting set
* create this key with an empty set inside */
dstset = createSetObject();
dstset = createIntsetObject();
}
/* Iterate all the elements of the first (smallest) set, and test
......@@ -5802,7 +5907,7 @@ static void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
/* We need a temp set object to store our union. If the dstkey
* is not NULL (that is, we are inside an SUNIONSTORE operation) then
* this set object will be the resulting object to set into the target key*/
dstset = createSetObject();
dstset = createIntsetObject();
/* Iterate all the elements of all the sets, add every element a single
* time to the result set */
......
start_server {tags {"set"}} {
test {SADD, SCARD, SISMEMBER, SMEMBERS basics} {
r sadd myset foo
r sadd myset bar
list [r scard myset] [r sismember myset foo] \
[r sismember myset bar] [r sismember myset bla] \
[lsort [r smembers myset]]
} {2 1 1 0 {bar foo}}
test {SADD adding the same element multiple times} {
r sadd myset foo
r sadd myset foo
r sadd myset foo
r scard myset
} {2}
proc create_set {key entries} {
r del $key
foreach entry $entries { r sadd $key $entry }
}
test {SADD, SCARD, SISMEMBER, SMEMBERS basics - regular set} {
create_set myset {foo}
assert_encoding hashtable myset
assert_equal 1 [r sadd myset bar]
assert_equal 0 [r sadd myset bar]
assert_equal 2 [r scard myset]
assert_equal 1 [r sismember myset foo]
assert_equal 1 [r sismember myset bar]
assert_equal 0 [r sismember myset bla]
assert_equal {bar foo} [lsort [r smembers myset]]
}
test {SADD, SCARD, SISMEMBER, SMEMBERS basics - intset} {
create_set myset {17}
assert_encoding intset myset
assert_equal 1 [r sadd myset 16]
assert_equal 0 [r sadd myset 16]
assert_equal 2 [r scard myset]
assert_equal 1 [r sismember myset 16]
assert_equal 1 [r sismember myset 17]
assert_equal 0 [r sismember myset 18]
assert_equal {16 17} [lsort [r smembers myset]]
}
test {SADD against non set} {
r lpush mylist foo
catch {r sadd mylist bar} err
format $err
} {ERR*kind*}
assert_error ERR*kind* {r sadd mylist bar}
}
test {SREM basics} {
r sadd myset ciao
r srem myset foo
lsort [r smembers myset]
} {bar ciao}
test {SREM basics - regular set} {
create_set myset {foo bar ciao}
assert_encoding hashtable myset
assert_equal 0 [r srem myset qux]
assert_equal 1 [r srem myset foo]
assert_equal {bar ciao} [lsort [r smembers myset]]
}
test {SREM basics - intset} {
create_set myset {3 4 5}
assert_encoding intset myset
assert_equal 0 [r srem myset 6]
assert_equal 1 [r srem myset 4]
assert_equal {3 5} [lsort [r smembers myset]]
}
test {Mass SADD and SINTER with two sets} {
foreach {type} {hashtable intset} {
for {set i 1} {$i <= 5} {incr i} {
r del [format "set%d" $i]
}
for {set i 0} {$i < 1000} {incr i} {
r sadd set1 $i
r sadd set2 [expr $i+995]
}
lsort [r sinter set1 set2]
} {995 996 997 998 999}
foreach i {999 995 1000 2000} {
r sadd set3 $i
}
for {set i 5} {$i < 1000} {incr i} {
r sadd set4 $i
}
r sadd set5 0
# it is possible that a hashtable encoded only contains integers,
# because it is converted from an intset to a hashtable when a
# non-integer element is added and then removed.
if {$type eq "hashtable"} {
for {set i 1} {$i <= 5} {incr i} {
r sadd [format "set%d" $i] foo
r srem [format "set%d" $i] foo
}
}
test "Generated sets must be encoded as $type" {
for {set i 1} {$i <= 5} {incr i} {
assert_encoding $type [format "set%d" $i]
}
}
test {SUNION with two sets} {
lsort [r sunion set1 set2]
} [lsort -uniq "[r smembers set1] [r smembers set2]"]
test "SINTER with two sets - $type" {
assert_equal {995 996 997 998 999} [lsort [r sinter set1 set2]]
}
test {SINTERSTORE with two sets} {
test "SINTERSTORE with two sets - $type" {
r sinterstore setres set1 set2
lsort [r smembers setres]
} {995 996 997 998 999}
assert_encoding intset setres
assert_equal {995 996 997 998 999} [lsort [r smembers setres]]
}
test {SINTERSTORE with two sets, after a DEBUG RELOAD} {
test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" {
r debug reload
r sinterstore setres set1 set2
lsort [r smembers setres]
} {995 996 997 998 999}
assert_encoding intset setres
assert_equal {995 996 997 998 999} [lsort [r smembers setres]]
}
test {SUNIONSTORE with two sets} {
test "SUNION with two sets - $type" {
set expected [lsort -uniq "[r smembers set1] [r smembers set2]"]
assert_equal $expected [lsort [r sunion set1 set2]]
}
test "SUNIONSTORE with two sets - $type" {
r sunionstore setres set1 set2
lsort [r smembers setres]
} [lsort -uniq "[r smembers set1] [r smembers set2]"]
assert_encoding intset setres
set expected [lsort -uniq "[r smembers set1] [r smembers set2]"]
assert_equal $expected [lsort [r smembers setres]]
}
test {SUNIONSTORE against non existing keys} {
r set setres xxx
list [r sunionstore setres foo111 bar222] [r exists xxx]
} {0 0}
test {SINTER against three sets} {
r sadd set3 999
r sadd set3 995
r sadd set3 1000
r sadd set3 2000
lsort [r sinter set1 set2 set3]
} {995 999}
test {SINTERSTORE with three sets} {
test "SINTER against three sets - $type" {
assert_equal {995 999} [lsort [r sinter set1 set2 set3]]
}
test "SINTERSTORE with three sets - $type" {
r sinterstore setres set1 set2 set3
lsort [r smembers setres]
} {995 999}
assert_equal {995 999} [r smembers setres]
}
test {SUNION with non existing keys} {
lsort [r sunion nokey1 set1 set2 nokey2]
} [lsort -uniq "[r smembers set1] [r smembers set2]"]
test "SUNION with non existing keys - $type" {
set expected [lsort -uniq "[r smembers set1] [r smembers set2]"]
assert_equal $expected [lsort [r sunion nokey1 set1 set2 nokey2]]
}
test {SDIFF with two sets} {
for {set i 5} {$i < 1000} {incr i} {
r sadd set4 $i
test "SDIFF with two sets - $type" {
assert_equal {0 1 2 3 4} [lsort [r sdiff set1 set4]]
}
lsort [r sdiff set1 set4]
} {0 1 2 3 4}
test {SDIFF with three sets} {
r sadd set5 0
lsort [r sdiff set1 set4 set5]
} {1 2 3 4}
test {SDIFFSTORE with three sets} {
r sdiffstore sres set1 set4 set5
lsort [r smembers sres]
} {1 2 3 4}
test {SPOP basics} {
r del myset
r sadd myset 1
r sadd myset 2
r sadd myset 3
list [lsort [list [r spop myset] [r spop myset] [r spop myset]]] [r scard myset]
} {{1 2 3} 0}
test {SRANDMEMBER} {
r del myset
r sadd myset a
r sadd myset b
r sadd myset c
test "SDIFF with three sets - $type" {
assert_equal {1 2 3 4} [lsort [r sdiff set1 set4 set5]]
}
test "SDIFFSTORE with three sets - $type" {
r sdiffstore setres set1 set4 set5
assert_encoding intset setres
assert_equal {1 2 3 4} [lsort [r smembers setres]]
}
}
test "SINTER against non-set should throw error" {
r set key1 x
assert_error "ERR*wrong kind*" {r sinter key1 noset}
}
test "SUNION against non-set should throw error" {
r set key1 x
assert_error "ERR*wrong kind*" {r sunion key1 noset}
}
test "SINTERSTORE against non existing keys should delete dstkey" {
r set setres xxx
assert_equal 0 [r sinterstore setres foo111 bar222]
assert_equal 0 [r exists setres]
}
test "SUNIONSTORE against non existing keys should delete dstkey" {
r set setres xxx
assert_equal 0 [r sunionstore setres foo111 bar222]
assert_equal 0 [r exists setres]
}
foreach {type contents} {hashtable {a b c} intset {1 2 3}} {
test "SPOP basics - $type" {
create_set myset $contents
assert_encoding $type myset
assert_equal $contents [lsort [list [r spop myset] [r spop myset] [r spop myset]]]
assert_equal 0 [r scard myset]
}
test "SRANDMEMBER - $type" {
create_set myset $contents
unset -nocomplain myset
array set myset {}
for {set i 0} {$i < 100} {incr i} {
set myset([r srandmember myset]) 1
}
lsort [array names myset]
} {a b c}
assert_equal $contents [lsort [array names myset]]
}
}
test {SMOVE basics} {
r sadd myset1 a
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册