#include "redis.h"

#include <math.h>

/*-----------------------------------------------------------------------------
 * Sorted set API
 *----------------------------------------------------------------------------*/

/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to an hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */

/* This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated values.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This allows to traverse the list
 * from tail to head, useful for ZREVRANGE.
*/ zskiplistNode *zslCreateNode(int level, double score, robj *obj) { zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); zn->score = score; zn->obj = obj; return zn; } zskiplist *zslCreate(void) { int j; zskiplist *zsl; zsl = zmalloc(sizeof(*zsl)); zsl->level = 1; zsl->length = 0; zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) { zsl->header->level[j].forward = NULL; zsl->header->level[j].span = 0; } zsl->header->backward = NULL; zsl->tail = NULL; return zsl; } void zslFreeNode(zskiplistNode *node) { decrRefCount(node->obj); zfree(node); } void zslFree(zskiplist *zsl) { zskiplistNode *node = zsl->header->level[0].forward, *next; zfree(zsl->header); while(node) { next = node->level[0].forward; zslFreeNode(node); node = next; } zfree(zsl); } int zslRandomLevel(void) { int level = 1; while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF)) level += 1; return (levelheader; for (i = zsl->level-1; i >= 0; i--) { /* store rank that is crossed to reach the insert position */ rank[i] = i == (zsl->level-1) ? 0 : rank[i+1]; while (x->level[i].forward && (x->level[i].forward->score < score || (x->level[i].forward->score == score && compareStringObjects(x->level[i].forward->obj,obj) < 0))) { rank[i] += x->level[i].span; x = x->level[i].forward; } update[i] = x; } /* we assume the key is not already inside, since we allow duplicated * scores, and the re-insertion of score and redis object should never * happpen since the caller of zslInsert() should test in the hash table * if the element is already inside or not. 
*/ level = zslRandomLevel(); if (level > zsl->level) { for (i = zsl->level; i < level; i++) { rank[i] = 0; update[i] = zsl->header; update[i]->level[i].span = zsl->length; } zsl->level = level; } x = zslCreateNode(level,score,obj); for (i = 0; i < level; i++) { x->level[i].forward = update[i]->level[i].forward; update[i]->level[i].forward = x; /* update span covered by update[i] as x is inserted here */ x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]); update[i]->level[i].span = (rank[0] - rank[i]) + 1; } /* increment span for untouched levels */ for (i = level; i < zsl->level; i++) { update[i]->level[i].span++; } x->backward = (update[0] == zsl->header) ? NULL : update[0]; if (x->level[0].forward) x->level[0].forward->backward = x; else zsl->tail = x; zsl->length++; return x; } /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { int i; for (i = 0; i < zsl->level; i++) { if (update[i]->level[i].forward == x) { update[i]->level[i].span += x->level[i].span - 1; update[i]->level[i].forward = x->level[i].forward; } else { update[i]->level[i].span -= 1; } } if (x->level[0].forward) { x->level[0].forward->backward = x->backward; } else { zsl->tail = x->backward; } while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL) zsl->level--; zsl->length--; } /* Delete an element with matching score/object from the skiplist. */ int zslDelete(zskiplist *zsl, double score, robj *obj) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (x->level[i].forward->score < score || (x->level[i].forward->score == score && compareStringObjects(x->level[i].forward->obj,obj) < 0))) x = x->level[i].forward; update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. 
*/ x = x->level[0].forward; if (x && score == x->score && equalStringObjects(x->obj,obj)) { zslDeleteNode(zsl, x, update); zslFreeNode(x); return 1; } else { return 0; /* not found */ } return 0; /* not found */ } /* Struct to hold a inclusive/exclusive range spec. */ typedef struct { double min, max; int minex, maxex; /* are min or max exclusive? */ } zrangespec; static int zslValueGteMin(double value, zrangespec *spec) { return spec->minex ? (value > spec->min) : (value >= spec->min); } static int zslValueLteMax(double value, zrangespec *spec) { return spec->maxex ? (value < spec->max) : (value <= spec->max); } static int zslValueInRange(double value, zrangespec *spec) { return zslValueGteMin(value,spec) && zslValueLteMax(value,spec); } /* Returns if there is a part of the zset is in range. */ int zslIsInRange(zskiplist *zsl, zrangespec *range) { zskiplistNode *x; /* Test for ranges that will always be empty. */ if (range->min > range->max || (range->min == range->max && (range->minex || range->maxex))) return 0; x = zsl->tail; if (x == NULL || !zslValueGteMin(x->score,range)) return 0; x = zsl->header->level[0].forward; if (x == NULL || !zslValueLteMax(x->score,range)) return 0; return 1; } /* Find the first node that is contained in the specified range. * Returns NULL when no element is contained in the range. */ zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ if (!zslIsInRange(zsl,&range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *OUT* of range. */ while (x->level[i].forward && !zslValueGteMin(x->level[i].forward->score,&range)) x = x->level[i].forward; } /* The tail is in range, so the previous block should always return a * node that is non-NULL and the last one to be out of range. 
*/ x = x->level[0].forward; redisAssert(x != NULL && zslValueInRange(x->score,&range)); return x; } /* Find the last node that is contained in the specified range. * Returns NULL when no element is contained in the range. */ zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ if (!zslIsInRange(zsl,&range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *IN* range. */ while (x->level[i].forward && zslValueLteMax(x->level[i].forward->score,&range)) x = x->level[i].forward; } /* The header is in range, so the previous block should always return a * node that is non-NULL and in range. */ redisAssert(x != NULL && zslValueInRange(x->score,&range)); return x; } /* Delete all the elements with score between min and max from the skiplist. * Min and mx are inclusive, so a score >= min || score <= max is deleted. * Note that this function takes the reference to the hash table view of the * sorted set, in order to remove the elements from the hash table too. */ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec range, dict *dict) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long removed = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (range.minex ? x->level[i].forward->score <= range.min : x->level[i].forward->score < range.min)) x = x->level[i].forward; update[i] = x; } /* Current node is the last with score < or <= min. */ x = x->level[0].forward; /* Delete nodes while in range. */ while (x && (range.maxex ? x->score < range.max : x->score <= range.max)) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); zslFreeNode(x); removed++; x = next; } return removed; } /* Delete all the elements with rank between start and end from the skiplist. * Start and end are inclusive. 
Note that start and end need to be 1-based */ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long traversed = 0, removed = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (traversed + x->level[i].span) < start) { traversed += x->level[i].span; x = x->level[i].forward; } update[i] = x; } traversed++; x = x->level[0].forward; while (x && traversed <= end) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); zslFreeNode(x); removed++; traversed++; x = next; } return removed; } /* Find the rank for an element by both score and key. * Returns 0 when the element cannot be found, rank otherwise. * Note that the rank is 1-based due to the span of zsl->header to the * first element. */ unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) { zskiplistNode *x; unsigned long rank = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (x->level[i].forward->score < score || (x->level[i].forward->score == score && compareStringObjects(x->level[i].forward->obj,o) <= 0))) { rank += x->level[i].span; x = x->level[i].forward; } /* x might be equal to zsl->header, so test if obj is non-NULL */ if (x->obj && equalStringObjects(x->obj,o)) { return rank; } } return 0; } /* Finds an element by its rank. The rank argument needs to be 1-based. */ zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) { zskiplistNode *x; unsigned long traversed = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (traversed + x->level[i].span) <= rank) { traversed += x->level[i].span; x = x->level[i].forward; } if (traversed == rank) { return x; } } return NULL; } /* Populate the rangespec according to the objects min and max. 
*/ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { char *eptr; spec->minex = spec->maxex = 0; /* Parse the min-max interval. If one of the values is prefixed * by the "(" character, it's considered "open". For instance * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ if (min->encoding == REDIS_ENCODING_INT) { spec->min = (long)min->ptr; } else { if (((char*)min->ptr)[0] == '(') { spec->min = strtod((char*)min->ptr+1,&eptr); if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR; spec->minex = 1; } else { spec->min = strtod((char*)min->ptr,&eptr); if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR; } } if (max->encoding == REDIS_ENCODING_INT) { spec->max = (long)max->ptr; } else { if (((char*)max->ptr)[0] == '(') { spec->max = strtod((char*)max->ptr+1,&eptr); if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR; spec->maxex = 1; } else { spec->max = strtod((char*)max->ptr,&eptr); if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR; } } return REDIS_OK; } /*----------------------------------------------------------------------------- * Ziplist-backed sorted set API *----------------------------------------------------------------------------*/ double zzlGetScore(unsigned char *sptr) { unsigned char *vstr; unsigned int vlen; long long vlong; char buf[128]; double score; redisAssert(sptr != NULL); redisAssert(ziplistGet(sptr,&vstr,&vlen,&vlong)); if (vstr) { memcpy(buf,vstr,vlen); buf[vlen] = '\0'; score = strtod(buf,NULL); } else { score = vlong; } return score; } /* Compare element in sorted set with given element. */ int zzlCompareElements(unsigned char *eptr, unsigned char *cstr, unsigned int clen) { unsigned char *vstr; unsigned int vlen; long long vlong; unsigned char vbuf[32]; int minlen, cmp; redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong)); if (vstr == NULL) { /* Store string representation of long long in buf. 
*/ vlen = ll2string((char*)vbuf,sizeof(vbuf),vlong); vstr = vbuf; } minlen = (vlen < clen) ? vlen : clen; cmp = memcmp(vstr,cstr,minlen); if (cmp == 0) return vlen-clen; return cmp; } unsigned int zzlLength(robj *zobj) { unsigned char *zl = zobj->ptr; redisAssert(zobj->encoding == REDIS_ENCODING_ZIPLIST); return ziplistLen(zl)/2; } /* Move to next entry based on the values in eptr and sptr. Both are set to * NULL when there is no next entry. */ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) { unsigned char *_eptr, *_sptr; redisAssert(*eptr != NULL && *sptr != NULL); _eptr = ziplistNext(zl,*sptr); if (_eptr != NULL) { _sptr = ziplistNext(zl,_eptr); redisAssert(_sptr != NULL); } else { /* No next entry. */ _sptr = NULL; } *eptr = _eptr; *sptr = _sptr; } /* Move to the previous entry based on the values in eptr and sptr. Both are * set to NULL when there is no next entry. */ void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) { unsigned char *_eptr, *_sptr; redisAssert(*eptr != NULL && *sptr != NULL); _sptr = ziplistPrev(zl,*eptr); if (_sptr != NULL) { _eptr = ziplistPrev(zl,_sptr); redisAssert(_eptr != NULL); } else { /* No previous entry. */ _eptr = NULL; } *eptr = _eptr; *sptr = _sptr; } /* Returns if there is a part of the zset is in range. Should only be used * internally by zzlFirstInRange and zzlLastInRange. */ int zzlIsInRange(unsigned char *zl, zrangespec *range) { unsigned char *p; double score; /* Test for ranges that will always be empty. */ if (range->min > range->max || (range->min == range->max && (range->minex || range->maxex))) return 0; p = ziplistIndex(zl,-1); /* Last score. */ redisAssert(p != NULL); score = zzlGetScore(p); if (!zslValueGteMin(score,range)) return 0; p = ziplistIndex(zl,1); /* First score. */ redisAssert(p != NULL); score = zzlGetScore(p); if (!zslValueLteMax(score,range)) return 0; return 1; } /* Find pointer to the first element contained in the specified range. 
* Returns NULL when no element is contained in the range. */ unsigned char *zzlFirstInRange(robj *zobj, zrangespec range) { unsigned char *zl = zobj->ptr; unsigned char *eptr = ziplistIndex(zl,0), *sptr; double score; /* If everything is out of range, return early. */ if (!zzlIsInRange(zl,&range)) return NULL; while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); score = zzlGetScore(sptr); if (zslValueGteMin(score,&range)) return eptr; /* Move to next element. */ eptr = ziplistNext(zl,sptr); } return NULL; } /* Find pointer to the last element contained in the specified range. * Returns NULL when no element is contained in the range. */ unsigned char *zzlLastInRange(robj *zobj, zrangespec range) { unsigned char *zl = zobj->ptr; unsigned char *eptr = ziplistIndex(zl,-2), *sptr; double score; /* If everything is out of range, return early. */ if (!zzlIsInRange(zl,&range)) return NULL; while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); score = zzlGetScore(sptr); if (zslValueLteMax(score,&range)) return eptr; /* Move to previous element by moving to the score of previous element. * When this returns NULL, we know there also is no element. */ sptr = ziplistPrev(zl,eptr); if (sptr != NULL) redisAssert((eptr = ziplistPrev(zl,sptr)) != NULL); else eptr = NULL; } return NULL; } unsigned char *zzlFind(robj *zobj, robj *ele, double *score) { unsigned char *zl = zobj->ptr; unsigned char *eptr = ziplistIndex(zl,0), *sptr; ele = getDecodedObject(ele); while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); if (ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr))) { /* Matching element, pull out score. */ if (score != NULL) *score = zzlGetScore(sptr); decrRefCount(ele); return eptr; } /* Move to next element. */ eptr = ziplistNext(zl,sptr); } decrRefCount(ele); return NULL; } /* Delete (element,score) pair from ziplist. Use local copy of eptr because we * don't want to modify the one given as argument. 
*/ int zzlDelete(robj *zobj, unsigned char *eptr) { unsigned char *zl = zobj->ptr; unsigned char *p = eptr; /* TODO: add function to ziplist API to delete N elements from offset. */ zl = ziplistDelete(zl,&p); zl = ziplistDelete(zl,&p); zobj->ptr = zl; return REDIS_OK; } int zzlInsertAt(robj *zobj, robj *ele, double score, unsigned char *eptr) { unsigned char *zl = zobj->ptr; unsigned char *sptr; char scorebuf[128]; int scorelen; int offset; redisAssert(ele->encoding == REDIS_ENCODING_RAW); scorelen = d2string(scorebuf,sizeof(scorebuf),score); if (eptr == NULL) { zl = ziplistPush(zl,ele->ptr,sdslen(ele->ptr),ZIPLIST_TAIL); zl = ziplistPush(zl,(unsigned char*)scorebuf,scorelen,ZIPLIST_TAIL); } else { /* Keep offset relative to zl, as it might be re-allocated. */ offset = eptr-zl; zl = ziplistInsert(zl,eptr,ele->ptr,sdslen(ele->ptr)); eptr = zl+offset; /* Insert score after the element. */ redisAssert((sptr = ziplistNext(zl,eptr)) != NULL); zl = ziplistInsert(zl,sptr,(unsigned char*)scorebuf,scorelen); } zobj->ptr = zl; return REDIS_OK; } /* Insert (element,score) pair in ziplist. This function assumes the element is * not yet present in the list. */ int zzlInsert(robj *zobj, robj *ele, double score) { unsigned char *zl = zobj->ptr; unsigned char *eptr = ziplistIndex(zl,0), *sptr; double s; ele = getDecodedObject(ele); while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); s = zzlGetScore(sptr); if (s > score) { /* First element with score larger than score for element to be * inserted. This means we should take its spot in the list to * maintain ordering. */ zzlInsertAt(zobj,ele,score,eptr); break; } else if (s == score) { /* Ensure lexicographical ordering for elements. */ if (zzlCompareElements(eptr,ele->ptr,sdslen(ele->ptr)) > 0) { zzlInsertAt(zobj,ele,score,eptr); break; } } /* Move to next element. */ eptr = ziplistNext(zl,sptr); } /* Push on tail of list when it was not yet inserted. 
*/ if (eptr == NULL) zzlInsertAt(zobj,ele,score,NULL); decrRefCount(ele); return REDIS_OK; } unsigned long zzlDeleteRangeByScore(robj *zobj, zrangespec range) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; double score; unsigned long deleted = 0; eptr = zzlFirstInRange(zobj,range); if (eptr == NULL) return deleted; /* When the tail of the ziplist is deleted, eptr will point to the sentinel * byte and ziplistNext will return NULL. */ while ((sptr = ziplistNext(zl,eptr)) != NULL) { score = zzlGetScore(sptr); if (zslValueLteMax(score,&range)) { /* Delete both the element and the score. */ zl = ziplistDelete(zl,&eptr); zl = ziplistDelete(zl,&eptr); deleted++; } else { /* No longer in range. */ break; } } return deleted; } /* Delete all the elements with rank between start and end from the skiplist. * Start and end are inclusive. Note that start and end need to be 1-based */ unsigned long zzlDeleteRangeByRank(robj *zobj, unsigned int start, unsigned int end) { unsigned int num = (end-start)+1; zobj->ptr = ziplistDeleteRange(zobj->ptr,2*(start-1),2*num); return num; } /*----------------------------------------------------------------------------- * Common sorted set API *----------------------------------------------------------------------------*/ int zsLength(robj *zobj) { int length = -1; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { length = zzlLength(zobj); } else if (zobj->encoding == REDIS_ENCODING_RAW) { length = ((zset*)zobj->ptr)->zsl->length; } else { redisPanic("Unknown sorted set encoding"); } return length; } void zsConvert(robj *zobj, int encoding) { zset *zs; zskiplistNode *node, *next; robj *ele; double score; if (zobj->encoding == encoding) return; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vlong; if (encoding != REDIS_ENCODING_RAW) redisPanic("Unknown target encoding"); zs = zmalloc(sizeof(*zs)); zs->dict = 
dictCreate(&zsetDictType,NULL); zs->zsl = zslCreate(); eptr = ziplistIndex(zl,0); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); while (eptr != NULL) { score = zzlGetScore(sptr); redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong)); if (vstr == NULL) ele = createStringObjectFromLongLong(vlong); else ele = createStringObject((char*)vstr,vlen); /* Has incremented refcount since it was just created. */ node = zslInsert(zs->zsl,score,ele); redisAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK); incrRefCount(ele); /* Added to dictionary. */ zzlNext(zl,&eptr,&sptr); } zfree(zobj->ptr); zobj->ptr = zs; zobj->encoding = REDIS_ENCODING_RAW; } else if (zobj->encoding == REDIS_ENCODING_RAW) { unsigned char *zl = ziplistNew(); if (encoding != REDIS_ENCODING_ZIPLIST) redisPanic("Unknown target encoding"); /* Approach similar to zslFree(), since we want to free the skiplist at * the same time as creating the ziplist. */ zs = zobj->ptr; dictRelease(zs->dict); node = zs->zsl->header->level[0].forward; zfree(zs->zsl->header); zfree(zs->zsl); /* Immediately store pointer to ziplist in object because it will * change because of reallocations when pushing to the ziplist. */ zobj->ptr = zl; while (node) { ele = getDecodedObject(node->obj); redisAssert(zzlInsertAt(zobj,ele,node->score,NULL) == REDIS_OK); decrRefCount(ele); next = node->level[0].forward; zslFreeNode(node); node = next; } zfree(zs); zobj->encoding = REDIS_ENCODING_ZIPLIST; } else { redisPanic("Unknown sorted set encoding"); } } /*----------------------------------------------------------------------------- * Sorted set commands *----------------------------------------------------------------------------*/ /* This generic command implements both ZADD and ZINCRBY. 
*/ void zaddGenericCommand(redisClient *c, int incr) { static char *nanerr = "resulting score is not a number (NaN)"; robj *key = c->argv[1]; robj *ele; robj *zobj; robj *curobj; double score, curscore = 0.0; if (getDoubleFromObjectOrReply(c,c->argv[2],&score,NULL) != REDIS_OK) return; zobj = lookupKeyWrite(c->db,key); if (zobj == NULL) { if (server.zset_max_ziplist_entries == 0 || server.zset_max_ziplist_value < sdslen(c->argv[3]->ptr)) { zobj = createZsetObject(); } else { zobj = createZsetZiplistObject(); } dbAdd(c->db,key,zobj); } else { if (zobj->type != REDIS_ZSET) { addReply(c,shared.wrongtypeerr); return; } } if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *eptr; /* Prefer non-encoded element when dealing with ziplists. */ ele = c->argv[3]; if ((eptr = zzlFind(zobj,ele,&curscore)) != NULL) { if (incr) { score += curscore; if (isnan(score)) { addReplyError(c,nanerr); /* Don't need to check if the sorted set is empty, because * we know it has at least one element. */ return; } } /* Remove and re-insert when score changed. */ if (score != curscore) { redisAssert(zzlDelete(zobj,eptr) == REDIS_OK); redisAssert(zzlInsert(zobj,ele,score) == REDIS_OK); signalModifiedKey(c->db,key); server.dirty++; } if (incr) /* ZINCRBY */ addReplyDouble(c,score); else /* ZADD */ addReply(c,shared.czero); } else { /* Optimize: check if the element is too large or the list becomes * too long *before* executing zzlInsert. 
*/ redisAssert(zzlInsert(zobj,ele,score) == REDIS_OK); if (zzlLength(zobj) > server.zset_max_ziplist_entries) zsConvert(zobj,REDIS_ENCODING_RAW); if (sdslen(ele->ptr) > server.zset_max_ziplist_value) zsConvert(zobj,REDIS_ENCODING_RAW); signalModifiedKey(c->db,key); server.dirty++; if (incr) /* ZINCRBY */ addReplyDouble(c,score); else /* ZADD */ addReply(c,shared.cone); } } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; zskiplistNode *znode; dictEntry *de; ele = c->argv[3] = tryObjectEncoding(c->argv[3]); de = dictFind(zs->dict,ele); if (de != NULL) { curobj = dictGetEntryKey(de); curscore = *(double*)dictGetEntryVal(de); if (incr) { score += curscore; if (isnan(score)) { addReplyError(c,nanerr); /* Don't need to check if the sorted set is empty, because * we know it has at least one element. */ return; } } /* Remove and re-insert when score changed. We can safely delete * the key object from the skiplist, since the dictionary still has * a reference to it. */ if (score != curscore) { redisAssert(zslDelete(zs->zsl,curscore,curobj)); znode = zslInsert(zs->zsl,score,curobj); incrRefCount(curobj); /* Re-inserted in skiplist. */ dictGetEntryVal(de) = &znode->score; /* Update score ptr. */ signalModifiedKey(c->db,key); server.dirty++; } if (incr) /* ZINCRBY */ addReplyDouble(c,score); else /* ZADD */ addReply(c,shared.czero); } else { znode = zslInsert(zs->zsl,score,ele); incrRefCount(ele); /* Inserted in skiplist. */ redisAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK); incrRefCount(ele); /* Added to dictionary. 
*/ signalModifiedKey(c->db,key); server.dirty++; if (incr) /* ZINCRBY */ addReplyDouble(c,score); else /* ZADD */ addReply(c,shared.cone); } } else { redisPanic("Unknown sorted set encoding"); } } void zaddCommand(redisClient *c) { zaddGenericCommand(c,0); } void zincrbyCommand(redisClient *c) { zaddGenericCommand(c,1); } void zremCommand(redisClient *c) { robj *key = c->argv[1]; robj *ele = c->argv[2]; robj *zobj; if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *eptr; if ((eptr = zzlFind(zobj,ele,NULL)) != NULL) { redisAssert(zzlDelete(zobj,eptr) == REDIS_OK); if (zzlLength(zobj) == 0) dbDelete(c->db,key); } else { addReply(c,shared.czero); return; } } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; dictEntry *de; double score; de = dictFind(zs->dict,ele); if (de != NULL) { /* Delete from the skiplist */ score = *(double*)dictGetEntryVal(de); redisAssert(zslDelete(zs->zsl,score,ele)); /* Delete from the hash table */ dictDelete(zs->dict,ele); if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) dbDelete(c->db,key); } else { addReply(c,shared.czero); return; } } else { redisPanic("Unknown sorted set encoding"); } signalModifiedKey(c->db,key); server.dirty++; addReply(c,shared.cone); } void zremrangebyscoreCommand(redisClient *c) { robj *key = c->argv[1]; robj *zobj; zrangespec range; unsigned long deleted; /* Parse the range arguments. 
*/ if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { addReplyError(c,"min or max is not a double"); return; } if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { deleted = zzlDeleteRangeByScore(zobj,range); } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict); if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) dbDelete(c->db,key); } else { redisPanic("Unknown sorted set encoding"); } if (deleted) signalModifiedKey(c->db,key); server.dirty += deleted; addReplyLongLong(c,deleted); } void zremrangebyrankCommand(redisClient *c) { robj *key = c->argv[1]; robj *zobj; long start; long end; int llen; unsigned long deleted; if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) || (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return; if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; /* Sanitize indexes. */ llen = zsLength(zobj); if (start < 0) start = llen+start; if (end < 0) end = llen+end; if (start < 0) start = 0; /* Invariant: start >= 0, so this test will be true when end < 0. * The range is empty when start > end or start >= length. */ if (start > end || start >= llen) { addReply(c,shared.czero); return; } if (end >= llen) end = llen-1; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { /* Correct for 1-based rank. */ deleted = zzlDeleteRangeByRank(zobj,start+1,end+1); } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; /* Correct for 1-based rank. 
*/ deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict); if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) dbDelete(c->db,key); } else { redisPanic("Unknown sorted set encoding"); } if (deleted) signalModifiedKey(c->db,key); server.dirty += deleted; addReplyLongLong(c,deleted); } typedef struct { dict *dict; double weight; } zsetopsrc; int qsortCompareZsetopsrcByCardinality(const void *s1, const void *s2) { zsetopsrc *d1 = (void*) s1, *d2 = (void*) s2; unsigned long size1, size2; size1 = d1->dict ? dictSize(d1->dict) : 0; size2 = d2->dict ? dictSize(d2->dict) : 0; return size1 - size2; } #define REDIS_AGGR_SUM 1 #define REDIS_AGGR_MIN 2 #define REDIS_AGGR_MAX 3 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e)) inline static void zunionInterAggregate(double *target, double val, int aggregate) { if (aggregate == REDIS_AGGR_SUM) { *target = *target + val; /* The result of adding two doubles is NaN when one variable * is +inf and the other is -inf. When these numbers are added, * we maintain the convention of the result being 0.0. */ if (isnan(*target)) *target = 0.0; } else if (aggregate == REDIS_AGGR_MIN) { *target = val < *target ? val : *target; } else if (aggregate == REDIS_AGGR_MAX) { *target = val > *target ? 
val : *target; } else { /* safety net */ redisPanic("Unknown ZUNION/INTER aggregate type"); } } void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { int i, j, setnum; int aggregate = REDIS_AGGR_SUM; zsetopsrc *src; robj *dstobj; zset *dstzset; zskiplistNode *znode; dictIterator *di; dictEntry *de; int touched = 0; /* expect setnum input keys to be given */ setnum = atoi(c->argv[2]->ptr); if (setnum < 1) { addReplyError(c, "at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE"); return; } /* test if the expected number of keys would overflow */ if (3+setnum > c->argc) { addReply(c,shared.syntaxerr); return; } /* read keys to be used for input */ src = zmalloc(sizeof(zsetopsrc) * setnum); for (i = 0, j = 3; i < setnum; i++, j++) { robj *obj = lookupKeyWrite(c->db,c->argv[j]); if (!obj) { src[i].dict = NULL; } else { if (obj->type == REDIS_ZSET) { src[i].dict = ((zset*)obj->ptr)->dict; } else if (obj->type == REDIS_SET) { src[i].dict = (obj->ptr); } else { zfree(src); addReply(c,shared.wrongtypeerr); return; } } /* default all weights to 1 */ src[i].weight = 1.0; } /* parse optional extra arguments */ if (j < c->argc) { int remaining = c->argc - j; while (remaining) { if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) { j++; remaining--; for (i = 0; i < setnum; i++, j++, remaining--) { if (getDoubleFromObjectOrReply(c,c->argv[j],&src[i].weight, "weight value is not a double") != REDIS_OK) { zfree(src); return; } } } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) { j++; remaining--; if (!strcasecmp(c->argv[j]->ptr,"sum")) { aggregate = REDIS_AGGR_SUM; } else if (!strcasecmp(c->argv[j]->ptr,"min")) { aggregate = REDIS_AGGR_MIN; } else if (!strcasecmp(c->argv[j]->ptr,"max")) { aggregate = REDIS_AGGR_MAX; } else { zfree(src); addReply(c,shared.syntaxerr); return; } j++; remaining--; } else { zfree(src); addReply(c,shared.syntaxerr); return; } } } /* sort sets from the smallest to largest, this will 
improve our * algorithm's performance */ qsort(src,setnum,sizeof(zsetopsrc),qsortCompareZsetopsrcByCardinality); dstobj = createZsetObject(); dstzset = dstobj->ptr; if (op == REDIS_OP_INTER) { /* skip going over all entries if the smallest zset is NULL or empty */ if (src[0].dict && dictSize(src[0].dict) > 0) { /* precondition: as src[0].dict is non-empty and the zsets are ordered * from small to large, all src[i > 0].dict are non-empty too */ di = dictGetIterator(src[0].dict); while((de = dictNext(di)) != NULL) { double score, value; score = src[0].weight * zunionInterDictValue(de); for (j = 1; j < setnum; j++) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); zunionInterAggregate(&score,value,aggregate); } else { break; } } /* Only continue when present in every source dict. */ if (j == setnum) { robj *o = dictGetEntryKey(de); znode = zslInsert(dstzset->zsl,score,o); incrRefCount(o); /* added to skiplist */ dictAdd(dstzset->dict,o,&znode->score); incrRefCount(o); /* added to dictionary */ } } dictReleaseIterator(di); } } else if (op == REDIS_OP_UNION) { for (i = 0; i < setnum; i++) { if (!src[i].dict) continue; di = dictGetIterator(src[i].dict); while((de = dictNext(di)) != NULL) { double score, value; /* skip key when already processed */ if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue; /* initialize score */ score = src[i].weight * zunionInterDictValue(de); /* because the zsets are sorted by size, its only possible * for sets at larger indices to hold this entry */ for (j = (i+1); j < setnum; j++) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); zunionInterAggregate(&score,value,aggregate); } } robj *o = dictGetEntryKey(de); znode = zslInsert(dstzset->zsl,score,o); incrRefCount(o); /* added to skiplist */ dictAdd(dstzset->dict,o,&znode->score); incrRefCount(o); /* added to dictionary 
*/ } dictReleaseIterator(di); } } else { /* unknown operator */ redisAssert(op == REDIS_OP_INTER || op == REDIS_OP_UNION); } if (dbDelete(c->db,dstkey)) { signalModifiedKey(c->db,dstkey); touched = 1; server.dirty++; } if (dstzset->zsl->length) { dbAdd(c->db,dstkey,dstobj); addReplyLongLong(c, dstzset->zsl->length); if (!touched) signalModifiedKey(c->db,dstkey); server.dirty++; } else { decrRefCount(dstobj); addReply(c, shared.czero); } zfree(src); } void zunionstoreCommand(redisClient *c) { zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION); } void zinterstoreCommand(redisClient *c) { zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER); } void zrangeGenericCommand(redisClient *c, int reverse) { robj *key = c->argv[1]; robj *zobj; int withscores = 0; long start; long end; int llen; int rangelen; if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) || (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return; if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) { withscores = 1; } else if (c->argc >= 5) { addReply(c,shared.syntaxerr); return; } if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; /* Sanitize indexes. */ llen = zsLength(zobj); if (start < 0) start = llen+start; if (end < 0) end = llen+end; if (start < 0) start = 0; /* Invariant: start >= 0, so this test will be true when end < 0. * The range is empty when start > end or start >= length. */ if (start > end || start >= llen) { addReply(c,shared.emptymultibulk); return; } if (end >= llen) end = llen-1; rangelen = (end-start)+1; /* Return the result in form of a multi-bulk reply */ addReplyMultiBulkLen(c, withscores ? 
(rangelen*2) : rangelen); if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vlong; if (reverse) eptr = ziplistIndex(zl,-2-(2*start)); else eptr = ziplistIndex(zl,2*start); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); while (rangelen--) { redisAssert(eptr != NULL && sptr != NULL); redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong)); if (vstr == NULL) addReplyBulkLongLong(c,vlong); else addReplyBulkCBuffer(c,vstr,vlen); if (withscores) addReplyDouble(c,zzlGetScore(sptr)); if (reverse) zzlPrev(zl,&eptr,&sptr); else zzlNext(zl,&eptr,&sptr); } } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; zskiplist *zsl = zs->zsl; zskiplistNode *ln; robj *ele; /* Check if starting point is trivial, before doing log(N) lookup. */ if (reverse) { ln = zsl->tail; if (start > 0) ln = zslGetElementByRank(zsl,llen-start); } else { ln = zsl->header->level[0].forward; if (start > 0) ln = zslGetElementByRank(zsl,start+1); } while(rangelen--) { redisAssert(ln != NULL); ele = ln->obj; addReplyBulk(c,ele); if (withscores) addReplyDouble(c,ln->score); ln = reverse ? ln->backward : ln->level[0].forward; } } else { redisPanic("Unknown sorted set encoding"); } } void zrangeCommand(redisClient *c) { zrangeGenericCommand(c,0); } void zrevrangeCommand(redisClient *c) { zrangeGenericCommand(c,1); } /* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE and ZCOUNT. * If "justcount", only the number of elements in the range is returned. */ void genericZrangebyscoreCommand(redisClient *c, int reverse, int justcount) { zrangespec range; robj *key = c->argv[1]; robj *emptyreply, *zobj; int offset = 0, limit = -1; int withscores = 0; unsigned long rangelen = 0; void *replylen = NULL; int minidx, maxidx; /* Parse the range arguments. 
*/ if (reverse) { /* Range is given as [max,min] */ maxidx = 2; minidx = 3; } else { /* Range is given as [min,max] */ minidx = 2; maxidx = 3; } if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != REDIS_OK) { addReplyError(c,"min or max is not a double"); return; } /* Parse optional extra arguments. Note that ZCOUNT will exactly have * 4 arguments, so we'll never enter the following code path. */ if (c->argc > 4) { int remaining = c->argc - 4; int pos = 4; while (remaining) { if (remaining >= 1 && !strcasecmp(c->argv[pos]->ptr,"withscores")) { pos++; remaining--; withscores = 1; } else if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) { offset = atoi(c->argv[pos+1]->ptr); limit = atoi(c->argv[pos+2]->ptr); pos += 3; remaining -= 3; } else { addReply(c,shared.syntaxerr); return; } } } /* Ok, lookup the key and get the range */ emptyreply = justcount ? shared.czero : shared.emptymultibulk; if ((zobj = lookupKeyReadOrReply(c,key,emptyreply)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; unsigned char *vstr; unsigned int vlen; long long vlong; double score; /* If reversed, get the last node in range as starting point. */ if (reverse) eptr = zzlLastInRange(zobj,range); else eptr = zzlFirstInRange(zobj,range); /* No "first" element in the specified interval. */ if (eptr == NULL) { addReply(c,emptyreply); return; } /* Get score pointer for the first element. */ redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); /* We don't know in advance how many matching elements there are in the * list, so we push this object that will represent the multi-bulk * length in the output buffer, and will "fix" it later */ if (!justcount) replylen = addDeferredMultiBulkLength(c); /* If there is an offset, just traverse the number of elements without * checking the score because that is done in the next loop. 
*/ while (eptr && offset--) if (reverse) zzlPrev(zl,&eptr,&sptr); else zzlNext(zl,&eptr,&sptr); while (eptr && limit--) { score = zzlGetScore(sptr); /* Abort when the node is no longer in range. */ if (reverse) { if (!zslValueGteMin(score,&range)) break; } else { if (!zslValueLteMax(score,&range)) break; } /* Do our magic */ rangelen++; if (!justcount) { redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong)); if (vstr == NULL) addReplyBulkLongLong(c,vlong); else addReplyBulkCBuffer(c,vstr,vlen); if (withscores) addReplyDouble(c,score); } /* Move to next node */ if (reverse) zzlPrev(zl,&eptr,&sptr); else zzlNext(zl,&eptr,&sptr); } } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; zskiplist *zsl = zs->zsl; zskiplistNode *ln; /* If reversed, get the last node in range as starting point. */ if (reverse) ln = zslLastInRange(zsl,range); else ln = zslFirstInRange(zsl,range); /* No "first" element in the specified interval. */ if (ln == NULL) { addReply(c,emptyreply); return; } /* We don't know in advance how many matching elements there are in the * list, so we push this object that will represent the multi-bulk * length in the output buffer, and will "fix" it later */ if (!justcount) replylen = addDeferredMultiBulkLength(c); /* If there is an offset, just traverse the number of elements without * checking the score because that is done in the next loop. */ while (ln && offset--) ln = reverse ? ln->backward : ln->level[0].forward; while (ln && limit--) { /* Abort when the node is no longer in range. */ if (reverse) { if (!zslValueGteMin(ln->score,&range)) break; } else { if (!zslValueLteMax(ln->score,&range)) break; } /* Do our magic */ rangelen++; if (!justcount) { addReplyBulk(c,ln->obj); if (withscores) addReplyDouble(c,ln->score); } /* Move to next node */ ln = reverse ? 
ln->backward : ln->level[0].forward; } } else { redisPanic("Unknown sorted set encoding"); } if (justcount) { addReplyLongLong(c,(long)rangelen); } else { if (withscores) rangelen *= 2; setDeferredMultiBulkLength(c,replylen,rangelen); } } void zrangebyscoreCommand(redisClient *c) { genericZrangebyscoreCommand(c,0,0); } void zrevrangebyscoreCommand(redisClient *c) { genericZrangebyscoreCommand(c,1,0); } void zcountCommand(redisClient *c) { genericZrangebyscoreCommand(c,0,1); } void zcardCommand(redisClient *c) { robj *key = c->argv[1]; robj *zobj; if ((zobj = lookupKeyReadOrReply(c,key,shared.czero)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; addReplyLongLong(c,zsLength(zobj)); } void zscoreCommand(redisClient *c) { robj *key = c->argv[1]; robj *zobj; double score; if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { if (zzlFind(zobj,c->argv[2],&score) != NULL) addReplyDouble(c,score); else addReply(c,shared.nullbulk); } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; dictEntry *de; c->argv[2] = tryObjectEncoding(c->argv[2]); de = dictFind(zs->dict,c->argv[2]); if (de != NULL) { score = *(double*)dictGetEntryVal(de); addReplyDouble(c,score); } else { addReply(c,shared.nullbulk); } } else { redisPanic("Unknown sorted set encoding"); } } void zrankGenericCommand(redisClient *c, int reverse) { robj *key = c->argv[1]; robj *ele = c->argv[2]; robj *zobj; unsigned long llen; unsigned long rank; if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; llen = zsLength(zobj); redisAssert(ele->encoding == REDIS_ENCODING_RAW); if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; eptr = ziplistIndex(zl,0); redisAssert(eptr != NULL); sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); rank = 1; while(eptr != NULL) { if 
(ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr))) break; rank++; zzlNext(zl,&eptr,&sptr); } if (eptr != NULL) { if (reverse) addReplyLongLong(c,llen-rank); else addReplyLongLong(c,rank-1); } else { addReply(c,shared.nullbulk); } } else if (zobj->encoding == REDIS_ENCODING_RAW) { zset *zs = zobj->ptr; zskiplist *zsl = zs->zsl; dictEntry *de; double score; ele = c->argv[2] = tryObjectEncoding(c->argv[2]); de = dictFind(zs->dict,ele); if (de != NULL) { score = *(double*)dictGetEntryVal(de); rank = zslGetRank(zsl,score,ele); redisAssert(rank); /* Existing elements always have a rank. */ if (reverse) addReplyLongLong(c,llen-rank); else addReplyLongLong(c,rank-1); } else { addReply(c,shared.nullbulk); } } else { redisPanic("Unknown sorted set encoding"); } } void zrankCommand(redisClient *c) { zrankGenericCommand(c, 0); } void zrevrankCommand(redisClient *c) { zrankGenericCommand(c, 1); }