From ff551fad24d6a563fb1916dbb33ec4e8c0287b65 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 2 Apr 2014 09:58:47 +0200 Subject: [PATCH] HyperLogLog: added magic / version. This will allow future changes like compressed representations. Currently the magic is not checked for performance reasons but this may change in the future, for example if we add new types encoded in strings that may have the same size of HyperLogLogs. --- src/hyperloglog.c | 70 ++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 546dd0fb..ca904daf 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -55,9 +55,9 @@ * * The representation used by Redis is the following: * - * +--------+--------+--------+------// //--+---------------------+ - * |11000000|22221111|33333322|55444444 .... | 64 bit cardinality | - * +--------+--------+--------+------// //--+---------------------+ + * +--------+--------+--------+------// //--+----------+------+-----+ + * |11000000|22221111|33333322|55444444 .... | uint64_t | HYLL | Ver | + * +--------+--------+--------+------// //--+----------+------+-----+ * * The 6 bits counters are encoded one after the other starting from the * LSB to the MSB, and using the next bytes as needed. @@ -69,6 +69,10 @@ * HLLADD operations don't modify the actual data structure and hence the * approximated cardinality). * + * After the cached cardinality there are 4 bytes of magic set to the + * string "HYLL", and a 4 bytes version field that is reserved for + * future uses and is currently set to 0. + * * When the most significant bit in the most significant byte of the cached * cardinality is set, it means that the data structure was modified and * we can't reuse the cached value that must be recomputed. */ @@ -80,7 +84,7 @@ #define REDIS_HLL_REGISTER_MAX ((1<ptr; + memcpy(p+REDIS_HLL_SIZE-8,"HYLL",4); + return o; +} + /* PFADD var ele ele ele ... ele => :0 or :1 */ void pfaddCommand(redisClient *c) { robj *o = lookupKeyWrite(c->db,c->argv[1]); @@ -446,7 +466,7 @@ void pfaddCommand(redisClient *c) { /* Create the key with a string value of the exact length to * hold our HLL data structure. sdsnewlen() when NULL is passed * is guaranteed to return bytes initialized to zero. */ - o = createObject(REDIS_STRING,sdsnewlen(NULL,REDIS_HLL_SIZE)); + o = createHLLObject(); dbAdd(c->db,c->argv[1],o); updated++; } else { @@ -478,7 +498,7 @@ void pfaddCommand(redisClient *c) { notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); server.dirty++; /* Invalidate the cached cardinality. */ - registers[REDIS_HLL_SIZE-1] |= (1<<7); + registers[REDIS_HLL_SIZE-9] |= (1<<7); } addReply(c, updated ? shared.cone : shared.czero); } @@ -509,27 +529,27 @@ void pfcountCommand(redisClient *c) { /* Check if the cached cardinality is valid. */ registers = o->ptr; - if ((registers[REDIS_HLL_SIZE-1] & (1<<7)) == 0) { + if ((registers[REDIS_HLL_SIZE-9] & (1<<7)) == 0) { /* Just return the cached value. */ - card = (uint64_t)registers[REDIS_HLL_SIZE-8]; - card |= (uint64_t)registers[REDIS_HLL_SIZE-7] << 8; - card |= (uint64_t)registers[REDIS_HLL_SIZE-6] << 16; - card |= (uint64_t)registers[REDIS_HLL_SIZE-5] << 24; - card |= (uint64_t)registers[REDIS_HLL_SIZE-4] << 32; - card |= (uint64_t)registers[REDIS_HLL_SIZE-3] << 40; - card |= (uint64_t)registers[REDIS_HLL_SIZE-2] << 48; - card |= (uint64_t)registers[REDIS_HLL_SIZE-1] << 56; + card = (uint64_t)registers[REDIS_HLL_SIZE-16]; + card |= (uint64_t)registers[REDIS_HLL_SIZE-15] << 8; + card |= (uint64_t)registers[REDIS_HLL_SIZE-14] << 16; + card |= (uint64_t)registers[REDIS_HLL_SIZE-13] << 24; + card |= (uint64_t)registers[REDIS_HLL_SIZE-12] << 32; + card |= (uint64_t)registers[REDIS_HLL_SIZE-11] << 40; + card |= (uint64_t)registers[REDIS_HLL_SIZE-10] << 48; + card |= (uint64_t)registers[REDIS_HLL_SIZE-9] << 56; } else { /* Recompute it and update the cached value. */ card = hllCount(registers); - registers[REDIS_HLL_SIZE-8] = card & 0xff; - registers[REDIS_HLL_SIZE-7] = (card >> 8) & 0xff; - registers[REDIS_HLL_SIZE-6] = (card >> 16) & 0xff; - registers[REDIS_HLL_SIZE-5] = (card >> 24) & 0xff; - registers[REDIS_HLL_SIZE-4] = (card >> 32) & 0xff; - registers[REDIS_HLL_SIZE-3] = (card >> 40) & 0xff; - registers[REDIS_HLL_SIZE-2] = (card >> 48) & 0xff; - registers[REDIS_HLL_SIZE-1] = (card >> 56) & 0xff; + registers[REDIS_HLL_SIZE-16] = card & 0xff; + registers[REDIS_HLL_SIZE-15] = (card >> 8) & 0xff; + registers[REDIS_HLL_SIZE-14] = (card >> 16) & 0xff; + registers[REDIS_HLL_SIZE-13] = (card >> 24) & 0xff; + registers[REDIS_HLL_SIZE-12] = (card >> 32) & 0xff; + registers[REDIS_HLL_SIZE-11] = (card >> 40) & 0xff; + registers[REDIS_HLL_SIZE-10] = (card >> 48) & 0xff; + registers[REDIS_HLL_SIZE-9] = (card >> 56) & 0xff; /* This is not considered a read-only command even if the * data structure is not modified, since the cached value * may be modified and given that the HLL is a Redis string @@ -582,7 +602,7 @@ void pfmergeCommand(redisClient *c) { /* Create the key with a string value of the exact length to * hold our HLL data structure. sdsnewlen() when NULL is passed * is guaranteed to return bytes initialized to zero. */ - o = createObject(REDIS_STRING,sdsnewlen(NULL,REDIS_HLL_SIZE)); + o = createHLLObject(); dbAdd(c->db,c->argv[1],o); } else { /* If key exists we are sure it's of the right type/size @@ -597,7 +617,7 @@ void pfmergeCommand(redisClient *c) { for (j = 0; j < REDIS_HLL_REGISTERS; j++) { HLL_SET_REGISTER(registers,j,max[j]); } - registers[REDIS_HLL_SIZE-1] |= (1<<7); + registers[REDIS_HLL_SIZE-9] |= (1<<7); signalModifiedKey(c->db,c->argv[1]); /* We generate an HLLADD event for HLLMERGE for semantical simplicity -- GitLab