diff --git a/.gitignore b/.gitignore index 5463a140850da9ef907fb2b550083e48200ed43a..ed57c1bf91ff1091457aacff0b68838dd28d2f64 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ mkrelease.sh release myredis.conf misc/* +release.h diff --git a/Changelog b/Changelog index a657927a8a49edd295625d0845b478ccded3b63a..0ddf1c70327d1f7414da9a7ca739f3ca5c84300d 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,86 @@ +2010-05-16 Redis version is now 1.3.12 (antirez) +2010-05-16 redis version is now 1.3.11 (antirez) +2010-05-16 random refactoring and speedups (antirez) +2010-05-16 faster INCR with very little efforts... (antirez) +2010-05-15 Merge branch 'redis-cli-fix' of http://github.com/tizoc/redis (antirez) +2010-05-15 added pid info to the check memory leaks test, so that those tests don't appear to be duplicated (antirez) +2010-05-15 Merge branch 'integration' of git://github.com/pietern/redis (antirez) +2010-05-14 more endianess detection fix for SHA1 (antirez) +2010-05-14 fixed a warning seen with some GCC version under Linux (antirez) +2010-05-14 initial rough integration test for replication (Pieter Noordhuis) +2010-05-14 store entire server object on the stack instead of just the client (Pieter Noordhuis) +2010-05-14 proc to retrieve values from INFO properties (Pieter Noordhuis) +2010-05-14 one more fix for endianess detection (antirez) +2010-05-14 Fixed sha1.c compilation on Linux, due to endianess detection lameness (antirez) +2010-05-14 ZUNION,ZINTER -> ZUNIONSTORE,ZINTERSTORE (antirez) +2010-05-14 minor fixes to the new test suite, html doc updated (antirez) +2010-05-14 wait for redis-server to be settled and ready for connections (Pieter Noordhuis) +2010-05-14 fix cleaning up tmp folder (Pieter Noordhuis) +2010-05-14 update makefile to use the new test suite (Pieter Noordhuis) +2010-05-14 check for memory leaks before killing a server (Pieter Noordhuis) +2010-05-14 extract code to kill a server to a separate proc (Pieter Noordhuis) +2010-05-14 start 
servers on different ports to prevent conflicts (Pieter Noordhuis) +2010-05-14 use DEBUG DIGEST in new test suite (Pieter Noordhuis) +2010-05-14 split test suite into multiple files; runs redis-server in isolation (Pieter Noordhuis) +2010-05-14 use DEBUG DIGEST in the test instead of a function that was doing a similar work, but in a much slower and buggy way (antirez) +2010-05-14 Don't rely on cliReadReply being able to return on shutdown (Bruno Deferrari) +2010-05-14 If command is a shutdown, ignore errors on reply (Bruno Deferrari) +2010-05-14 DEBUG DIGEST implemented, in order to improve the ability to test persistence and replication consistency (antirez) +2010-05-13 makefile deps updated (antirez) +2010-05-13 conflicts resolved (antirez) +2010-05-13 feed SETEX as SET and EXPIREAT to AOF (Pieter Noordhuis) +2010-05-13 very strong speedup in saving time performance when there are many integers in the dataset. Instead of decoding the object before to pass them to the rdbSaveObject layer we check asap if the object is integer encoded and can be written on disk as an integer. (antirez) +2010-05-13 include limits.h otherwise no double precison macros (antirez) +2010-05-13 explicitly checks with ifdefs if our floating point and long long assumptions are verified (antirez) +2010-05-13 Yet another version of the double saving code, with comments explaining what's happening there (antirez) +2010-05-12 added overflow check in the double -> long long conversion trick to avoid integer overflows. I think this was not needed in practical terms, but it is safer (antirez) +2010-05-12 use withscores when performing the dataset digest (antirez) +2010-05-12 If a float can be casted to a long long without rounding loss, we can use the integer conversion function to write the score on disk. 
This is a seriuous speedup (antirez) +2010-05-12 fixed compilation warnings in the AOF sanity check tool (antirez) +2010-05-12 Merge branch 'vm-speedup' (antirez) +2010-05-11 fix to return error when calling INCR on a non-string type (Pieter Noordhuis) +2010-05-11 load objects encoded from disk directly without useless conversion (antirez) +2010-05-11 fixed a problem leading to crashes, as keys can't be currently specially encoded, so we can't encode integers at object loading time... For now this can be fixed passing a few flags, or later can be fixed allowing encoded keys as well (antirez) +2010-05-11 long long to string conversion speedup applied in other places as well. Still the code has bugs, fixing right now... (antirez) +2010-05-11 hand written code to turn a long long into a string -> very big speed win (antirez) +2010-05-11 added specialized function to compare string objects for perfect match that is optimized for this task (antirez) +2010-05-11 better use of encoding inforamtion in dictEncObjKeyCompare (antirez) +2010-05-10 CONFIG now can change appendfsync policy at run time (antirez) +2010-05-10 CONFIG command now supports hot modification of RDB saving parameters. (antirez) +2010-05-10 while loading the rdb file don't add the key to the dictionary at all if it's already expired, instead of removing it just after the insertion. 
(antirez) +2010-05-10 Merge branch 'check-aof' of git://github.com/pietern/redis (antirez) +2010-05-08 minor changes to improve code readability (antirez) +2010-05-08 swap objects out directly while loading an RDB file if we detect we can't stay in the vm max memory limits anyway (antirez) +2010-05-07 change command names no longer used to zunion/zinter (Pieter Noordhuis) +2010-05-07 DEBUG POPULATE command for fast creation of test databases (antirez) +2010-05-07 update TODO (Pieter Noordhuis) +2010-05-07 swap arguments in blockClientOnSwappedKeys to be consistent (Pieter Noordhuis) +2010-05-07 added function that preloads all keys needed to execute a MULTI/EXEC block (Pieter Noordhuis) +2010-05-07 add sanity check to zunionInterBlockClientOnSwappedKeys, as the number of keys used is provided as argument to the function (Pieter Noordhuis) +2010-05-07 make prototype of custom function to preload keys from the vm match the prototype of waitForMultipleSwappedKeys (Pieter Noordhuis) +2010-05-07 extract preloading of multiple keys according to the command prototype to a separate function (Pieter Noordhuis) +2010-05-07 make append only filename configurable (Pieter Noordhuis) +2010-05-07 don't load value from VM for EXISTS (Pieter Noordhuis) +2010-05-07 swap file name pid expansion removed. Not suited for mission critical software... (antirez) +2010-05-07 Swap file is now locked (antirez) +2010-05-06 Merge branch 'master' into aof-speedup (antirez) +2010-05-06 log error and quit when the AOF contains an unfinished MULTI (antirez) +2010-05-06 log error and quit when the AOF contains an unfinished MULTI (Pieter Noordhuis) +2010-05-06 Merge branch 'master' into check-aof (Pieter Noordhuis) +2010-05-06 hincrby should report an error when called against a hash key that doesn't contain an integer (Pieter Noordhuis) +2010-05-06 AOF writes are now accumulated into a buffer and flushed into disk just before re-entering the event loop. 
A lot less writes but still this guarantees that AOF is written before the client gets a positive reply about a write operation, as no reply is trasnmitted before re-entering into the event loop. (antirez) +2010-05-06 clarified a few messages in redis.conf (antirez) +2010-05-05 ask for confirmation before AOF is truncated (Pieter Noordhuis) +2010-05-05 str can be free'd outside readString (Pieter Noordhuis) +2010-05-05 moved argument parsing around (Pieter Noordhuis) +2010-05-05 ignore redis-check-aof binary (Pieter Noordhuis) +2010-05-05 allow AOF to be fixed by truncating to the portion of the file that is valid (Pieter Noordhuis) +2010-05-05 tool to check if AOF is valid (Pieter Noordhuis) +2010-05-02 included fmacros.h in linenose.c to avoid compilation warnings on Linux (antirez) +2010-05-02 compilation fix for mac os x (antirez) +2010-05-02 Merge branch 'master' of git@github.com:antirez/redis (antirez) +2010-05-02 On Linux now fdatasync() is used insetad of fsync() in order to flush the AOF file kernel buffers (antirez) +2010-04-30 More tests for APPEND and tests for SUBSTR (antirez) 2010-04-30 linenoise.c updated, now redis-cli can be used in a pipe (antirez) 2010-04-29 redis-cli minor fix (less segfault is better) (antirez) 2010-04-29 New MONITOR output format with timestamp, every command in a single line, string representations (antirez) diff --git a/Makefile b/Makefile index c6cbf320e8ba9928254f998396529462508fc9bf..949f0b0c0ecc73e3f6170b7e54e0611628bb04ba 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ # Copyright (C) 2009 Salvatore Sanfilippo # This file is released under the BSD license, see the COPYING file +release_hdr := $(shell sh -c './mkreleasehdr.sh') uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') OPTIMIZATION?=-O2 ifeq ($(uname_S),SunOS) diff --git a/doc/AppendCommand.html b/doc/AppendCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..c2d8ed5b91fbc580ea164a4d5891c572398550db --- /dev/null +++ 
b/doc/AppendCommand.html @@ -0,0 +1,48 @@ + + + + + + + +
+ + + +
+
+ +AppendCommand: Contents
  APPEND _key_ _value_
    Return value
    Examples +
+ +

AppendCommand

+ +
+ +
+ +
+ #sidebar StringCommandsSidebar

APPEND _key_ _value_

+Time complexity: O(1). The amortized time complexity is O(1) assuming the appended value is small and the already present value is of any size, since the dynamic string library used by Redis will double the free space available on every reallocation.
If the key already exists and is a string, this command appends the provided value at the end of the string. If the key does not exist it is created and set as an empty string, so APPEND will be very similar to SET in this special case.
+

Return value

Integer reply, specifically the total length of the string after the append operation.

Examples

+redis> exists mykey
+(integer) 0
+redis> append mykey "Hello "
+(integer) 6
+redis> append mykey "World"
+(integer) 11
+redis> get mykey
+"Hello World"
+
+ +
+ +
+
+ + + diff --git a/doc/BlpopCommand.html b/doc/BlpopCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..9c64390afe9788ec26a07bc9bb6cfc92d343fb5c --- /dev/null +++ b/doc/BlpopCommand.html @@ -0,0 +1,49 @@ + + + + + + + +
+ + + +
+ + +

BlpopCommand

+ +
+ +
+ +
+ #sidebar ListCommandsSidebar

BLPOP _key1_ _key2_ ... _keyN_ _timeout_ (Redis >

1.3.1) = +

BRPOP _key1_ _key2_ ... _keyN_ _timeout_ (Redis >

1.3.1) = +Time complexity: O(1)
BLPOP (and BRPOP) is a blocking list pop primitive. You can see these commands as blocking versions of LPOP and RPOP, able to block if the specified keys don't exist or contain empty lists.
+
The following is a description of the exact semantic. We describe BLPOP butthe two commands are identical, the only difference is that BLPOP pops theelement from the left (head) of the list, and BRPOP pops from the right (tail).
+

Non blocking behavior

When BLPOP is called, if at least one of the specified keys contain a nonempty list, an element is popped from the head of the list and returned tothe caller together with the name of the key (BLPOP returns a two elementsarray, the first element is the key, the second the popped value).
+
Keys are scanned from left to right, so for instance if youissue BLPOP list1 list2 list3 0 against a dataset where list1 does notexist but list2 and list3 contain non empty lists, BLPOP guaranteesto return an element from the list stored at list2 (since it is the firstnon empty list starting from the left).
+

Blocking behavior

If none of the specified keys exist or contain non empty lists, BLPOPblocks until some other client performs a LPUSH oran RPUSH operation against one of the lists.
+
Once new data is present on one of the lists, the client finally returnswith the name of the key unblocking it and the popped value.
+
When blocking, if a non-zero timeout is specified, the client will unblockreturning a nil special value if the specified amount of seconds passedwithout a push operation against at least one of the specified keys.
+
A timeout of zero means instead to block forever.
+

Multiple clients blocking for the same keys

Multiple clients can block for the same key. They are put intoa queue, so the first to be served will be the one that started to waitearlier, in a first-blpopping first-served fashion.
+

Return value

BLPOP returns a two-elements array via a multi bulk reply in order to returnboth the unblocking key and the popped value.
+
When a non-zero timeout is specified, and the BLPOP operation timed out,the return value is a nil multi bulk reply. Most client values will returnfalse or nil accordingly to the programming language used.
+Multi bulk reply +
+ +
+
+ + + diff --git a/doc/CommandReference.html b/doc/CommandReference.html index 6541e3a4e773272f2bb8575a295d20be5f0b2102..647c1b0c466c94b40344d973525df0b0e9095ea1 100644 --- a/doc/CommandReference.html +++ b/doc/CommandReference.html @@ -37,7 +37,7 @@

Transactions

Publish/Subscribe

Persistence control commands

-

Remote server control commands

+

Remote server control commands

diff --git a/doc/ConfigCommand.html b/doc/ConfigCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..1d7e49c2c9ba49a5c46bf5e61d1f6d3c519f8c6f --- /dev/null +++ b/doc/ConfigCommand.html @@ -0,0 +1,76 @@ + + + + + + + +
+ + + +
+ + +

ConfigCommand

+ +
+ +
+ +
+ #sidebar ControlCommandsSidebar

CONFIG GET _pattern_ (Redis >

2.0)= +

CONFIG SET _parameter_ _value_ (Redis >

2.0)=

The CONFIG command is able to retrieve or alter the configuration of a runningRedis server. Not all the configuration parameters are supported.
+
CONFIG has two sub commands, GET and SET. The GET command is used to readthe configuration, while the SET command is used to alter the configuration.
+

CONFIG GET _pattern_

CONFIG GET returns the current configuration parameters. This sub commandonly accepts a single argument, that is glob style pattern. All theconfiguration parameters matching this parameter are reported as alist of key-value pairs. Example:
+$ redis-cli config get '*'
+1. "dbfilename"
+2. "dump.rdb"
+3. "requirepass"
+4. (nil)
+5. "masterauth"
+6. (nil)
+7. "maxmemory"
+8. "0\n"
+9. "appendfsync"
+10. "everysec"
+11. "save"
+12. "3600 1 300 100 60 10000"
+
+$ redis-cli config get 'm*'
+1. "masterauth"
+2. (nil)
+3. "maxmemory"
+4. "0\n"
+
The return type of the command is a bulk reply.

CONFIG SET _parameter_ _value_

CONFIG SET is used in order to reconfigure the server, setting a specificconfiguration parameter to a new value.
+
The list of configuration parameters supported by CONFIG SET can beobtained issuing a CONFIG GET * command.
+
The configuration set using CONFIG SET is immediately loaded by the Redisserver that will start acting as specified starting from the next command.
+
Example:
+$ ./redis-cli 
+redis> set x 10
+OK
+redis> config set maxmemory 200
+OK
+redis> set y 20
+(error) ERR command not allowed when used memory > 'maxmemory'
+redis> config set maxmemory 0
+OK
+redis> set y 20
+OK
+

Parameters value format

The value of the configuration parameter is the same as the one of thesame parameter in the Redis configuration file, with the following exceptions:
+
  • The save parameter is a list of space-separated integers. Every pair of integers specify the time and number of changes limit to trigger a save. For instance the command CONFIG SET save "3600 10 60 10000" will configure the server to issue a background saving of the RDB file every 3600 seconds if there are at least 10 changes in the dataset, and every 60 seconds if there are at least 10000 changes. To completely disable automatic snapshots just set the parameter as an empty string.
  • All the integer parameters representing memory are returned and accepted only using bytes as unit.
+

See Also

The INFO command can be used in order to read configuration parameters that are not available in the CONFIG command. +
+ +
+
+ + + diff --git a/doc/EventLibray.html b/doc/EventLibray.html new file mode 100644 index 0000000000000000000000000000000000000000..45b9000352e1616b7c011106f8753f1925da1407 --- /dev/null +++ b/doc/EventLibray.html @@ -0,0 +1,44 @@ + + + + + + + +
+ + + +
+
+ +EventLibray: Contents
  Event Library
    Why is an Event Library needed at all? +
+ +

EventLibray

+ +
+ +
+ +
+ +

Event Library

Why is an Event Library needed at all?

Let us figure it out through a series of Q&As.

Q: What do you expect a network server to be doing all the time? <br/> +A: Watch for inbound connections on the port it's listening on and accept them.

Q: Calling accept yields a descriptor. What do I do with it?<br/> +A: Save the descriptor and do a non-blocking read/write operation on it.

Q: Why does the read/write have to be non-blocking?<br/> +A: If the file operation ( even a socket in Unix is a file ) is blocking how could the server for example accept other connection requests when its blocked in a file I/O operation.

Q: I guess I have to do many such non-blocking operations on the socket to see when it's ready. Am I right?<br/> +A: Yes. That is what an event library does for you. Now you get it.

Q: How do Event Libraries do what they do?<br/> +A: They use the operating system's polling facility along with timers.

Q: So are there any open source event libraries that do what you just described? <br/> +A: Yes. Libevent and Libev are two such event libraries that I can recall off the top of my head.

Q: Does Redis use such open source event libraries for handling socket I/O?<br/> +A: No. For various reasons Redis uses its own event library. +
+ +
+
+ + + diff --git a/doc/HackingStrings.html b/doc/HackingStrings.html new file mode 100644 index 0000000000000000000000000000000000000000..68ebb7a52bc5cd38e3439a8d677b08fa57a17425 --- /dev/null +++ b/doc/HackingStrings.html @@ -0,0 +1,83 @@ + + + + + + + +
+ + + +
+
+ +HackingStrings: Contents
  Hacking Strings
    Creating Redis Strings +
+ +

HackingStrings

+ +
+ +
+ +
+ +

Hacking Strings

The implementation of Redis strings is contained in sds.c ( sds stands for Simple Dynamic Strings ).

The C structure sdshdr declared in sds.h represents a Redis string:

+struct sdshdr {
+    long len;
+    long free;
+    char buf[];
+};
+
The buf character array stores the actual string.

The len field stores the length of buf. This makes obtaining the length +of a Redis string an O(1) operation.

The free field stores the number of additional bytes available for use.

Together the len and free field can be thought of as holding the metadata of the +buf character array.

Creating Redis Strings

A new data type named sds is defined in sds.h to be a synonym for a character pointer:

+typedef char *sds;
+
sdsnewlen function defined in sds.c creates a new Redis String:

+sds sdsnewlen(const void *init, size_t initlen) {
+    struct sdshdr *sh;
+
+    sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
+#ifdef SDS_ABORT_ON_OOM
+    if (sh == NULL) sdsOomAbort();
+#else
+    if (sh == NULL) return NULL;
+#endif
+    sh->len = initlen;
+    sh->free = 0;
+    if (initlen) {
+        if (init) memcpy(sh->buf, init, initlen);
+        else memset(sh->buf,0,initlen);
+    }
+    sh->buf[initlen] = '\0';
+    return (char*)sh->buf;
+}
+
Remember a Redis string is a variable of type struct sdshdr. But sdsnewlen returns a character pointer!!

That's a trick and needs some explanation.

Suppose I create a Redis string using sdsnewlen like below:

+sdsnewlen("redis", 5);
+
This creates a new variable of type struct sdshdr allocating memory for len and free +fields as well as for the buf character array.

+sh = zmalloc(sizeof(struct sdshdr)+initlen+1); // initlen is length of init argument.
+
After sdsnewlen successfully creates a Redis string the result is something like:

+-----------
+|5|0|redis|
+-----------
+^   ^
+sh  sh->buf 
+
sdsnewlen returns sh->buf to the caller.

What do you do if you need to free the Redis string pointed by sh?

You want the pointer sh but you only have the pointer sh->buf.

Can you get the pointer sh from sh->buf?

Yes. Pointer arithmetic. Notice from the above ASCII art that if you subtract +the size of two longs from sh->buf you get the pointer sh.

The sizeof two longs happens to be the size of struct sdshdr.

Look at sdslen function and see this trick at work:

+size_t sdslen(const sds s) {
+    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+    return sh->len;
+}
+
Knowing this trick you could easily go through the rest of the functions in sds.c.

The Redis string implementation is hidden behind an interface that accepts only character pointers. The users of Redis strings need not care about how its implemented and treat Redis strings as a character pointer. +
+ +
+
+ + + diff --git a/doc/HashCommandsSidebar.html b/doc/HashCommandsSidebar.html new file mode 100644 index 0000000000000000000000000000000000000000..f4808af20bf217484484e597d7ea9db20ebb5f86 --- /dev/null +++ b/doc/HashCommandsSidebar.html @@ -0,0 +1,36 @@ + + + + + + + +
+ + + +
+
+ +HashCommandsSidebar: Contents +
+ +

HashCommandsSidebar

+ +
+ +
+ +
+ == Hash Commands ==

+
+ +
+
+ + + diff --git a/doc/Hashes.html b/doc/Hashes.html new file mode 100644 index 0000000000000000000000000000000000000000..e202f2cb8b841495ea428b59eb1705f9e26fc72b --- /dev/null +++ b/doc/Hashes.html @@ -0,0 +1,37 @@ + + + + + + + +
+ + + +
+
+ +Hashes: Contents
  Redis Hash Type
  Implementation details +
+ +

Hashes

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

Redis Hash Type

Redis Hashes are unordered maps of Redis Strings between fields and values. It is possible to add, remove, test for existence of fields in O(1) amortized time. It is also possible to enumerate all the keys, values, or both, in O(N) (where N is the number of fields inside the hash).

Redis Hashes are interesting because they are very well suited to represent objects. For instance web application users can be represented by a Redis Hash containing fields such as username, encrypted_password, lastlogin, and so forth.

Another very important property of Redis Hashes is that they use very little memory for hashes composed of a small number of fields (configurable, check redis.conf for details), compared to storing every field as a top level Redis key. This is obtained using a different specialized representation for small hashes. See the implementation details paragraph below for more information.

Commands operating on hashes try to make a good use of the return value in order to signal the application about previous existence of fields. For instance the HSET command will return 1 if the field set was not already present in the hash, otherwise will return 0 (and the user knows this was just an update operation).

The max number of fields in a hash is 2^32-1 (4294967295, more than 4 billion of fields per hash).

Implementation details

The obvious internal representation of hashes is indeed an hash table, as the name of the data structure itself suggests. Still the drawback of this representation is that there is a lot of space overhead for hash table metadata.

Because one of the most interesting uses of Hashes is object encoding, and objects are often composed of a few fields each, Redis uses a different internal representation for small hashes (for Redis to consider a hash small, this must be composed a limited number of fields, and each field and value can't exceed a given number of bytes. All this is user-configurable).

Small hashes are thus encoded using a data structure called zipmap (is not something you can find in a CS book, the name is a Redis invention), that is a very memory efficient data structure to represent string to string maps, at the cost of being O(N) instead of O(1) for most operations. Since the constant times of this data structure are very small, and the zipmaps are converted into real hash tables once they are big enough, the amortized time of Redis hashes is still O(1), and in the practice small zipmaps are not slower than small hash tables because they are designed for good cache locality and fast access.

The result is that small hashes are both memory efficient and fast, while bigger hashes are fast but not as memory efficient than small hashes. + +
+ +
+
+ + + diff --git a/doc/HdelCommand.html b/doc/HdelCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..7afdb9355f2ef97612941224ec2bf34c92693779 --- /dev/null +++ b/doc/HdelCommand.html @@ -0,0 +1,39 @@ + + + + + + + +
+ + + +
+
+ +HdelCommand: Contents
  HDEL _key_ _field_ (Redis >
    Return value +
+ +

HdelCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HDEL _key_ _field_ (Redis >

1.3.10)= +Time complexity: O(1)
Remove the specified field from an hash stored at key.
+
If the field was present in the hash it is returned and 1 is returned, otherwise 0 is returned and no operation is performed.
+

Return value

Integer reply +
+ +
+
+ + + diff --git a/doc/HexistsCommand.html b/doc/HexistsCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..dde528a77e6fe5615df4806f72808cb341e1936f --- /dev/null +++ b/doc/HexistsCommand.html @@ -0,0 +1,39 @@ + + + + + + + +
+ + + +
+
+ +HexistsCommand: Contents
  HEXISTS _key_ _field_ (Redis >
    Return value +
+ +

HexistsCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HEXISTS _key_ _field_ (Redis >

1.3.10)= +Time complexity: O(1)
Return 1 if the hash stored at key contains the specified field.
+
Return 0 if the key is not found or the field is not present.
+

Return value

Integer reply +
+ +
+
+ + + diff --git a/doc/HgetCommand.html b/doc/HgetCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..e12889901bb3f07cf02c555e4c066f53b96e0414 --- /dev/null +++ b/doc/HgetCommand.html @@ -0,0 +1,39 @@ + + + + + + + +
+ + + +
+
+ +HgetCommand: Contents
  HGET _key_ _field_ (Redis >
    Return value +
+ +

HgetCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HGET _key_ _field_ (Redis >

1.3.10)= +Time complexity: O(1)
If key holds a hash, retrieve the value associated to the specified field.
+
If the field is not found or the key does not exist, a special 'nil' value is returned.
+

Return value

Bulk reply +
+ +
+
+ + + diff --git a/doc/HgetallCommand.html b/doc/HgetallCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..7d85c4156f7f4b6e2bced1ac22156349196409cd --- /dev/null +++ b/doc/HgetallCommand.html @@ -0,0 +1,40 @@ + + + + + + + +
+ + + +
+
+ +HgetallCommand: Contents
  HKEYS _key_ (Redis >
  HVALS _key_ (Redis >
  HGETALL _key_ (Redis >
    Return value +
+ +

HgetallCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HKEYS _key_ (Redis >

1.3.10)= +

HVALS _key_ (Redis >

1.3.10)= +

HGETALL _key_ (Redis >

1.3.10)= +Time complexity: O(N), where N is the total number of entries
HKEYS returns all the fields names contained into a hash, HVALS all the associated values, while HGETALL returns both the fields and values in the form of field1, value1, field2, value2, ..., fieldN, valueN.
+

Return value

Multi Bulk Reply +
+ +
+
+ + + diff --git a/doc/HincrbyCommand.html b/doc/HincrbyCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..69c9a61a0fceebcd5fec1ebfc70d6d8b6efc688b --- /dev/null +++ b/doc/HincrbyCommand.html @@ -0,0 +1,45 @@ + + + + + + + +
+ + + +
+
+ +HincrbyCommand: Contents
  HINCRBY _key_ _field_ _value_ (Redis >
    Examples
    Return value +
+ +

HincrbyCommand

+ +
+ +
+ +
+ +

HINCRBY _key_ _field_ _value_ (Redis >

1.3.10)= +Time complexity: O(1)
Increment the number stored at field in the hash at key by value. If key does not exist, a new key holding a hash is created. If field does not exist or holds a string, the value is set to 0 before applying the operation.
+
The range of values supported by HINCRBY is limited to 64 bit signed integers.

Examples

+Since the value argument is signed you can use this command to perform both increments and decrements:

+HINCRBY key field 1 (increment by one)
+HINCRBY key field -1 (decrement by one, just like the DECR command)
+HINCRBY key field -10 (decrement by 10)
+
+

Return value

Integer reply The new value at field after the increment operation. +
+ +
+
+ + + diff --git a/doc/HlenCommand.html b/doc/HlenCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..664ec9531373908f2e12faf7786e8e2f52f0eccb --- /dev/null +++ b/doc/HlenCommand.html @@ -0,0 +1,38 @@ + + + + + + + +
+ + + +
+
+ +HlenCommand: Contents
  HLEN _key_ (Redis >
    Return value +
+ +

HlenCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HLEN _key_ (Redis >

1.3.10)= +Time complexity: O(1)
Return the number of entries (fields) contained in the hash stored at key. If the specified key does not exist, 0 is returned assuming an empty hash.
+

Return value

Integer reply +
+ +
+
+ + + diff --git a/doc/HmsetCommand.html b/doc/HmsetCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..5b3deecae92da05fb38d668218db177e68f33352 --- /dev/null +++ b/doc/HmsetCommand.html @@ -0,0 +1,40 @@ + + + + + + + +
+ + + +
+ + +

HmsetCommand

+ +
+ +
+ +
+ +

HMSET _key_ _field1_ _value1_ ... _fieldN_ _valueN_ (Redis >

1.3.10) = +Time complexity: O(N) (with N being the number of fields)
Set the respective fields to the respective values. HMSET replaces old values with new values.
+
If key does not exist, a new key holding a hash is created.
+

Return value

Status code reply Always +OK because HMSET can't fail +
+ +
+
+ + + diff --git a/doc/HsetCommand.html b/doc/HsetCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..5d8070f2cc20eea3b883cdd3ed6e332a2c978f1b --- /dev/null +++ b/doc/HsetCommand.html @@ -0,0 +1,40 @@ + + + + + + + +
+ + + +
+
+ +HsetCommand: Contents
  HSET _key_ _field_ _value_ (Redis >
    Return value +
+ +

HsetCommand

+ +
+ +
+ +
+ #sidebar HashCommandsSidebar

HSET _key_ _field_ _value_ (Redis >

1.3.10)= +Time complexity: O(1)
Set the specified hash field to the specified value.
+
If key does not exist, a new key holding a hash is created.
+
If the field already exists, and the HSET just produced an update of thevalue, 0 is returned, otherwise if a new field is created 1 is returned.
+

Return value

Integer reply +
+ +
+
+ + + diff --git a/doc/HsetnxCommand.html b/doc/HsetnxCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..c72a6a93c6133e35eb086b692cf0c5d8622e2075 --- /dev/null +++ b/doc/HsetnxCommand.html @@ -0,0 +1,41 @@ + + + + + + + +
+ + + +
+
+ +HsetnxCommand: Contents
  HSETNX _key_ _field_ _value_ (Redis >
    Return value +
+ +

HsetnxCommand

+ +
+ +
+ +
+ +

HSETNX _key_ _field_ _value_ (Redis >

1.3.10)= +Time complexity: O(1)
Set the specified hash field to the specified value, if field does not exist yet.
+
If key does not exist, a new key holding a hash is created.
+
If the field already exists, this operation has no effect and returns 0.Otherwise, the field is set to value and the operation returns 1.
+

Return value

Integer reply +
+ +
+
+ + + diff --git a/doc/MultiExecCommand.html b/doc/MultiExecCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..e0a41983ab3fa98682feb3dad577cb74588b619d --- /dev/null +++ b/doc/MultiExecCommand.html @@ -0,0 +1,96 @@ + + + + + + + +
+ + + +
+
+ +MultiExecCommand: Contents
  MULTI
  COMMAND_1 ...
  COMMAND_2 ...
  COMMAND_N ...
  EXEC or DISCARD
    Usage
    The DISCARD command
    Return value +
+ +

MultiExecCommand

+ +
+ +
+ +
+ #sidebar GenericCommandsSidebar

MULTI

+

COMMAND_1 ...

+

COMMAND_2 ...

+

COMMAND_N ...

+

EXEC or DISCARD

MULTI, EXEC and DISCARD commands are the foundation of Redis Transactions. A Redis Transaction allows the execution of a group of Redis commands in a single step, with two important guarantees:
+
  • All the commands in a transaction are serialized and executed sequentially. It can never happen that a request issued by another client is served in the middle of the execution of a Redis transaction. This guarantees that the commands are executed as a single atomic operation.
  • Either all of the commands or none are processed. The EXEC command triggers the execution of all the commands in the transaction, so if a client loses the connection to the server in the context of a transaction before calling the EXEC command none of the operations are performed, instead if the EXEC command is called, all the operations are performed. An exception to this rule is when the Append Only File is enabled: every command that is part of a Redis transaction will be logged in the AOF as long as the operation is completed, so if the Redis server crashes or is killed by the system administrator in some hard way it is possible that only a partial number of operations are registered.
+

Usage

A Redis transaction is entered using the MULTI command. The command always replies with OK. At this point the user can issue multiple commands. Instead of executing these commands, Redis will "queue" them. All the commands are executed once EXEC is called.
+
Calling DISCARD instead will flush the transaction queue and will exit the transaction.
+
The following is an example using the Ruby client:
+?> r.multi
+=> "OK"
+>> r.incr "foo"
+=> "QUEUED"
+>> r.incr "bar"
+=> "QUEUED"
+>> r.incr "bar"
+=> "QUEUED"
+>> r.exec
+=> [1, 1, 2]
+
+
As it is possible to see from the session above, MULTI returns an "array" of replies, where every element is the reply of a single command in the transaction, in the same order the commands were queued.
+
When a Redis connection is in the context of a MULTI request, all the commands will reply with a simple string "QUEUED" if they are correct from the point of view of the syntax and arity (number of arguments) of the command. Some commands may still fail during execution time.
+
This is clearer at the protocol level: in the following example one command will fail when executed even if the syntax is right:
+Trying 127.0.0.1...
+Connected to localhost.
+Escape character is '^]'.
+MULTI
++OK
+SET a 3 
+abc
++QUEUED
+LPOP a
++QUEUED
+EXEC
+*2
++OK
+-ERR Operation against a key holding the wrong kind of value
+
+
MULTI returned a two elements bulk reply in which one of them is a +OK code and one is a -ERR reply. It's up to the client lib to find a sensible way to provide the error to the user.
+
IMPORTANT: even when a command will raise an error, all the other commands in the queue will be processed. Redis will NOT stop the processing of commands once an error is found.
+
Another example, again using the wire protocol with telnet, shows how syntax errors are reported ASAP instead:
+MULTI
++OK
+INCR a b c
+-ERR wrong number of arguments for 'incr' command
+
+
This time due to the syntax error the "bad" INCR command is not queued at all.
+

The DISCARD command

DISCARD can be used in order to abort a transaction. No command will be executed, and the state of the client is again the normal one, outside of a transaction. Example using the Ruby client:
+?> r.set("foo",1)
+=> true
+>> r.multi
+=> "OK"
+>> r.incr("foo")
+=> "QUEUED"
+>> r.discard
+=> "OK"
+>> r.get("foo")
+=> "1"
+

Return value

Multi bulk reply, specifically:

+The result of a MULTI/EXEC command is a multi bulk reply where every element is the return value of every command in the atomic transaction.
+
+
+ +
+
+ + + diff --git a/doc/PublishSubscribe.html b/doc/PublishSubscribe.html new file mode 100644 index 0000000000000000000000000000000000000000..c3d5d060854d5c606785946205e7a728f6229c3f --- /dev/null +++ b/doc/PublishSubscribe.html @@ -0,0 +1,116 @@ + + + + + + + +
+ + + +
+ + +

PublishSubscribe

+ +
+ +
+ +
+ =SUBSCRIBE channel_1 channel_2 ... channel_N= +

UNSUBSCRIBE channel_1 channel_2 ... channel_N

+

UNSUBSCRIBE (unsubscribe from all channels)

+

PSUBSCRIBE pattern_1 pattern_2 ... pattern_N

+

PUNSUBSCRIBE pattern_1 pattern_2 ... pattern_N

+

PUNSUBSCRIBE (unsubscribe from all patterns)

+

PUBLISH channel message

Time complexity: subscribe is O(1), unsubscribe is O(N) where N is the number of clients already subscribed to a channel, publish is O(N+M) where N is the number of clients subscribed to the receiving channel, and M is the total number of subscribed patterns (by any client). Psubscribe is O(N) where N is the number of patterns the Psubscribing client is already subscribed to. Punsubscribe is O(N+M) where N is the number of patterns the Punsubscribing client is already subscribed and M is the number of total patterns subscribed in the system (by any client).

SUBSCRIBE, UNSUBSCRIBE and PUBLISH commands implement thePublish/Subscribe messaging paradigm where (citing Wikipedia) senders (publishers) are not programmed to send their messages to specific receivers (subscribers). Rather, published messages are characterized into channels, without knowledge of what (if any) subscribers there may be. Subscribers express interest in one or more channels, and only receive messages that are of interest, without knowledge of what (if any) publishers there are. This decoupling of publishers and subscribers can allow for greater scalability and a more dynamic network topology.
+
For instance in order to subscribe to the channels foo and bar the client will issue the SUBSCRIBE command followed by the names of the channels.
+SUBSCRIBE foo bar
+
+
All the messages sent by other clients to these channels will be pushed by the Redis server to all the subscribed clients, in the form of a three elements bulk reply, where the first element is the message type, the second the originating channel, and the third argument the message payload.
+
A client subscribed to 1 or more channels should NOT issue other commands other than SUBSCRIBE and UNSUBSCRIBE, but can subscribe or unsubscribe to other channels dynamically.
+
The reply of the SUBSCRIBE and UNSUBSCRIBE operations are sent in the form of messages, so that the client can just read a coherent stream of messages where the first element indicates the kind of message.

Format of pushed messages

+
Messages are in the form of multi bulk replies with three elements.The first element is the kind of message:
  • "subscribe": means that we successfully subscribed to the channel given as second element of the multi bulk reply. The third argument represents the number of channels we are currently subscribed to.
  • "unsubscribe": means that we successfully unsubscribed from the channel given as second element of the multi bulk reply. The third argument represents the number of channels we are currently subscribed to. If this latest argument is zero, we are no longer subscribed to any channel, and the client can issue any kind of Redis command as we are outside the Pub/sub state.
  • "message": it is a message received as result of a PUBLISH command issued by another client. The second element is the name of the originating channel, and the third the actual message payload.

Unsubscribing from all the channels at once

+If the UNSUBSCRIBE command is issued without additional arguments, it is equivalent to unsubscribing from all the channels we are currently subscribed to. A message for every unsubscribed channel will be received. +

Wire protocol example

+
+SUBSCRIBE first second
+*3
+$9
+subscribe
+$5
+first
+:1
+*3
+$9
+subscribe
+$6
+second
+:2
+
+at this point from another client we issue a PUBLISH operation against the channel named "second". This is what the first client receives: +
+*3
+$7
+message
+$6
+second
+$5
+Hello
+
+Now the client unsubscribes itself from all the channels using the UNSUBSCRIBE command without additional arguments: +
+UNSUBSCRIBE
+*3
+$11
+unsubscribe
+$6
+second
+:1
+*3
+$11
+unsubscribe
+$5
+first
+:0
+
+

PSUBSCRIBE and PUNSUBSCRIBE: pattern matching subscriptions

+Redis Pub/Sub implementation supports pattern matching. Clients may subscribe to glob style patterns in order to receive all the messages sent to channel names matching a given pattern.

For instance the command: +
+PSUBSCRIBE news.*
+
+Will receive all the messages sent to the channel news.art.figurative and news.music.jazz and so forth. All the glob style patterns are valid, so multiple wild cards are supported.

Messages received as a result of pattern matching are sent in a different format: +
  • The type of the message is "pmessage": it is a message received as result of a PUBLISH command issued by another client, matching a pattern matching subscription. The second element is the original pattern matched, the third element is the name of the originating channel, and the last element the actual message payload.
+Similarly to SUBSCRIBE and UNSUBSCRIBE, PSUBSCRIBE and PUNSUBSCRIBE commands are acknowledged by the system sending a message of type "psubscribe" and "punsubscribe" using the same format as the "subscribe" and "unsubscribe" message format. +

Messages matching both a pattern and a channel subscription

+A client may receive a single message multiple time if it's subscribed to multiple patterns matching a published message, or it is subscribed to both patterns and channels matching the message. Like in the following example: +
+SUBSCRIBE foo
+PSUBSCRIBE f*
+
+In the above example, if a message is sent to the foo channel, the client will receive two messages, one of type "message" and one of type "pmessage". +

The meaning of the count of subscriptions with pattern matching

+In subscribe, unsubscribe, psubscribe and punsubscribe message types, the last argument is the count of subscriptions still active. This number is actually the total number of channels and patterns the client is still subscribed to. So the client will exit the Pub/Sub state only when this count will drop to zero as a result of unsubscription from all the channels and patterns. +

More details on the PUBLISH command

+The Publish command is a bulk command where the first argument is the target class, and the second argument the data to send. It returns an Integer Reply representing the number of clients that received the message (that is, the number of clients that were listening for this class). +

Programming Example

+Pieter Noordhuis provided a great example using Event-machine and Redis to create a multi user high performance web chat, with source code included of course! +

Client library implementations hints

+Because all the messages received contain the original subscription causing the message delivery (the channel in the case of "message" type, and the original pattern in the case of "pmessage" type) client libraries may bind the original subscription to callbacks (that can be anonymous functions, blocks, function pointers, and so forth), using a hash table.

When a message is received an O(1) lookup can be done in order to deliver the message to the registered callback. + +
+ +
+
+ + + diff --git a/doc/RedisBigData.html b/doc/RedisBigData.html new file mode 100644 index 0000000000000000000000000000000000000000..2477b1ac277a044fd7add2bd32ec1417d4d0b09a --- /dev/null +++ b/doc/RedisBigData.html @@ -0,0 +1,61 @@ + + + + + + + +
+ + + +
+ + +

RedisBigData

+ +
+ +
+ +
+ =Redis Big Data: facts and guidelines=

BGSAVE and BGREWRITEAOF blocking fork() call

+
+fork.c && ./a.out
+allocated:     1 MB, fork() took 0.000
+allocated:    10 MB, fork() took 0.001
+allocated:   100 MB, fork() took 0.007
+allocated:  1000 MB, fork() took 0.059
+allocated: 10000 MB, fork() took 0.460
+allocated: 20000 MB, fork() took 0.895
+allocated: 30000 MB, fork() took 1.327
+allocated: 40000 MB, fork() took 1.759
+allocated: 50000 MB, fork() took 2.190
+allocated: 60000 MB, fork() took 2.621
+allocated: 70000 MB, fork() took 3.051
+allocated: 80000 MB, fork() took 3.483
+allocated: 90000 MB, fork() took 3.911
+allocated: 100000 MB, fork() took 4.340
+allocated: 110000 MB, fork() took 4.770
+allocated: 120000 MB, fork() took 5.202
+
+

Using multiple cores

+

Splitting data into multiple instances

+

BGSAVE / AOFSAVE memory usage, and copy on write

+

BGSAVE / AOFSAVE time for big datasets

+

Non blocking hash table

+ +
+ +
+
+ + + diff --git a/doc/RedisEventLibrary.html b/doc/RedisEventLibrary.html new file mode 100644 index 0000000000000000000000000000000000000000..4b641a3c50ba6f5c0f18600cb8dbe2b8575f05a6 --- /dev/null +++ b/doc/RedisEventLibrary.html @@ -0,0 +1,70 @@ + + + + + + + +
+ + + +
+
+ +RedisEventLibrary: Contents
  Redis Event Library
    Event Loop Initialization
      aeCreateEventLoop
      aeCreateTimeEvent
      aeCreateFileEvent
    Event Loop Processing
      aeProcessEvents
      processTimeEvents +
+ +

RedisEventLibrary

+ +
+ +
+ +
+ #sidebar RedisInternals

Redis Event Library

Redis implements its own event library. The event library is implemented in ae.c.

The best way to understand how the Redis event library works is to understand how Redis uses it.

Event Loop Initialization

+initServer function defined in redis.c initializes the numerous fields of the redisServer structure variable. One such field is the Redis event loop el:

 
+aeEventLoop *el 
+
initServer initializes server.el field by calling aeCreateEventLoop defined in ae.c. The definition of aeEventLoop is below: +
+typedef struct aeEventLoop 
+{
+    int maxfd;
+    long long timeEventNextId;
+    aeFileEvent events[AE_SETSIZE]; /* Registered events */
+    aeFiredEvent fired[AE_SETSIZE]; /* Fired events */
+    aeTimeEvent *timeEventHead;
+    int stop;
+    void *apidata; /* This is used for polling API specific data */
+    aeBeforeSleepProc *beforesleep;
+} aeEventLoop;
+

aeCreateEventLoop

aeCreateEventLoop first mallocs aeEventLoop structure then calls ae_epoll.c:aeApiCreate. + +aeApiCreate mallocs aeApiState that has two fields - epfd that holds the epoll file descriptor returned by a call from [http://man.cx/epoll_create%282%29 epoll_create] and events that is of type struct epoll_event defined by the Linux epoll library. The use of the events field will be described later. + +Next is ae.c:aeCreateTimeEvent. But before that initServer calls anet.c:anetTcpServer that creates and returns a listening descriptor. The descriptor listens on port 6379 by default. The returned listening descriptor is stored in server.fd field.

aeCreateTimeEvent

aeCreateTimeEvent accepts the following as parameters:

  • eventLoop: This is server.el in redis.c
  • milliseconds: The number of milliseconds from the current time after which the timer expires.
  • proc: Function pointer. Stores the address of the function that has to be called after the timer expires.
  • clientData: Mostly NULL.
  • finalizerProc: Pointer to the function that has to be called before the timed event is removed from the list of timed events.
+initServer calls aeCreateTimeEvent to add a timed event to timeEventHead field of server.el. timeEventHead is a pointer to a list of such timed events. The call to aeCreateTimeEvent from redis.c:initServer function is given below:

+aeCreateTimeEvent(server.el /*eventLoop*/, 1 /*milliseconds*/, serverCron /*proc*/, NULL /*clientData*/, NULL /*finalizerProc*/);
+
redis.c:serverCron performs many operations that helps keep Redis running properly.

aeCreateFileEvent

The essence of aeCreateFileEvent function is to execute epoll_ctl system call which adds a watch for EPOLLIN event on the listening descriptor create by anetTcpServer and associate it with the epoll descriptor created by a call to aeCreateEventLoop.

Following is an explanation of what precisely aeCreateFileEvent does when called from redis.c:initServer.

initServer passes the following arguments to aeCreateFileEvent: +
  • server.el: The event loop created by aeCreateEventLoop. The epoll descriptor is got from server.el.
  • server.fd: The listening descriptor that also serves as an index to access the relevant file event structure from the eventLoop->events table and store extra information like the callback function.
  • AE_READABLE: Signifies that server.fd has to be watched for EPOLLIN event.
  • acceptHandler: The function that has to be executed when the event being watched for is ready. This function pointer is stored in eventLoop->events[server.fd]->rfileProc.
+This completes the initialization of Redis event loop.

Event Loop Processing

ae.c:aeMain called from redis.c:main does the job of processing the event loop that is initialized in the previous phase.

ae.c:aeMain calls ae.c:aeProcessEvents in a while loop that processes pending time and file events.

aeProcessEvents

ae.c:aeProcessEvents looks for the time event that will be pending in the smallest amount of time by calling ae.c:aeSearchNearestTimer on the event loop. In our case there is only one timer event in the event loop that was created by ae.c:aeCreateTimeEvent.

Remember, that timer event created by aeCreateTimeEvent has by now probably elapsed because it had an expiry time of one millisecond. Since the timer has already expired, the seconds and microseconds fields of the tvp timeval structure variable are initialized to zero.

The tvp structure variable along with the event loop variable is passed to ae_epoll.c:aeApiPoll.

aeApiPoll functions does a epoll_wait on the epoll descriptor and populates the eventLoop->fired table with the details: +
  • fd: The descriptor that is now ready to do a read/write operation depending on the mask value. The
  • mask: The read/write event that can now be performed on the corresponding descriptor.
+aeApiPoll returns the number of such file events ready for operation. Now to put things in context, if any client has requested for a connection then aeApiPoll would have noticed it and populated the eventLoop->fired table with an entry of the descriptor being the listening descriptor and mask being AE_READABLE.

Now, aeProcessEvents calls the redis.c:acceptHandler registered as the callback. acceptHandler executes [http://man.cx/accept(2) accept] on the listening descriptor returning a connected descriptor with the client. redis.c:createClient adds a file event on the connected descriptor through a call to ae.c:aeCreateFileEvent like below:

+    if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
+        readQueryFromClient, c) == AE_ERR) {
+        freeClient(c);
+        return NULL;
+    }
+
c is the redisClient structure variable and c->fd is the connected descriptor.

Next the ae.c:aeProcessEvent calls ae.c:processTimeEvents

processTimeEvents

ae.processTimeEvents iterates over list of time events starting at eventLoop->timeEventHead.

For every timed event that has elapsed processTimeEvents calls the registered callback. In this case it calls the only timed event callback registered, that is, redis.c:serverCron. The callback returns the time in milliseconds after which the callback must be called again. This change is recorded via a call to ae.c:aeAddMilliSeconds and will be handled on the next iteration of ae.c:aeMain while loop.

That's all. + +
+ +
+
+ + + diff --git a/doc/RedisGuides.html b/doc/RedisGuides.html new file mode 100644 index 0000000000000000000000000000000000000000..b133bd6ea937d9ef3c084d5d9ee3bf7cdd5098f7 --- /dev/null +++ b/doc/RedisGuides.html @@ -0,0 +1,37 @@ + + + + + + + +
+ + + +
+
+ +RedisGuides: Contents +
+ +

RedisGuides

+ +
+ +
+ + + +
+
+ + + diff --git a/doc/RedisInternals.html b/doc/RedisInternals.html new file mode 100644 index 0000000000000000000000000000000000000000..231310986fcdb9c289a57351aa2883acdaca7cff --- /dev/null +++ b/doc/RedisInternals.html @@ -0,0 +1,38 @@ + + + + + + + +
+ + + +
+
+ +RedisInternals: Contents
  Redis Internals
    Redis STRINGS
    Redis Virtual Memory
    Redis Event Library +
+ +

RedisInternals

+ +
+ +
+ +
+

Redis Internals

This is a source code level documentation of Redis.

Redis STRINGS

String is the basic building block of Redis types.

Redis is a key-value store. +All Redis keys are strings and it's also the simplest value type.

Lists, sets, sorted sets and hashes are other more complex value types and even +these are composed of strings.

Hacking Strings documents the Redis String implementation details.

Redis Virtual Memory

A technical specification full of details about the Redis Virtual Memory subsystem

Redis Event Library

Read event library to understand what an event library does and why it's needed.

Redis event library documents the implementation details of the event library used by Redis +
+ +
+
+ + + diff --git a/doc/SetexCommand.html b/doc/SetexCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..7c766d85cd1de75f4697f3d8d8e0f5f3fa87b108 --- /dev/null +++ b/doc/SetexCommand.html @@ -0,0 +1,42 @@ + + + + + + + +
+ + + +
+
+ +SetexCommand: Contents
  SETEX _key_ _time_ _value_
    Return value +
+ +

SetexCommand

+ +
+ +
+ +
+ #sidebar StringCommandsSidebar

SETEX _key_ _time_ _value_

+Time complexity: O(1)
The command is exactly equivalent to the following group of commands:
+SET _key_ _value_
+EXPIRE _key_ _time_
+
+
The operation is atomic. An atomic SET+EXPIRE operation was already provided using MULTI/EXEC, but SETEX is a faster alternative provided because this operation is very common when Redis is used as a Cache.
+

Return value

Status code reply +
+ +
+
+ + + diff --git a/doc/SubstrCommand.html b/doc/SubstrCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..3b6bdef0aa449c353b9cab371df2d3a70e552638 --- /dev/null +++ b/doc/SubstrCommand.html @@ -0,0 +1,51 @@ + + + + + + + +
+ + + +
+
+ +SubstrCommand: Contents
  SUBSTR _key_ _start_ _end_
    Return value
    Examples +
+ +

SubstrCommand

+ +
+ +
+ +
+ #sidebar StringCommandsSidebar

SUBSTR _key_ _start_ _end_

+Time complexity: O(start+n) (with start being the start index and n the total length of the requested range). Note that the lookup part of this command is O(1) so for small strings this is actually an O(1) command.
Return a subset of the string from offset start to offset end (both offsets are inclusive). Negative offsets can be used in order to provide an offset starting from the end of the string. So -1 means the last char, -2 the penultimate and so forth.
+
The function handles out of range requests without raising an error, but just limiting the resulting range to the actual length of the string.
+

Return value

Bulk reply

Examples

+redis> set s "This is a string"
+OK
+redis> substr s 0 3
+"This"
+redis> substr s -3 -1
+"ing"
+redis> substr s 0 -1
+"This is a string"
+redis> substr s 9 100000
+" string"
+
+ +
+ +
+
+ + + diff --git a/doc/VirtualMemorySpecification.html b/doc/VirtualMemorySpecification.html new file mode 100644 index 0000000000000000000000000000000000000000..28f440dd3eb979a58edd155a224ab811c38b7e17 --- /dev/null +++ b/doc/VirtualMemorySpecification.html @@ -0,0 +1,156 @@ + + + + + + + +
+ + + +
+ + +

VirtualMemorySpecification

+ +
+ +
+ +
+ #sidebar RedisInternals

Virtual Memory technical specification

This document details the internals of the Redis Virtual Memory subsystem. The intended audience is not the final user but programmers willing to understand or modify the Virtual Memory implementation.

Keys vs Values: what is swapped out?

+The goal of the VM subsystem is to free memory transferring Redis Objects from memory to disk. This is a very generic command, but specifically, Redis transfers only objects associated with values. In order to understand better this concept we'll show, using the DEBUG command, how a key holding a value looks from the point of view of the Redis internals: +
+redis> set foo bar
+OK
+redis> debug object foo
+Key at:0x100101d00 refcount:1, value at:0x100101ce0 refcount:1 encoding:raw serializedlength:4
+
+As you can see from the above output, the Redis top level hash table maps Redis Objects (keys) to other Redis Objects (values). The Virtual Memory is only able to swap values on disk, the objects associated to keys are always taken in memory: this trade off guarantees very good lookup performances, as one of the main design goals of the Redis VM is to have performances similar to Redis with VM disabled when the part of the dataset frequently used fits in RAM.

How does a swapped value looks like internally

+When an object is swapped out, this is what happens in the hash table entry: +
  • The key continues to hold a Redis Object representing the key.
  • The value is set to NULL
So you may wonder where we store the information that a given value (associated to a given key) was swapped out. Just in the key object!

This is how the Redis Object structure robj looks like: +
+/* The actual Redis Object */
+typedef struct redisObject {
+    void *ptr;
+    unsigned char type;
+    unsigned char encoding;
+    unsigned char storage;  /* If this object is a key, where is the value?
+                             * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
+    unsigned char vtype; /* If this object is a key, and value is swapped out,
+                          * this is the type of the swapped out object. */
+    int refcount;
+    /* VM fields, this are only allocated if VM is active, otherwise the
+     * object allocation function will just allocate
+     * sizeof(redisObjct) minus sizeof(redisObjectVM), so using
+     * Redis without VM active will not have any overhead. */
+    struct redisObjectVM vm;
+} robj;
+
+As you can see there are a few fields about VM. The most important one is storage, that can be one of this values: +
  • REDIS_VM_MEMORY: the associated value is in memory.
  • REDIS_VM_SWAPPED: the associated values is swapped, and the value entry of the hash table is just set to NULL.
  • REDIS_VM_LOADING: the value is swapped on disk, the entry is NULL, but there is a job to load the object from the swap to the memory (this field is only used when threaded VM is active).
  • REDIS_VM_SWAPPING: the value is in memory, the entry is a pointer to the actual Redis Object, but there is an I/O job in order to transfer this value to the swap file.
If an object is swapped on disk (REDIS_VM_SWAPPED or REDIS_VM_LOADING), how do we know where it is stored, what type it is, and so forth? That's simple: the vtype field is set to the original type of the Redis object swapped, while the vm field (that is a redisObjectVM structure) holds information about the location of the object. This is the definition of this additional structure: +
+/* The VM object structure */
+struct redisObjectVM {
+    off_t page;         /* the page at which the object is stored on disk */
+    off_t usedpages;    /* number of pages used on disk */
+    time_t atime;       /* Last access time */
+} vm;
+
+As you can see the structure contains the page at which the object is located in the swap file, the number of pages used, and the last access time of the object (this is very useful for the algorithm that select what object is a good candidate for swapping, as we want to transfer on disk objects that are rarely accessed).

As you can see, while all the other fields are using unused bytes in the old Redis Object structure (we had some free bit due to natural memory alignment concerns), the vm field is new, and indeed uses additional memory. Should we pay such a memory cost even when VM is disabled? No! This is the code to create a new Redis Object: +
+... some code ...
+        if (server.vm_enabled) {
+            pthread_mutex_unlock(&server.obj_freelist_mutex);
+            o = zmalloc(sizeof(*o));
+        } else {
+            o = zmalloc(sizeof(*o)-sizeof(struct redisObjectVM));
+        }
+... some code ...
+
+As you can see if the VM system is not enabled we allocate just sizeof(*o)-sizeof(struct redisObjectVM) of memory. Given that the vm field is the last in the object structure, and that this fields are never accessed if VM is disabled, we are safe and Redis without VM does not pay the memory overhead.

The Swap File

+The next step in order to understand how the VM subsystem works is understanding how objects are stored inside the swap file. The good news is that's not some kind of special format, we just use the same format used to store the objects in .rdb files, that are the usual dump files produced by Redis using the SAVE command.

The swap file is composed of a given number of pages, where every page size is a given number of bytes. This parameters can be changed in redis.conf, since different Redis instances may work better with different values: it depends on the actual data you store inside it. The following are the default values: +
+vm-page-size 32
+vm-pages 134217728
+
+Redis takes a "bitmap" (a contiguous array of bits set to zero or one) in memory, every bit represents a page of the swap file on disk: if a given bit is set to 1, it represents a page that is already used (there is some Redis Object stored there), while if the corresponding bit is zero, the page is free.

Taking this bitmap (that we'll call the page table) in memory is a huge win in terms of performances, and the memory used is small: we just need 1 bit for every page on disk. For instance in the example above 134217728 pages of 32 bytes each (4GB swap file) is using just 16 MB of RAM for the page table.

Transfering objects from memory to swap

+In order to transfer an object from memory to disk we need to perform the following steps (assuming non threaded VM, just a simple blocking approach):

  • Find how many pages are needed in order to store this object on the swap file. This is trivially accomplished just calling the function rdbSavedObjectPages that returns the number of pages used by an object on disk. Note that this function does not duplicate the .rdb saving code just to understand what will be the length after an object will be saved on disk, we use the trick of opening /dev/null and writing the object there, finally calling ftello in order to check the amount of bytes required. What we do basically is to save the object on a virtual very fast file, that is, /dev/null.
  • Now that we know how many pages are required in the swap file, we need to find this number of contiguous free pages inside the swap file. This task is accomplished by the vmFindContiguousPages function. As you can guess this function may fail if the swap is full, or so fragmented that we can't easily find the required number of contiguous free pages. When this happens we just abort the swapping of the object, that will continue to live in memory.
  • Finally we can write the object on disk, at the specified position, just calling the function vmWriteObjectOnSwap.
As you can guess once the object was correctly written in the swap file, it is freed from memory, the storage field in the associated key is set to REDIS_VM_SWAPPED, and the used pages are marked as used in the page table. +

Loading objects back in memory

+Loading an object from swap to memory is simpler, as we already know where the object is located and how many pages it is using. We also know the type of the object (the loading functions are required to know this information, as there is no header or any other information about the object type on disk), but this is stored in the vtype field of the associated key as already seen above.

Calling the function vmLoadObject passing the key object associated to the value object we want to load back is enough. The function will also take care of fixing the storage type of the key (that will be REDIS_VM_MEMORY), marking the pages as freed in the page table, and so forth.

The return value of the function is the loaded Redis Object itself, that we'll have to set again as value in the main hash table (instead of the NULL value we put in place of the object pointer when the value was originally swapped out). +

How blocking VM works

+Now we have all the building blocks in order to describe how the blocking VM works. First of all, an important detail about configuration. In order to enable blocking VM in Redis server.vm_max_threads must be set to zero. +We'll see later how this max number of threads info is used in the threaded VM, for now all that's needed to know is that Redis reverts to fully blocking VM when this is set to zero.

We also need to introduce another important VM parameter, that is, server.vm_max_memory. This parameter is very important as it is used in order to trigger swapping: Redis will try to swap objects only if it is using more memory than the max memory setting, otherwise there is no need to swap as we are matching the user requested memory usage. +

Blocking VM swapping

+Swapping of objects from memory to disk happens in the cron function. This function used to be called every second, while in the recent Redis versions on git it is called every 100 milliseconds (that is, 10 times per second). +If this function detects we are out of memory, that is, the memory used is greater than the vm-max-memory setting, it starts transferring objects from memory to disk in a loop calling the function vmSwapOneObject. This function takes just one argument, if 0 it will swap objects in a blocking way, otherwise if it is 1, I/O threads are used. In the blocking scenario we just call it with zero as argument.

vmSwapOneObject acts performing the following steps: +
  • The key space is inspected in order to find a good candidate for swapping (we'll see later what a good candidate for swapping is).
  • The associated value is transferred to disk, in a blocking way.
  • The key storage field is set to REDIS_VM_SWAPPED, while the vm fields of the object are set to the right values (the page index where the object was swapped, and the number of pages used to swap it).
  • Finally the value object is freed and the value entry of the hash table is set to NULL.
The function is called again and again until one of the following happens: there is no way to swap more objects because either the swap file is full or nearly all the objects are already transfered on disk, or simply the memory usage is already under the vm-max-memory parameter. +

What values to swap when we are out of memory?

+Understanding what's a good candidate for swapping is not too hard. A few objects at random are sampled, and for each their swappability is computed as: +
+swappability = age*log(size_in_memory)
+
+The age is the number of seconds the key was not requested, while size_in_memory is a fast estimation of the amount of memory (in bytes) used by the object in memory. So we try to swap out objects that are rarely accessed, and we try to swap bigger objects over smaller one, but the latter is a less important factor (because of the logarithmic function used). This is because we don't want bigger objects to be swapped out and in too often as the bigger the object the more I/O and CPU is required in order to transfer it. +

Blocking VM loading

+What happens if an operation against a key associated with a swapped out object is requested? For instance Redis may just happen to process the following command: +
+GET foo
+
+If the value object of the foo key is swapped we need to load it back in memory before processing the operation. In Redis the key lookup process is centralized in the lookupKeyRead and lookupKeyWrite functions, these two functions are used in the implementation of all the Redis commands accessing the keyspace, so we have a single point in the code where to handle the loading of the key from the swap file to memory.

So this is what happens: +
  • The user calls some command having as argument a swapped key
  • The command implementation calls the lookup function
  • The lookup function searches for the key in the top level hash table. If the value associated with the requested key is swapped (we can see that checking the storage field of the key object), we load it back in memory in a blocking way before returning to the user.
+This is pretty straightforward, but things will get more interesting with the threads. From the point of view of the blocking VM the only real problem is the saving of the dataset using another process, that is, handling BGSAVE and BGREWRITEAOF commands. +

Background saving when VM is active

+The default Redis way to persist on disk is to create .rdb files using a child process. Redis calls the fork() system call in order to create a child, that has the exact copy of the in memory dataset, since fork duplicates the whole program memory space (actually thanks to a technique called Copy on Write memory pages are shared between the parent and child process, so the fork() call will not require too much memory).

In the child process we have a copy of the dataset in a given point in the time. Other commands issued by clients will just be served by the parent process and will not modify the child data.

The child process will just store the whole dataset into the dump.rdb file and finally will exit. But what happens when the VM is active? Values can be swapped out so we don't have all the data in memory, and we need to access the swap file in order to retrieve the swapped values. While child process is saving the swap file is shared between the parent and child process, since: +
  • The parent process needs to access the swap file in order to load values back into memory if an operation against swapped out values are performed.
  • The child process needs to access the swap file in order to retrieve the full dataset while saving the data set on disk.
In order to avoid problems while both the processes are accessing the same swap file we do a simple thing, that is, not allowing values to be swapped out in the parent process while a background saving is in progress. This way both the processes will access the swap file in read only. This approach has the problem that while the child process is saving no new values can be transfered on the swap file even if Redis is using more memory than the max memory parameters dictates. This is usually not a problem as the background saving will terminate in a short amount of time and if still needed a percentage of values will be swapped on disk ASAP.

An alternative to this scenario is to enable the Append Only File that will have this problem only when a log rewrite is performed using the BGREWRITEAOF command. +

The problem with the blocking VM

+The problem of blocking VM is that... it's blocking :) +This is not a problem when Redis is used in batch processing activities, but for real-time usage one of the good points of Redis is the low latency. The blocking VM will have bad latency behaviors as when a client is accessing a swapped out value, or when Redis needs to swap out values, no other clients will be served in the meantime.

Swapping out keys should happen in background. Similarly when a client is accessing a swapped out value other clients accessing in memory values should be served mostly as fast as when VM is disabled. Only the clients dealing with swapped out keys should be delayed.

All this limitations called for a non-blocking VM implementation. +

Threaded VM

+There are basically three main ways to turn the blocking VM into a non blocking one. +
  • 1: One way is obvious, and in my opinion, not a good idea at all, that is, turning Redis itself into a threaded server: if every request is served by a different thread automatically other clients don't need to wait for blocked ones. Redis is fast, exports atomic operations, has no locks, and is just 10k lines of code, because it is single threaded, so this was not an option for me.
  • 2: Using non-blocking I/O against the swap file. After all you can think Redis is already event-loop based, why not just handle disk I/O in a non-blocking fashion? I also discarded this possibility because of two main reasons. One is that non blocking file operations, unlike sockets, are an incompatibility nightmare. It's not just like calling select, you need to use OS-specific things. The other problem is that the I/O is just one part of the time consumed to handle VM, another big part is the CPU used in order to encode/decode data to/from the swap file. This is why I picked option three, that is...
  • 3: Using I/O threads, that is, a pool of threads handling the swap I/O operations. This is what the Redis VM is using, so let's detail how this works.

I/O Threads

+The threaded VM design goals were the following, in order of importance: +
  • Simple implementation, little room for race conditions, simple locking, VM system more or less completely decoupled from the rest of Redis code.
  • Good performances, no locks for clients accessing values in memory.
  • Ability to decode/encode objects in the I/O threads.
The above goals resulted in an implementation where the Redis main thread (the one serving actual clients) and the I/O threads communicate using a queue of jobs, with a single mutex. +Basically when main thread requires some work done in the background by some I/O thread, it pushes an I/O job structure in the server.io_newjobs queue (that is, just a linked list). If there are no active I/O threads, one is started. At this point some I/O thread will process the I/O job, and the result of the processing is pushed in the server.io_processed queue. The I/O thread will send a byte using an UNIX pipe to the main thread in order to signal that a new job was processed and the result is ready to be processed.

This is how the iojob structure looks like: +
+typedef struct iojob {
+    int type;   /* Request type, REDIS_IOJOB_* */
+    redisDb *db;/* Redis database */
+    robj *key;  /* This I/O request is about swapping this key */
+    robj *val;  /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
+                 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
+    off_t page; /* Swap page where to read/write the object */
+    off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
+    int canceled; /* True if this command was canceled by blocking side of VM */
+    pthread_t thread; /* ID of the thread processing this entry */
+} iojob;
+
+There are just three type of jobs that an I/O thread can perform (the type is specified by the type field of the structure): +
  • REDIS_IOJOB_LOAD: load the value associated to a given key from swap to memory. The object offset inside the swap file is page, the object type is key->vtype. The result of this operation will populate the val field of the structure.
  • REDIS_IOJOB_PREPARE_SWAP: compute the number of pages needed in order to save the object pointed by val into the swap. The result of this operation will populate the pages field.
  • REDIS_IOJOB_DO_SWAP: Transfer the object pointed by val to the swap file, at page offset page.
The main thread delegates just the above three tasks. All the rest is handled by the main thread itself, for instance finding a suitable range of free pages in the swap file page table (that is a fast operation), deciding what object to swap, altering the storage field of a Redis object to reflect the current state of a value. +

Non blocking VM as probabilistic enhancement of blocking VM

+So now we have a way to request background jobs dealing with slow VM operations. How to add this to the mix of the rest of the work done by the main thread? While blocking VM was aware that an object was swapped out just when the object was looked up, this is too late for us: in C it is not trivial to start a background job in the middle of the command, leave the function, and re-enter in the same point the computation when the I/O thread finished what we requested (that is, no co-routines or continuations or alike).

Fortunately there was a much, much simpler way to do this. And we love simple things: basically consider the VM implementation a blocking one, but add an optimization (using the non blocking VM operations we are able to perform) to make the blocking very unlikely.

This is what we do: +
  • Every time a client sends us a command, before the command is executed, we examine the argument vector of the command in search for swapped keys. After all we know for every command what arguments are keys, as the Redis command format is pretty simple.
  • If we detect that at least a key in the requested command is swapped on disk, we block the client instead of really issuing the command. For every swapped value associated to a requested key, an I/O job is created, in order to bring the values back in memory. The main thread continues the execution of the event loop, without caring about the blocked client.
  • In the meanwhile, I/O threads are loading values in memory. Every time an I/O thread finished loading a value, it sends a byte to the main thread using an UNIX pipe. The pipe file descriptor has a readable event associated in the main thread event loop, that is the function vmThreadedIOCompletedJob. If this function detects that all the values needed for a blocked client were loaded, the client is restarted and the original command called.
+So you can think of this as a blocking VM that almost always happens to have the right keys in memory, since we pause clients that are going to issue commands about swapped out values until these values are loaded.

If the function checking what argument is a key fails in some way, there is no problem: the lookup function will see that a given key is associated to a swapped out value and will block loading it. So our non blocking VM reverts to a blocking one when it is not possible to anticipate what keys are touched.

For instance in the case of the SORT command used together with the GET or BY options, it is not trivial to know beforehand what keys will be requested, so at least in the first implementation, SORT BY/GET resorts to the blocking VM implementation. +

Blocking clients on swapped keys

+How to block clients? To suspend a client in an event-loop based server is pretty trivial. All we do is cancelling its read handler. Sometimes we do something different (for instance for BLPOP) that is just marking the client as blocked, but not processing new data (just accumulating the new data into input buffers). +

Aborting I/O jobs

+There is something hard to solve about the interactions between our blocking and non blocking VM, that is, what happens if a blocking operation starts about a key that is also "interested" by a non blocking operation at the same time?

For instance while SORT BY is executed, a few keys are being loaded in a blocking manner by the sort command. At the same time, another client may request the same keys with a simple GET key command, that will trigger the creation of an I/O job to load the key in background.

The only simple way to deal with this problem is to be able to kill I/O jobs in the main thread, so that if a key that we want to load or swap in a blocking way is in the REDIS_VM_LOADING or REDIS_VM_SWAPPING state (that is, there is an I/O job about this key), we can just kill the I/O job about this key, and go ahead with the blocking operation we want to perform.

This is not as trivial as it seems. At any given moment an I/O job can be in one of the following three queues: +
  • server.io_newjobs: the job was already queued but no thread is handling it.
  • server.io_processing: the job is being processed by an I/O thread.
  • server.io_processed: the job was already processed.
The function able to kill an I/O job is vmCancelThreadedIOJob, and this is what it does: +
  • If the job is in the newjobs queue, that's simple, removing the iojob structure from the queue is enough as no thread is still executing any operation.
  • If the job is in the processing queue, a thread is messing with our job (and possibly with the associated object!). The only thing we can do is waiting for the item to move to the next queue in a blocking way. Fortunately this condition happens very rarely so it's not a performance problem.
  • If the job is in the processed queue, we just mark it as canceled by setting the canceled field to 1 in the iojob structure. The function processing completed jobs will just ignore and free the job instead of really processing it.
+

Questions?

+This document is in no way complete, the only way to get the whole picture is reading the source code, but it should be a good introduction in order to make the code review / understanding a lot simpler.

Something is not clear about this page? Please leave a comment and I'll try to address the issue possibly integrating the answer in this document. + +
+ +
+
+ + + diff --git a/doc/VirtualMemoryUserGuide.html b/doc/VirtualMemoryUserGuide.html new file mode 100644 index 0000000000000000000000000000000000000000..7bc67b1580b773cff57bff64884fa48cbdb8f818 --- /dev/null +++ b/doc/VirtualMemoryUserGuide.html @@ -0,0 +1,67 @@ + + + + + + + +
+ + + +
+ + +

VirtualMemoryUserGuide

+ +
+ +
+ +
+ #sidebar RedisGuides

Virtual Memory User Guide

Redis Virtual Memory is a feature that will appear for the first time in a stable Redis distribution in Redis 2.0. However Virtual Memory (called VM starting from now) is already available and stable enough to be tested in the unstable branch of Redis available on Git.

Virtual Memory explained in simple words

Redis follows a Key-Value model. You have keys associated with some values. +Usually Redis takes both Keys and associated Values in memory. Sometimes this is not the best option, and while Keys must be taken in memory for the way Redis is designed (and in order to ensure fast lookups), Values can be swapped out to disk when they are rarely used.

In practical terms this means that if you have a dataset of 100,000 keys in memory, but only 10% of these keys are often used, Redis with Virtual Memory enabled will try to transfer the values associated to the rarely used keys on disk.

When these values are requested, as a result of a command issued by a client, the values are loaded back from the swap file to the main memory.

When using Virtual Memory is a good idea

Before using VM you should ask yourself if you really need it. Redis is a disk backed, in memory database. The right way to use Redis is almost always to have enough RAM to fit all the data in memory. Still there are scenarios where holding all the data in memory is not possible: +
  • Data access is very biased. Only a small percentage of keys (for instance related to active users in your web site) gets the vast majority of accesses. At the same time there is too much data per key to take everything in memory.
  • There is simply not enough memory available to hold all the data in memory, regardless of the data access pattern, and values are large. In this configuration Redis can be used as an on-disk DB where keys are in memory, so the key lookup is fast, but the access to the actual values require accessing the (slower) disk.
+An important concept to keep in mind is that Redis is not able to swap the keys, so if your memory problems are related to the fact you have too many keys with very small values, VM is not the solution.

Instead if a good amount of memory is used because values are pretty large (for example large strings, lists, sets or hashes with many elements), then VM can be a good idea.

Sometimes you can turn your "many keys with small values" problem into a "less keys but with very large values" one just using Hashes in order to group related data into fields of a single key. For instance instead of having a key for every attribute of your object you have a single key per object where Hash fields represent the different attributes.

VM Configuration

Configuring the VM is not hard but requires some care to set the best parameters accordingly to the requirements.

The VM is enabled and configured editing redis.conf, the first step is switching it on with:

+vm-enabled yes
+
Many other configuration options are able to change the behavior of VM. The rule is that you don't want to run with the default configuration, as every problem and dataset requires some tuning in order to get the maximum advantages.

The vm-max-memory setting

The vm-max-memory setting specifies how much memory Redis is free to use before starting swapping values on disk.

Basically if this memory limit is still not reached, no object will be swapped, Redis will work all in memory as usually. Once this limit is hit, enough objects are swapped out in order to return just under the limit.

The swapped objects are the ones with the highest "age" (that is, the number of seconds since they are not used in any way) mainly, but the "swappability" of an object is also proportional to the logarithm of its size in memory. So basically older objects are preferred, but when they are about the same age, bigger objects are preferred.

WARNING: Because keys can't be swapped out, Redis will not be able to honour the vm-max-memory setting if the keys alone are using more space than the limit.

The best value for this setting is enough RAM in order to hold the "working set" of data. In practical terms, just give Redis as much memory as you can, and swapping will work better.

Configuring the swap file

In order to transfer data from memory to disk, Redis uses a swap file. The swap file has nothing to do with durability of data, and can be removed when a Redis instance is terminated. Still the swap file should not be moved, deleted, or altered in any other way while Redis is running.

Because the Redis swap file is used mostly in a random access fashion, to put the swap file into a Solid State Disk will lead to better performances.

The swap file is divided into "pages". A value can be swapped into one or multiple pages, but a single page can't hold more than a value.

There is no direct way to tell Redis how much bytes of swap file it should be using. Instead two different values are configured, that multiplied together will produce the total number of bytes used. This two values are the number of pages inside the swap file, and the page size. It is possible to configure this two parameters in redis.conf.

  • The vm-pages configuration directive is used to set the total number of pages in the swap file.
  • the vm-page-size configuration directive is used in order to set the page size in bytes.
+So for instance if the page size is set to the value of 32 bytes, and the total number of pages is set to 10000000 (10 millions), the swap file can hold a total of 320 MB of data.

Because a single page can't be used to hold more than a value (but a value can be stored into multiple pages), care must be taken in setting this parameters. +Usually the best idea is setting the page size so that the majority of the values can be swapped using a few pages.

Threaded VM vs Blocking VM

Another very important configuration parameter is vm-max-threads:

+# The default vm-max-threads configuration
+vm-max-threads 4
+
This is the maximum number of threads used in order to perform I/O from/to the swap file. A good value is just to match the number of cores in your system.

However the special value of "0" will enable blocking VM. When VM is configured to be blocking it performs the I/O in a synchronous blocking way. This is what you can expect from blocking VM: +
  • Clients accessing swapped out keys will block other clients while reading from disk, so the latency experienced by clients can be larger, especially if the disk is slow or busy and/or if there are big values swapped on disk.
  • The blocking VM performances are overall better, as there is no time lost in synchronization, spawning of threads, resuming blocked clients waiting for values.
So if you are willing to accept an higher latency from time to time, blocking VM can be a good pick, especially if swapping happens rarely as most of your often accessed data happens to fit in your memory.

If instead you have a lot of swap in and swap out operations and you have many cores that you want to exploit, and in general when you don't want that clients dealing with swapped values will block other clients for a few milliseconds (or more if the swapped value is very big), then it's better to use threaded VM.

To experiment with your dataset and different configurations is warmly encouraged...

Random things to know

+

A good place for the swap file

In many configurations the swap file can be fairly large, even 40GB or more. +Not all the kind of file systems are able to deal with large files in a good way, especially Mac OS X file system tends to be really lame about it.

The suggestion is to use Linux ext3 file system, or any other file system with good support for sparse files. What are sparse files?

Sparse files are files where a lot of the content happens to be empty. Advanced file systems like ext2, ext3, ext4, ReiserFS, Reiser4, and many others, are able to encode these files in a more efficient way and will allocate more space for the file when needed, that is, when more actual blocks of the file will be used.

The swap file is obviously pretty sparse, especially if the server is running since little time or it is much bigger compared to the amount of data swapped out. A file system not supporting sparse files can at some point block the Redis process while creating a very big file at once.

For a list of file systems supporting sparse files check this Wikipedia page comparing different file systems.

Monitoring the VM

Once you have a Redis system with VM enabled up and running, you may be very interested in knowing how it's working: how many objects are swapped in total, the number of objects swapped and loaded every second, and so forth.

There is a utility that is very handy in checking how the VM is working, that is part of Redis Tools. This tool is called redis-stat, and using it is pretty straightforward:

+$ ./redis-stat vmstat
+ --------------- objects --------------- ------ pages ------ ----- memory -----
+ load-in  swap-out  swapped   delta      used     delta      used     delta    
+ 138837   1078936   800402    +800402    807620   +807620    209.50M  +209.50M  
+ 4277     38011     829802    +29400     837441   +29821     206.47M  -3.03M   
+ 3347     39508     862619    +32817     870340   +32899     202.96M  -3.51M   
+ 4445     36943     890646    +28027     897925   +27585     199.92M  -3.04M   
+ 10391    16902     886783    -3863     894104   -3821     200.22M  +309.56K  
+ 8888     19507     888371    +1588      895678   +1574      200.05M  -171.81K 
+ 8377     20082     891664    +3293      899850   +4172      200.10M  +53.55K   
+ 9671     20210     892586    +922       899917   +67        199.82M  -285.30K 
+ 10861    16723     887638    -4948     895003   -4914     200.13M  +312.35K  
+ 9541     21945     890618    +2980      898004   +3001      199.94M  -197.11K 
+ 9689     17257     888345    -2273     896405   -1599     200.27M  +337.77K  
+ 10087    18784     886771    -1574     894577   -1828     200.36M  +91.60K   
+ 9330     19350     887411    +640       894817   +240       200.17M  -189.72K 
+
+The above output is about a redis-server with VM enabled, around 1 million keys inside, and a lot of simulated load using the redis-load utility.

As you can see from the output a number of load-in and swap-out operations are happening every second. Note that the first line reports the actual values since the server was started, while the next lines are differences compared to the previous reading.

If you assigned enough memory to hold your working set of data, probably you should see a lot less dramatic swapping happening, so redis-stat can be a really valuable tool in order to understand if you need to shop for RAM ;)

Redis with VM enabled: better .rdb files or Append Only File?

When VM is enabled, saving and loading the database are much slower operations. A DB that usually loads in 2 seconds takes 13 seconds with VM enabled if the server is configured to use the smallest memory possible (that is, vm-max-memory set to 0).

So you probably want to switch to a configuration using the Append Only File for persistence, so that you can perform the BGREWRITEAOF from time to time.

It is important to note that while a BGSAVE or BGREWRITEAOF is in progress Redis does not swap new values on disk. The VM will be read-only while there is another child accessing it. So if you have a lot of writes while there is a child working, the memory usage may grow.

Using as little memory as possible

An interesting setup to turn Redis into an on-disk DB with just keys in memory is setting vm-max-memory to 0. If you don't mind some latency more and poorer performances but want to use very little memory for very big values, this is a good setup.

In this setup you should first try setting the VM as blocking (vm-max-threads 0) as with this configuration and high traffic the number of swap in and swap out operations will be huge, and threading will consume a lot of resources compared to a simple blocking implementation.

VM Stability

VM is still experimental code, but in the latest weeks it was tested in many ways in development environments, and even in some production environment. No bugs were noticed during this testing period. Still the more obscure bugs may happen in non controlled environments where there are setups that we are not able to reproduce for some reason.

In this stage you are encouraged to try VM in your development environment, and even in production if your DB is not mission critical, but for instance just a big persistent cache of data that may go away without too much problems.

Please report any problem you will notice to the Redis Google Group or by IRC joining the #redis IRC channel on freenode. + +
+ +
+
+ + + diff --git a/doc/ZrankCommand.html b/doc/ZrankCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..a0a3e39f38e69425314131a6fdfe2cd2b8ecd39c --- /dev/null +++ b/doc/ZrankCommand.html @@ -0,0 +1,42 @@ + + + + + + + +
+ + + +
+ + +

ZrankCommand

+ +
+ +
+ +
+ +

ZRANK _key_ _member_ (Redis >

1.3.4) = +

ZREVRANK _key_ _member_ (Redis >

1.3.4) = +Time complexity: O(log(N))
ZRANK returns the rank of the member in the sorted set, with scores ordered from low to high. ZREVRANK returns the rank with scores ordered from high to low. When the given member does not exist in the sorted set, the special value 'nil' is returned. The returned rank (or index) of the member is 0-based for both commands.
+

Return value

Bulk reply, specifically:

+the rank (an integer number) represented as an string.
+
+
+ +
+
+ + + diff --git a/doc/ZremrangebyrankCommand.html b/doc/ZremrangebyrankCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..b2562295fa47cfe275f1a5f559308a62ad79a11c --- /dev/null +++ b/doc/ZremrangebyrankCommand.html @@ -0,0 +1,39 @@ + + + + + + + +
+ + + +
+
+ +ZremrangebyrankCommand: Contents
  ZREMRANGEBYRANK _key_ _start_ _end_ (Redis >
    Return value +
+ +

ZremrangebyrankCommand

+ +
+ +
+ +
+ +

ZREMRANGEBYRANK _key_ _start_ _end_ (Redis >

1.3.4) = +Time complexity: O(log(N))+O(M) with N being the number of elements in the sorted set and M the number of elements removed by the operation
Remove all elements in the sorted set at key with rank between start and end. Start and end are 0-based with rank 0 being the element with the lowest score. Both start and end can be negative numbers, where they indicate offsets starting at the element with the highest rank. For example: -1 is the element with the highest score, -2 the element with the second highest score and so forth.
+

Return value

Integer reply, specifically the number of elements removed. +
+ +
+
+ + + diff --git a/doc/ZunionCommand.html b/doc/ZunionCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..edb52a9c9effff91e3d3d37d3ee609e1922da870 --- /dev/null +++ b/doc/ZunionCommand.html @@ -0,0 +1,41 @@ + + + + + + + +
+ + + +
+ + +

ZunionCommand

+ +
+ +
+ +
+ +

ZUNION / ZINTER _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.5) =

Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
+
As the terms imply, the ZINTER command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNION command inserts all elements across all inputs.
+
Using the WEIGHTS option, it is possible to add weight to each input sorted set. This means that the score of each element in the sorted set is first multiplied by this weight before being passed to the aggregation. When this option is not given, all weights default to 1.
+
With the AGGREGATE option, it's possible to specify how the results of the union or intersection are aggregated. This option defaults to SUM, where the score of an element is summed across the inputs where it exists. When this option is set to be either MIN or MAX, the resulting set will contain the minimum or maximum score of an element across the inputs where it exists.
+

Return value

Integer reply, specifically the number of elements in the sorted set at dstkey. +
+ +
+
+ + + diff --git a/doc/ZunionstoreCommand.html b/doc/ZunionstoreCommand.html new file mode 100644 index 0000000000000000000000000000000000000000..a9f743260322ba8ea9fd614e047676cb2a8b83af --- /dev/null +++ b/doc/ZunionstoreCommand.html @@ -0,0 +1,41 @@ + + + + + + + +
+ + + +
+ + +

ZunionstoreCommand

+ +
+ +
+ +
+ +

ZUNION / ZINTER _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.5) =

Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
+
As the terms imply, the ZINTER command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNION command inserts all elements across all inputs.
+
Using the WEIGHTS option, it is possible to add weight to each input sorted set. This means that the score of each element in the sorted set is first multiplied by this weight before being passed to the aggregation. When this option is not given, all weights default to 1.
+
With the AGGREGATE option, it's possible to specify how the results of the union or intersection are aggregated. This option defaults to SUM, where the score of an element is summed across the inputs where it exists. When this option is set to be either MIN or MAX, the resulting set will contain the minimum or maximum score of an element across the inputs where it exists.
+

Return value

Integer reply, specifically the number of elements in the sorted set at dstkey. +
+ +
+
+ + + diff --git a/mkreleasehdr.sh b/mkreleasehdr.sh new file mode 100755 index 0000000000000000000000000000000000000000..c458e6480cfb3f0f6c4a2abfede37bcaecef2c3e --- /dev/null +++ b/mkreleasehdr.sh @@ -0,0 +1,9 @@ +#!/bin/sh +GIT_SHA1=$( (git show-ref --head --hash=8 2> /dev/null || echo 00000000) | head -n1) +GIT_DIRTY=$(git diff 2> /dev/null | wc -l) +test -f release.h || touch release.h +(cat release.h | grep SHA1 | grep $GIT_SHA1) && \ +(cat release.h | grep DIRTY | grep $GIT_DIRTY) && exit 0 # Already uptodate +echo "#define REDIS_GIT_SHA1 \"$GIT_SHA1\"" > release.h +echo "#define REDIS_GIT_DIRTY \"$GIT_DIRTY\"" >> release.h +touch redis.c # force recompile of redis.c diff --git a/redis-cli.c b/redis-cli.c index 863df9bf857f81d19d118ad26999b276d7b7ace8..ce8dbde8f863a6462df77a1f8388b2996986bb33 100644 --- a/redis-cli.c +++ b/redis-cli.c @@ -54,6 +54,7 @@ static struct config { long repeat; int dbnum; int interactive; + int shutdown; int monitor_mode; int pubsub_mode; int raw_output; @@ -313,7 +314,10 @@ static int cliReadMultiBulkReply(int fd) { static int cliReadReply(int fd) { char type; - if (anetRead(fd,&type,1) <= 0) exit(1); + if (anetRead(fd,&type,1) <= 0) { + if (config.shutdown) return 0; + exit(1); + } switch(type) { case '-': printf("(error) "); @@ -356,7 +360,6 @@ static int selectDb(int fd) { static int cliSendCommand(int argc, char **argv, int repeat) { struct redisCommand *rc = lookupCommand(argv[0]); - int shutdown = 0; int fd, j, retval = 0; sds cmd; @@ -372,7 +375,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { return 1; } - if (!strcasecmp(rc->name,"shutdown")) shutdown = 1; + if (!strcasecmp(rc->name,"shutdown")) config.shutdown = 1; if (!strcasecmp(rc->name,"monitor")) config.monitor_mode = 1; if (!strcasecmp(rc->name,"subscribe") || !strcasecmp(rc->name,"psubscribe")) config.pubsub_mode = 1; @@ -410,8 +413,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) { } retval = cliReadReply(fd); + if (retval) { - 
return shutdown ? 0 : retval; + return retval; } } return 0; @@ -594,6 +598,7 @@ int main(int argc, char **argv) { config.hostport = 6379; config.repeat = 1; config.dbnum = 0; + config.shutdown = 0; config.interactive = 0; config.monitor_mode = 0; config.pubsub_mode = 0; diff --git a/redis.c b/redis.c index 9e4a5dd3fe302605bdb6ecdbfae34f2555030d3c..c0f2d0faf3f6ed375d34a44822027efc235cc2df 100644 --- a/redis.c +++ b/redis.c @@ -27,7 +27,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#define REDIS_VERSION "1.3.10" +#define REDIS_VERSION "1.3.12" #include "fmacros.h" #include "config.h" @@ -76,6 +76,7 @@ #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ #include "zipmap.h" /* Compact dictionary-alike data structure */ #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ +#include "release.h" /* Release and/or git repository information */ /* Error codes */ #define REDIS_OK 0 @@ -2760,21 +2761,6 @@ static void addReplyDouble(redisClient *c, double d) { (unsigned long) strlen(buf),buf)); } -static void addReplyLong(redisClient *c, long l) { - char buf[128]; - size_t len; - - if (l == 0) { - addReply(c,shared.czero); - return; - } else if (l == 1) { - addReply(c,shared.cone); - return; - } - len = snprintf(buf,sizeof(buf),":%ld\r\n",l); - addReplySds(c,sdsnewlen(buf,len)); -} - static void addReplyLongLong(redisClient *c, long long ll) { char buf[128]; size_t len; @@ -2786,8 +2772,11 @@ static void addReplyLongLong(redisClient *c, long long ll) { addReply(c,shared.cone); return; } - len = snprintf(buf,sizeof(buf),":%lld\r\n",ll); - addReplySds(c,sdsnewlen(buf,len)); + buf[0] = ':'; + len = ll2string(buf+1,sizeof(buf)-1,ll); + buf[len+1] = '\r'; + buf[len+2] = '\n'; + addReplySds(c,sdsnewlen(buf,len+3)); } static void addReplyUlong(redisClient *c, unsigned long ul) { @@ -2806,7 +2795,8 @@ static void addReplyUlong(redisClient *c, unsigned long ul) { } static void addReplyBulkLen(redisClient *c, robj *obj) { - size_t len; + size_t len, intlen; + char buf[128]; if 
(obj->encoding == REDIS_ENCODING_RAW) { len = sdslen(obj->ptr); @@ -2823,7 +2813,11 @@ static void addReplyBulkLen(redisClient *c, robj *obj) { len++; } } - addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len)); + buf[0] = '$'; + intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len); + buf[intlen+1] = '\r'; + buf[intlen+2] = '\n'; + addReplySds(c,sdsnewlen(buf,intlen+3)); } static void addReplyBulk(redisClient *c, robj *obj) { @@ -4334,8 +4328,7 @@ static void incrDecrCommand(redisClient *c, long long incr) { if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return; value += incr; - o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value)); - o = tryObjectEncoding(o); + o = createStringObjectFromLongLong(value); retval = dictAdd(c->db->dict,c->argv[1],o); if (retval == DICT_ERR) { dictReplace(c->db->dict,c->argv[1],o); @@ -4465,7 +4458,7 @@ static void delCommand(redisClient *c) { deleted++; } } - addReplyLong(c,deleted); + addReplyLongLong(c,deleted); } static void existsCommand(redisClient *c) { @@ -4750,7 +4743,7 @@ static void pushGenericCommand(redisClient *c, int where) { incrRefCount(c->argv[2]); } server.dirty++; - addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(list))); + addReplyLongLong(c,listLength(list)); } static void lpushCommand(redisClient *c) { @@ -5252,7 +5245,7 @@ static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long if (dictSize((dict*)dstset->ptr) > 0) { dictAdd(c->db->dict,dstkey,dstset); incrRefCount(dstkey); - addReplyLong(c,dictSize((dict*)dstset->ptr)); + addReplyLongLong(c,dictSize((dict*)dstset->ptr)); } else { decrRefCount(dstset); addReply(c,shared.czero); @@ -5355,7 +5348,7 @@ static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnu if (dictSize((dict*)dstset->ptr) > 0) { dictAdd(c->db->dict,dstkey,dstset); incrRefCount(dstkey); - addReplyLong(c,dictSize((dict*)dstset->ptr)); + addReplyLongLong(c,dictSize((dict*)dstset->ptr)); } else 
{ decrRefCount(dstset); addReply(c,shared.czero); @@ -5834,7 +5827,7 @@ static void zremrangebyscoreCommand(redisClient *c) { if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) deleteKey(c->db,c->argv[1]); server.dirty += deleted; - addReplyLong(c,deleted); + addReplyLongLong(c,deleted); } static void zremrangebyrankCommand(redisClient *c) { @@ -5872,7 +5865,7 @@ static void zremrangebyrankCommand(redisClient *c) { if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) deleteKey(c->db,c->argv[1]); server.dirty += deleted; - addReplyLong(c, deleted); + addReplyLongLong(c, deleted); } typedef struct { @@ -6058,7 +6051,7 @@ static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { if (dstzset->zsl->length) { dictAdd(c->db->dict,dstkey,dstobj); incrRefCount(dstkey); - addReplyLong(c, dstzset->zsl->length); + addReplyLongLong(c, dstzset->zsl->length); server.dirty++; } else { decrRefCount(dstobj); @@ -6254,7 +6247,7 @@ static void genericZrangebyscoreCommand(redisClient *c, int justcount) { if (limit > 0) limit--; } if (justcount) { - addReplyLong(c,(long)rangelen); + addReplyLongLong(c,(long)rangelen); } else { lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n", withscores ? 
(rangelen*2) : rangelen); @@ -6324,9 +6317,9 @@ static void zrankGenericCommand(redisClient *c, int reverse) { rank = zslGetRank(zsl, *score, c->argv[2]); if (rank) { if (reverse) { - addReplyLong(c, zsl->length - rank); + addReplyLongLong(c, zsl->length - rank); } else { - addReplyLong(c, rank-1); + addReplyLongLong(c, rank-1); } } else { addReply(c,shared.nullbulk); @@ -7183,6 +7176,8 @@ static sds genRedisInfoString(void) { bytesToHuman(hmem,zmalloc_used_memory()); info = sdscatprintf(sdsempty(), "redis_version:%s\r\n" + "redis_git_sha1:%s\r\n" + "redis_git_dirty:%d\r\n" "arch_bits:%s\r\n" "multiplexing_api:%s\r\n" "process_id:%ld\r\n" @@ -7200,13 +7195,15 @@ static sds genRedisInfoString(void) { "total_connections_received:%lld\r\n" "total_commands_processed:%lld\r\n" "expired_keys:%lld\r\n" - "hash_max_zipmap_entries:%ld\r\n" - "hash_max_zipmap_value:%ld\r\n" + "hash_max_zipmap_entries:%zu\r\n" + "hash_max_zipmap_value:%zu\r\n" "pubsub_channels:%ld\r\n" "pubsub_patterns:%u\r\n" "vm_enabled:%d\r\n" "role:%s\r\n" ,REDIS_VERSION, + REDIS_GIT_SHA1, + strtol(REDIS_GIT_DIRTY,NULL,10) > 0, (sizeof(long) == 8) ? "64" : "32", aeGetApiName(), (long) getpid(), @@ -8727,6 +8724,48 @@ static void aofRemoveTempFile(pid_t childpid) { * as a fully non-blocking VM. */ +/* Called when the user switches from "appendonly yes" to "appendonly no" + * at runtime using the CONFIG command. */ +static void stopAppendOnly(void) { + flushAppendOnlyFile(); + fsync(server.appendfd); + close(server.appendfd); + + server.appendfd = -1; + server.appendseldb = -1; + server.appendonly = 0; + /* rewrite operation in progress? 
kill it, wait child exit */ + if (server.bgsavechildpid != -1) { + int statloc; + + if (kill(server.bgsavechildpid,SIGKILL) != -1) + wait3(&statloc,0,NULL); + /* reset the buffer accumulating changes while the child saves */ + sdsfree(server.bgrewritebuf); + server.bgrewritebuf = sdsempty(); + server.bgsavechildpid = -1; + } +} + +/* Called when the user switches from "appendonly no" to "appendonly yes" + * at runtime using the CONFIG command. */ +static int startAppendOnly(void) { + server.appendonly = 1; + server.lastfsync = time(NULL); + server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644); + if (server.appendfd == -1) { + redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno)); + return REDIS_ERR; + } + if (rewriteAppendOnlyFileBackground() == REDIS_ERR) { + server.appendonly = 0; + close(server.appendfd); + redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. 
Check the above logs for more info about the error.",strerror(errno)); + return REDIS_ERR; + } + return REDIS_OK; +} + /* =================== Virtual Memory - Blocking Side ====================== */ static void vmInit(void) { @@ -9829,6 +9868,8 @@ static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) { static void configSetCommand(redisClient *c) { robj *o = getDecodedObject(c->argv[3]); + long long ll; + if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) { zfree(server.dbfilename); server.dbfilename = zstrdup(o->ptr); @@ -9839,7 +9880,13 @@ static void configSetCommand(redisClient *c) { zfree(server.masterauth); server.masterauth = zstrdup(o->ptr); } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) { - server.maxmemory = strtoll(o->ptr, NULL, 10); + if (getLongLongFromObject(o,&ll) == REDIS_ERR || + ll < 0) goto badfmt; + server.maxmemory = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || + ll < 0 || ll > LONG_MAX) goto badfmt; + server.maxidletime = ll; } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) { if (!strcasecmp(o->ptr,"no")) { server.appendfsync = APPENDFSYNC_NO; @@ -9850,6 +9897,23 @@ static void configSetCommand(redisClient *c) { } else { goto badfmt; } + } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) { + int old = server.appendonly; + int new = yesnotoi(o->ptr); + + if (new == -1) goto badfmt; + if (old != new) { + if (new == 0) { + stopAppendOnly(); + } else { + if (startAppendOnly() == REDIS_ERR) { + addReplySds(c,sdscatprintf(sdsempty(), + "-ERR Unable to turn on AOF. 
Check server logs.\r\n")); + decrRefCount(o); + return; + } + } + } } else if (!strcasecmp(c->argv[2]->ptr,"save")) { int vlen, j; sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen); @@ -9930,11 +9994,24 @@ static void configGetCommand(redisClient *c) { if (stringmatch(pattern,"maxmemory",0)) { char buf[128]; - snprintf(buf,128,"%llu\n",server.maxmemory); + ll2string(buf,128,server.maxmemory); addReplyBulkCString(c,"maxmemory"); addReplyBulkCString(c,buf); matches++; } + if (stringmatch(pattern,"timeout",0)) { + char buf[128]; + + ll2string(buf,128,server.maxidletime); + addReplyBulkCString(c,"timeout"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"appendonly",0)) { + addReplyBulkCString(c,"appendonly"); + addReplyBulkCString(c,server.appendonly ? "yes" : "no"); + matches++; + } if (stringmatch(pattern,"appendfsync",0)) { char *policy; @@ -10036,7 +10113,7 @@ static int pubsubSubscribeChannel(redisClient *c, robj *channel) { addReply(c,shared.mbulk3); addReply(c,shared.subscribebulk); addReplyBulk(c,channel); - addReplyLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns)); + addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns)); return retval; } @@ -10072,7 +10149,7 @@ static int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) { addReply(c,shared.mbulk3); addReply(c,shared.unsubscribebulk); addReplyBulk(c,channel); - addReplyLong(c,dictSize(c->pubsub_channels)+ + addReplyLongLong(c,dictSize(c->pubsub_channels)+ listLength(c->pubsub_patterns)); } @@ -10098,7 +10175,7 @@ static int pubsubSubscribePattern(redisClient *c, robj *pattern) { addReply(c,shared.mbulk3); addReply(c,shared.psubscribebulk); addReplyBulk(c,pattern); - addReplyLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns)); + addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns)); return retval; } @@ -10123,7 +10200,7 @@ static int pubsubUnsubscribePattern(redisClient *c, 
robj *pattern, int notify) { addReply(c,shared.mbulk3); addReply(c,shared.punsubscribebulk); addReplyBulk(c,pattern); - addReplyLong(c,dictSize(c->pubsub_channels)+ + addReplyLongLong(c,dictSize(c->pubsub_channels)+ listLength(c->pubsub_patterns)); } decrRefCount(pattern); @@ -10251,7 +10328,7 @@ static void punsubscribeCommand(redisClient *c) { static void publishCommand(redisClient *c) { int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]); - addReplyLong(c,receivers); + addReplyLongLong(c,receivers); } /* ================================= Debugging ============================== */ diff --git a/sha1.c b/sha1.c index 988ede7f053438543c4d97172a4e60ea35ec490b..7250fc6023179827b51c5ce96fd8bc55fa4032bd 100644 --- a/sha1.c +++ b/sha1.c @@ -30,7 +30,7 @@ A million repetitions of "a" #if (BSD >= 199103) # include #else -#ifdef linux +#if defined(linux) || defined(__linux__) # include #else #define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */ @@ -49,7 +49,7 @@ A million repetitions of "a" defined(apollo) || defined(__convex__) || defined(_CRAY) || \ defined(__hppa) || defined(__hp9000) || \ defined(__hp9000s300) || defined(__hp9000s700) || \ - defined (BIT_ZERO_ON_LEFT) || defined(m68k) + defined (BIT_ZERO_ON_LEFT) || defined(m68k) || defined(__sparc) #define BYTE_ORDER BIG_ENDIAN #endif #endif /* linux */ diff --git a/solarisfixes.h b/solarisfixes.h index 8786965ae30561471502c3ed435d3f3c09e0660e..5be2b647cdc7a0f267436461072fa5d1d0ae1814 100644 --- a/solarisfixes.h +++ b/solarisfixes.h @@ -15,4 +15,6 @@ #define isinf(x) \ __extension__ ({ __typeof (x) __x_i = (x); \ __builtin_expect(!isnan(__x_i) && !isfinite(__x_i), 0); }) + +#define u_int uint #endif /* __GNUC__ */ diff --git a/staticsymbols.h b/staticsymbols.h index 96f200d5560ab63c666abf1081e4b4a12c0f6aa5..9f8481d55fe70acf4caac394316bc867465a2780 100644 --- a/staticsymbols.h +++ b/staticsymbols.h @@ -8,7 +8,6 @@ static struct redisFunctionSym symsTable[] = { {"addReplyBulkCString",(unsigned 
long)addReplyBulkCString}, {"addReplyBulkLen",(unsigned long)addReplyBulkLen}, {"addReplyDouble",(unsigned long)addReplyDouble}, -{"addReplyLong",(unsigned long)addReplyLong}, {"addReplyLongLong",(unsigned long)addReplyLongLong}, {"addReplySds",(unsigned long)addReplySds}, {"addReplyUlong",(unsigned long)addReplyUlong}, diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 8adce3e8754ca5acaebd88556b3b1e2f829199df..40f21925a3b645c4d1b2ebd8c106be009a06a45b 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -13,7 +13,7 @@ proc kill_server config { # check for leaks catch { if {[string match {*Darwin*} [exec uname -a]]} { - test {Check for memory leaks} { + test "Check for memory leaks (pid $pid)" { exec leaks $pid } {*0 leaks*} }