From a845e9bce188ad9e86f4200b944d1414379ae236 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Mon, 4 Apr 2022 12:07:27 +0300 Subject: [PATCH] Implement serialization of HSET --- README.md | 62 +- src/redis/CMakeLists.txt | 2 +- src/redis/ziplist.c | 2586 ++++++++++++++++++++++++++++++++++++++ src/redis/ziplist.h | 74 ++ src/server/rdb_save.cc | 57 +- src/server/rdb_save.h | 1 + src/server/rdb_test.cc | 7 +- 7 files changed, 2753 insertions(+), 36 deletions(-) create mode 100644 src/redis/ziplist.c create mode 100644 src/redis/ziplist.h diff --git a/README.md b/README.md index d7afbec..ab0ed37 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ cd build-opt && ninja dragonfly for more options, run `./dragonfly --help` -## Milestone Egg 🥚 +## Milestone - Source Available API 1.0 - [X] String family @@ -128,27 +128,8 @@ API 1.0 - [ ] RANDOMKEY - [ ] MOVE -In addition, we want to support efficient expiry (TTL) and cache eviction algorithms. -We should implement basic memory management support. For Master/Slave replication we should design -a distributed log format. - -### Memchache API -- [X] set -- [X] get -- [X] replace -- [X] add -- [X] stats (partial) -- [x] append -- [x] prepend -- [x] delete -- [x] flush_all -- [x] incr -- [x] decr -- [x] version -- [x] quit - API 2.0 -- [ ] List Family +- [X] List Family - [X] BLPOP - [X] BRPOP - [ ] BRPOPLPUSH @@ -240,6 +221,26 @@ API 2.0 - [ ] PFCOUNT - [ ] PFMERGE +In addition, we want to support efficient expiry (TTL) and cache eviction algorithms. +We should implement basic memory management support. For Master/Slave replication we should design +a distributed log format. 
+ +### Memchache API +- [X] set +- [X] get +- [X] replace +- [X] add +- [X] stats (partial) +- [x] append +- [x] prepend +- [x] delete +- [x] flush_all +- [x] incr +- [x] decr +- [x] version +- [x] quit + + Commands that I prefer avoid implementing before launch: - PUNSUBSCRIBE - PSUBSCRIBE @@ -252,22 +253,19 @@ Commands that I prefer avoid implementing before launch: Also, I would omit keyspace notifications. For that I would like to deep dive and learn exact use-cases for this API. -### Random commands we implemented along the way +### Random commands we implemented as decorators along the way - [X] ROLE (2.8) decorator for for master withour replicas - [X] UNLINK (4.0) decorator for DEL command - [X] BGSAVE - [X] FUNCTION FLUSH -## Milestone Nymph -API 2,3,4 without cluster support, without modules, without memory inspection commands. -Without support for keyspace notifications. - -Design config support. ~140 commands overall... -## Milestone Molt -API 5,6 - without cluster and modules. Streams support. ~80 commands overall. -## Milestone Adult -TBD. +## Milestone Stability +APIs 3,4,5 without cluster support, without modules, without memory introspection commands. +Without geo commands and without support for keyspace notifications, without streams. +Design config support. ~10-20 commands overall... +Probably implement cluster-API decorators to allow cluster-configured clients to connect to a single +instance. ## Design decisions along the way ### Expiration deadlines with relative accuracy @@ -280,3 +278,5 @@ expiries of `PEXPIRE key 10010` will expire exactly after 10 seconds and 10ms. H Such rounding has at most 0.002% error which I hope is acceptable for large ranges. If it breaks your use-cases - talk to me or open an issue and explain your case. + +For more detailed differences between this and Redis implementations [see here](doc/differences.md). 
\ No newline at end of file diff --git a/src/redis/CMakeLists.txt b/src/redis/CMakeLists.txt index 2dda422..eaca9e4 100644 --- a/src/redis/CMakeLists.txt +++ b/src/redis/CMakeLists.txt @@ -10,7 +10,7 @@ endif() add_library(redis_lib crc64.c crcspeed.c debug.c dict.c endianconv.c intset.c listpack.c mt19937-64.c object.c lzf_c.c lzf_d.c sds.c sha256.c - quicklist.c redis_aux.c siphash.c t_hash.c t_zset.c util.c ${ZMALLOC_SRC}) + quicklist.c redis_aux.c siphash.c t_hash.c t_zset.c util.c ziplist.c ${ZMALLOC_SRC}) cxx_link(redis_lib ${ZMALLOC_DEPS}) diff --git a/src/redis/ziplist.c b/src/redis/ziplist.c new file mode 100644 index 0000000..50737b7 --- /dev/null +++ b/src/redis/ziplist.c @@ -0,0 +1,2586 @@ +/* The ziplist is a specially encoded dually linked list that is designed + * to be very memory efficient. It stores both strings and integer values, + * where integers are encoded as actual integers instead of a series of + * characters. It allows push and pop operations on either side of the list + * in O(1) time. However, because every operation requires a reallocation of + * the memory used by the ziplist, the actual complexity is related to the + * amount of memory used by the ziplist. + * + * ---------------------------------------------------------------------------- + * + * ZIPLIST OVERALL LAYOUT + * ====================== + * + * The general layout of the ziplist is as follows: + * + * <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend> + * + * NOTE: all fields are stored in little endian, if not specified otherwise. + * + * <uint32_t zlbytes> is an unsigned integer to hold the number of bytes that + * the ziplist occupies, including the four bytes of the zlbytes field itself. + * This value needs to be stored to be able to resize the entire structure + * without the need to traverse it first. + * + * <uint32_t zltail> is the offset to the last entry in the list. This allows + * a pop operation on the far side of the list without the need for full + * traversal. + * + * <uint16_t zllen> is the number of entries. 
When there are more than + * 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the + * entire list to know how many items it holds. + * + * <uint8_t zlend> is a special entry representing the end of the ziplist. + * Is encoded as a single byte equal to 255. No other normal entry starts + * with a byte set to the value of 255. + * + * ZIPLIST ENTRIES + * =============== + * + * Every entry in the ziplist is prefixed by metadata that contains two pieces + * of information. First, the length of the previous entry is stored to be + * able to traverse the list from back to front. Second, the entry encoding is + * provided. It represents the entry type, integer or string, and in the case + * of strings it also represents the length of the string payload. + * So a complete entry is stored like this: + * + * <prevlen> <encoding> <entry-data> + * + * Sometimes the encoding represents the entry itself, like for small integers + * as we'll see later. In such a case the <entry-data> part is missing, and we + * could have just: + * + * <prevlen> <encoding> + * + * The length of the previous entry, <prevlen>, is encoded in the following way: + * If this length is smaller than 254 bytes, it will only consume a single + * byte representing the length as an unsigned 8 bit integer. When the length + * is greater than or equal to 254, it will consume 5 bytes. The first byte is + * set to 254 (FE) to indicate a larger value is following. The remaining 4 + * bytes take the length of the previous entry as value. + * + * So practically an entry is encoded in the following way: + * + * <prevlen from 0 to 253> <encoding> <entry> + * + * Or alternatively if the previous entry length is greater than 253 bytes + * the following encoding is used: + * + * 0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry> + * + * The encoding field of the entry depends on the content of the + * entry. When the entry is a string, the first 2 bits of the encoding first + * byte will hold the type of encoding used to store the length of the string, + * followed by the actual length of the string. 
When the entry is an integer + * the first 2 bits are both set to 1. The following 2 bits are used to specify + * what kind of integer will be stored after this header. An overview of the + * different types and encodings is as follows. The first byte is always enough + * to determine the kind of entry. + * + * |00pppppp| - 1 byte + * String value with length less than or equal to 63 bytes (6 bits). + * "pppppp" represents the unsigned 6 bit length. + * |01pppppp|qqqqqqqq| - 2 bytes + * String value with length less than or equal to 16383 bytes (14 bits). + * IMPORTANT: The 14 bit number is stored in big endian. + * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes + * String value with length greater than or equal to 16384 bytes. + * Only the 4 bytes following the first byte represents the length + * up to 2^32-1. The 6 lower bits of the first byte are not used and + * are set to zero. + * IMPORTANT: The 32 bit number is stored in big endian. + * |11000000| - 3 bytes + * Integer encoded as int16_t (2 bytes). + * |11010000| - 5 bytes + * Integer encoded as int32_t (4 bytes). + * |11100000| - 9 bytes + * Integer encoded as int64_t (8 bytes). + * |11110000| - 4 bytes + * Integer encoded as 24 bit signed (3 bytes). + * |11111110| - 2 bytes + * Integer encoded as 8 bit signed (1 byte). + * |1111xxxx| - (with xxxx between 0001 and 1101) immediate 4 bit integer. + * Unsigned integer from 0 to 12. The encoded value is actually from + * 1 to 13 because 0000 and 1111 can not be used, so 1 should be + * subtracted from the encoded 4 bit value to obtain the right value. + * |11111111| - End of ziplist special entry. + * + * Like for the ziplist header, all the integers are represented in little + * endian byte order, even when this code is compiled in big endian systems. + * + * EXAMPLES OF ACTUAL ZIPLISTS + * =========================== + * + * The following is a ziplist containing the two elements representing + * the strings "2" and "5". 
It is composed of 15 bytes, that we visually + * split into sections: + * + * [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff] + * | | | | | | + * zlbytes zltail entries "2" "5" end + * + * The first 4 bytes represent the number 15, that is the number of bytes + * the whole ziplist is composed of. The second 4 bytes are the offset + * at which the last ziplist entry is found, that is 12, in fact the + * last entry, that is "5", is at offset 12 inside the ziplist. + * The next 16 bit integer represents the number of elements inside the + * ziplist, its value is 2 since there are just two elements inside. + * Finally "00 f3" is the first entry representing the number 2. It is + * composed of the previous entry length, which is zero because this is + * our first entry, and the byte F3 which corresponds to the encoding + * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the "F" + * higher order bits 1111, and subtract 1 from the "3", so the entry value + * is "2". The next entry has a prevlen of 02, since the first entry is + * composed of exactly two bytes. The entry itself, F6, is encoded exactly + * like the first entry, and 6-1 = 5, so the value of the entry is 5. + * Finally the special entry FF signals the end of the ziplist. + * + * Adding another element to the above string with the value "Hello World" + * allows us to show how the ziplist encodes small strings. We'll just show + * the hex dump of the entry itself. Imagine the bytes as following the + * entry that stores "5" in the ziplist above: + * + * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64] + * + * The first byte, 02, is the length of the previous entry. The next + * byte represents the encoding in the pattern |00pppppp| that means + * that the entry is a string of length <pppppp>, so 0B means that + * an 11 bytes string follows. From the third byte (48) to the last (64) + * there are just the ASCII characters for "Hello World". 
+ * + * ---------------------------------------------------------------------------- + * + * Copyright (c) 2009-2012, Pieter Noordhuis + * Copyright (c) 2009-2017, Salvatore Sanfilippo + * Copyright (c) 2020, Redis Labs, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include "zmalloc.h" +#include "util.h" +#include "ziplist.h" +#include "config.h" +#include "endianconv.h" + +#define ZIP_END 255 /* Special "end of ziplist" entry. 
*/ +#define ZIP_BIG_PREVLEN 254 /* ZIP_BIG_PREVLEN - 1 is the max number of bytes of + the previous entry, for the "prevlen" field prefixing + each entry, to be represented with just a single byte. + Otherwise it is represented as FE AA BB CC DD, where + AA BB CC DD are a 4 bytes unsigned integer + representing the previous entry len. */ + +/* Different encoding/length possibilities */ +#define ZIP_STR_MASK 0xc0 +#define ZIP_INT_MASK 0x30 +#define ZIP_STR_06B (0 << 6) +#define ZIP_STR_14B (1 << 6) +#define ZIP_STR_32B (2 << 6) +#define ZIP_INT_16B (0xc0 | 0<<4) +#define ZIP_INT_32B (0xc0 | 1<<4) +#define ZIP_INT_64B (0xc0 | 2<<4) +#define ZIP_INT_24B (0xc0 | 3<<4) +#define ZIP_INT_8B 0xfe + +/* 4 bit integer immediate encoding |1111xxxx| with xxxx between + * 0001 and 1101. */ +#define ZIP_INT_IMM_MASK 0x0f /* Mask to extract the 4 bits value. To add + one is needed to reconstruct the value. */ +#define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */ +#define ZIP_INT_IMM_MAX 0xfd /* 11111101 */ + +#define INT24_MAX 0x7fffff +#define INT24_MIN (-INT24_MAX - 1) + +/* Macro to determine if the entry is a string. String entries never start + * with "11" as most significant bits of the first byte. */ +#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK) + +/* Utility macros.*/ + +/* Return total bytes a ziplist is composed of. */ +#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) + +/* Return the offset of the last item inside the ziplist. */ +#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t)))) + +/* Return the length of a ziplist, or UINT16_MAX if the length cannot be + * determined without scanning the whole ziplist. */ +#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2))) + +/* The size of a ziplist header: two 32 bit integers for the total + * bytes count and last item offset. One 16 bit integer for the number + * of items field. 
*/ +#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t)) + +/* Size of the "end of ziplist" entry. Just one byte. */ +#define ZIPLIST_END_SIZE (sizeof(uint8_t)) + +/* Return the pointer to the first entry of a ziplist. */ +#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE) + +/* Return the pointer to the last entry of a ziplist, using the + * last entry offset inside the ziplist header. */ +#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) + +/* Return the pointer to the last byte of a ziplist, which is, the + * end of ziplist FF entry. */ +#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1) + +/* Increment the number of items field in the ziplist header. Note that this + * macro should never overflow the unsigned 16 bit integer, since entries are + * always pushed one at a time. When UINT16_MAX is reached we want the count + * to stay there to signal that a full scan is needed to get the number of + * items inside the ziplist. */ +#define ZIPLIST_INCR_LENGTH(zl,incr) { \ + if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) \ + ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \ +} + +/* Don't let ziplists grow over 1GB in any case, don't wanna risk overflow in + * zlbytes*/ +#define ZIPLIST_MAX_SAFETY_SIZE (1<<30) +int ziplistSafeToAdd(unsigned char* zl, size_t add) { + size_t len = zl? ziplistBlobLen(zl): 0; + if (len + add > ZIPLIST_MAX_SAFETY_SIZE) + return 0; + return 1; +} + + +/* We use this function to receive information about a ziplist entry. + * Note that this is not how the data is actually encoded, is just what we + * get filled by a function in order to operate more easily. */ +typedef struct zlentry { + unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/ + unsigned int prevrawlen; /* Previous entry len. */ + unsigned int lensize; /* Bytes used to encode this entry type/len. + For example strings have a 1, 2 or 5 bytes + header. 
Integers always use a single byte.*/ + unsigned int len; /* Bytes used to represent the actual entry. + For strings this is just the string length + while for integers it is 1, 2, 3, 4, 8 or + 0 (for 4 bit immediate) depending on the + number range. */ + unsigned int headersize; /* prevrawlensize + lensize. */ + unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on + the entry encoding. However for 4 bits + immediate integers this can assume a range + of values and must be range-checked. */ + unsigned char *p; /* Pointer to the very start of the entry, that + is, this points to prev-entry-len field. */ +} zlentry; + +#define ZIPLIST_ENTRY_ZERO(zle) { \ + (zle)->prevrawlensize = (zle)->prevrawlen = 0; \ + (zle)->lensize = (zle)->len = (zle)->headersize = 0; \ + (zle)->encoding = 0; \ + (zle)->p = NULL; \ +} + +/* Extract the encoding from the byte pointed by 'ptr' and set it into + * 'encoding' field of the zlentry structure. */ +#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \ + (encoding) = ((ptr)[0]); \ + if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \ +} while(0) + +#define ZIP_ENCODING_SIZE_INVALID 0xff +/* Return the number of bytes required to encode the entry type + length. 
+ * On error, return ZIP_ENCODING_SIZE_INVALID */ +static inline unsigned int zipEncodingLenSize(unsigned char encoding) { + if (encoding == ZIP_INT_16B || encoding == ZIP_INT_32B || + encoding == ZIP_INT_24B || encoding == ZIP_INT_64B || + encoding == ZIP_INT_8B) + return 1; + if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) + return 1; + if (encoding == ZIP_STR_06B) + return 1; + if (encoding == ZIP_STR_14B) + return 2; + if (encoding == ZIP_STR_32B) + return 5; + return ZIP_ENCODING_SIZE_INVALID; +} + +#define ZIP_ASSERT_ENCODING(encoding) do { \ + assert(zipEncodingLenSize(encoding) != ZIP_ENCODING_SIZE_INVALID); \ +} while (0) + +/* Return bytes needed to store integer encoded by 'encoding' */ +static inline unsigned int zipIntSize(unsigned char encoding) { + switch(encoding) { + case ZIP_INT_8B: return 1; + case ZIP_INT_16B: return 2; + case ZIP_INT_24B: return 3; + case ZIP_INT_32B: return 4; + case ZIP_INT_64B: return 8; + } + if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) + return 0; /* 4 bit immediate */ + /* bad encoding, covered by a previous call to ZIP_ASSERT_ENCODING */ + redis_unreachable(); + return 0; +} + +/* Write the encoding header of the entry in 'p'. If p is NULL it just returns + * the amount of bytes required to encode such a length. Arguments: + * + * 'encoding' is the encoding we are using for the entry. It could be + * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX + * for single-byte small immediate integers. + * + * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the + * string that this entry represents. + * + * The function returns the number of bytes used by the encoding/length + * header stored in 'p'. 
*/ +unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) { + unsigned char len = 1, buf[5]; + + if (ZIP_IS_STR(encoding)) { + /* Although encoding is given it may not be set for strings, + * so we determine it here using the raw length. */ + if (rawlen <= 0x3f) { + if (!p) return len; + buf[0] = ZIP_STR_06B | rawlen; + } else if (rawlen <= 0x3fff) { + len += 1; + if (!p) return len; + buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f); + buf[1] = rawlen & 0xff; + } else { + len += 4; + if (!p) return len; + buf[0] = ZIP_STR_32B; + buf[1] = (rawlen >> 24) & 0xff; + buf[2] = (rawlen >> 16) & 0xff; + buf[3] = (rawlen >> 8) & 0xff; + buf[4] = rawlen & 0xff; + } + } else { + /* Implies integer encoding, so length is always 1. */ + if (!p) return len; + buf[0] = encoding; + } + + /* Store this length at p. */ + memcpy(p,buf,len); + return len; +} + +/* Decode the entry encoding type and data length (string length for strings, + * number of bytes used for the integer for integer entries) encoded in 'ptr'. + * The 'encoding' variable is input, extracted by the caller, the 'lensize' + * variable will hold the number of bytes required to encode the entry + * length, and the 'len' variable will hold the entry length. + * On invalid encoding error, lensize is set to 0. */ +#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \ + if ((encoding) < ZIP_STR_MASK) { \ + if ((encoding) == ZIP_STR_06B) { \ + (lensize) = 1; \ + (len) = (ptr)[0] & 0x3f; \ + } else if ((encoding) == ZIP_STR_14B) { \ + (lensize) = 2; \ + (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \ + } else if ((encoding) == ZIP_STR_32B) { \ + (lensize) = 5; \ + (len) = ((ptr)[1] << 24) | \ + ((ptr)[2] << 16) | \ + ((ptr)[3] << 8) | \ + ((ptr)[4]); \ + } else { \ + (lensize) = 0; /* bad encoding, should be covered by a previous */ \ + (len) = 0; /* ZIP_ASSERT_ENCODING / zipEncodingLenSize, or */ \ + /* match the lensize after this macro with 0. 
*/ \ + } \ + } else { \ + (lensize) = 1; \ + if ((encoding) == ZIP_INT_8B) (len) = 1; \ + else if ((encoding) == ZIP_INT_16B) (len) = 2; \ + else if ((encoding) == ZIP_INT_24B) (len) = 3; \ + else if ((encoding) == ZIP_INT_32B) (len) = 4; \ + else if ((encoding) == ZIP_INT_64B) (len) = 8; \ + else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) \ + (len) = 0; /* 4 bit immediate */ \ + else \ + (lensize) = (len) = 0; /* bad encoding */ \ + } \ +} while(0) + +/* Encode the length of the previous entry and write it to "p". This only + * uses the larger encoding (required in __ziplistCascadeUpdate). */ +int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) { + uint32_t u32; + if (p != NULL) { + p[0] = ZIP_BIG_PREVLEN; + u32 = len; + memcpy(p+1,&u32,sizeof(u32)); + memrev32ifbe(p+1); + } + return 1 + sizeof(uint32_t); +} + +/* Encode the length of the previous entry and write it to "p". Return the + * number of bytes needed to encode this length if "p" is NULL. */ +unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) { + if (p == NULL) { + return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1; + } else { + if (len < ZIP_BIG_PREVLEN) { + p[0] = len; + return 1; + } else { + return zipStorePrevEntryLengthLarge(p,len); + } + } +} + +/* Return the number of bytes used to encode the length of the previous + * entry. The length is returned by setting the var 'prevlensize'. */ +#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \ + if ((ptr)[0] < ZIP_BIG_PREVLEN) { \ + (prevlensize) = 1; \ + } else { \ + (prevlensize) = 5; \ + } \ +} while(0) + +/* Return the length of the previous element, and the number of bytes that + * are used in order to encode the previous element length. + * 'ptr' must point to the prevlen prefix of an entry (that encodes the + * length of the previous entry in order to navigate the elements backward). 
+ * The length of the previous entry is stored in 'prevlen', the number of + * bytes needed to encode the previous entry length are stored in + * 'prevlensize'. Bytes are widened to uint32_t before shifting so the top byte cannot overflow a signed int. */ +#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \ + ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \ + if ((prevlensize) == 1) { \ + (prevlen) = (ptr)[0]; \ + } else { /* prevlensize == 5 */ \ + (prevlen) = ((uint32_t)(ptr)[4] << 24) | \ + ((uint32_t)(ptr)[3] << 16) | \ + ((uint32_t)(ptr)[2] << 8) | \ + ((uint32_t)(ptr)[1]); \ + } \ +} while(0) + +/* Given a pointer 'p' to the prevlen info that prefixes an entry, this + * function returns the difference in number of bytes needed to encode + * the prevlen if the previous entry changes of size. + * + * So if A is the number of bytes used right now to encode the 'prevlen' + * field. + * + * And B is the number of bytes that are needed in order to encode the + * 'prevlen' if the previous element will be updated to one of size 'len'. + * + * Then the function returns B - A + * + * So the function returns a positive number if more space is needed, + * a negative number if less space is needed, or zero if the same space + * is needed. */ +int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { + unsigned int prevlensize; + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + return zipStorePrevEntryLength(NULL, len) - prevlensize; +} + +/* Check if string pointed to by 'entry' can be encoded as an integer. + * Stores the integer value in 'v' and its encoding in 'encoding'. */ +int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) { + long long value; + + if (entrylen >= 32 || entrylen == 0) return 0; + if (string2ll((char*)entry,entrylen,&value)) { + /* Great, the string can be encoded. Check what's the smallest + * of our encoding types that can hold this value. 
*/ + if (value >= 0 && value <= 12) { + *encoding = ZIP_INT_IMM_MIN+value; + } else if (value >= INT8_MIN && value <= INT8_MAX) { + *encoding = ZIP_INT_8B; + } else if (value >= INT16_MIN && value <= INT16_MAX) { + *encoding = ZIP_INT_16B; + } else if (value >= INT24_MIN && value <= INT24_MAX) { + *encoding = ZIP_INT_24B; + } else if (value >= INT32_MIN && value <= INT32_MAX) { + *encoding = ZIP_INT_32B; + } else { + *encoding = ZIP_INT_64B; + } + *v = value; + return 1; + } + return 0; +} + +/* Store integer 'value' at 'p', encoded as 'encoding' */ +void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) { + int16_t i16; + int32_t i32; + int64_t i64; + if (encoding == ZIP_INT_8B) { + ((int8_t*)p)[0] = (int8_t)value; + } else if (encoding == ZIP_INT_16B) { + i16 = value; + memcpy(p,&i16,sizeof(i16)); + memrev16ifbe(p); + } else if (encoding == ZIP_INT_24B) { + i32 = value<<8; + memrev32ifbe(&i32); + memcpy(p,((uint8_t*)&i32)+1,sizeof(i32)-sizeof(uint8_t)); + } else if (encoding == ZIP_INT_32B) { + i32 = value; + memcpy(p,&i32,sizeof(i32)); + memrev32ifbe(p); + } else if (encoding == ZIP_INT_64B) { + i64 = value; + memcpy(p,&i64,sizeof(i64)); + memrev64ifbe(p); + } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) { + /* Nothing to do, the value is stored in the encoding itself. 
*/ + } else { + assert(NULL); + } +} + +/* Read integer encoded as 'encoding' from 'p' */ +int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { + int16_t i16; + int32_t i32; + int64_t i64, ret = 0; + if (encoding == ZIP_INT_8B) { + ret = ((int8_t*)p)[0]; + } else if (encoding == ZIP_INT_16B) { + memcpy(&i16,p,sizeof(i16)); + memrev16ifbe(&i16); + ret = i16; + } else if (encoding == ZIP_INT_32B) { + memcpy(&i32,p,sizeof(i32)); + memrev32ifbe(&i32); + ret = i32; + } else if (encoding == ZIP_INT_24B) { + i32 = 0; + memcpy(((uint8_t*)&i32)+1,p,sizeof(i32)-sizeof(uint8_t)); + memrev32ifbe(&i32); + ret = i32>>8; + } else if (encoding == ZIP_INT_64B) { + memcpy(&i64,p,sizeof(i64)); + memrev64ifbe(&i64); + ret = i64; + } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) { + ret = (encoding & ZIP_INT_IMM_MASK)-1; + } else { + assert(NULL); + } + return ret; +} + +/* Fills a struct with all information about an entry. + * This function is the "unsafe" alternative to the one blow. + * Generally, all function that return a pointer to an element in the ziplist + * will assert that this element is valid, so it can be freely used. + * Generally functions such ziplistGet assume the input pointer is already + * validated (since it's the return value of another function). */ +static inline void zipEntry(unsigned char *p, zlentry *e) { + ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen); + ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding); + ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len); + assert(e->lensize != 0); /* check that encoding was valid. */ + e->headersize = e->prevrawlensize + e->lensize; + e->p = p; +} + +/* Fills a struct with all information about an entry. + * This function is safe to use on untrusted pointers, it'll make sure not to + * try to access memory outside the ziplist payload. + * Returns 1 if the entry is valid, and 0 otherwise. 
*/ +static inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) { + unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE; + unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE; +#define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast)) + + /* If there's no possibility for the header to reach outside the ziplist, + * take the fast path. (max lensize and prevrawlensize are both 5 bytes) */ + if (p >= zlfirst && p + 10 < zllast) { + ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen); + ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding); + ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len); + e->headersize = e->prevrawlensize + e->lensize; + e->p = p; + /* We didn't call ZIP_ASSERT_ENCODING, so we check whether lensize was set to 0. */ + if (unlikely(e->lensize == 0)) + return 0; + /* Make sure the entry doesn't reach outside the edge of the ziplist */ + if (OUT_OF_RANGE(p + e->headersize + e->len)) + return 0; + /* Make sure prevlen doesn't reach outside the edge of the ziplist */ + if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen)) + return 0; + return 1; + } + + /* Make sure the pointer doesn't reach outside the edge of the ziplist */ + if (OUT_OF_RANGE(p)) + return 0; + + /* Make sure the encoded prevlen header doesn't reach outside the allocation */ + ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize); + if (OUT_OF_RANGE(p + e->prevrawlensize)) + return 0; + + /* Make sure encoded entry header is valid. */ + ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding); + e->lensize = zipEncodingLenSize(e->encoding); + if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID)) + return 0; + + /* Make sure the encoded entry header doesn't reach outside the allocation */ + if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize)) + return 0; + + /* Decode the prevlen and entry len headers. 
*/ + ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen); + ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len); + e->headersize = e->prevrawlensize + e->lensize; + + /* Make sure the entry doesn't reach outside the edge of the ziplist */ + if (OUT_OF_RANGE(p + e->headersize + e->len)) + return 0; + + /* Make sure prevlen doesn't reach outside the edge of the ziplist */ + if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen)) + return 0; + + e->p = p; + return 1; +#undef OUT_OF_RANGE +} + +/* Return the total number of bytes used by the entry pointed to by 'p'. NOTE(review): the result of zipEntrySafe() is ignored here, so 'e' may be only partially filled for an invalid entry - confirm callers validate 'p' first. */ +static inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) { + zlentry e; + zipEntrySafe(zl, zlbytes, p, &e, 0); + return e.headersize + e.len; +} + +/* Return the total number of bytes used by the entry pointed to by 'p'. Uses the unchecked zipEntry() decoder, which asserts that the encoding is valid. */ +static inline unsigned int zipRawEntryLength(unsigned char *p) { + zlentry e; + zipEntry(p, &e); + return e.headersize + e.len; +} + +/* Validate that the entry doesn't reach outside the ziplist allocation. */ +static inline void zipAssertValidEntry(unsigned char* zl, size_t zlbytes, unsigned char *p) { + zlentry e; + int res = zipEntrySafe(zl, zlbytes, p, &e, 1); + assert(res); + (void)res; +} + +/* Create a new empty ziplist: header plus the single end-of-list byte. */ +unsigned char *ziplistNew(void) { + unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; + unsigned char *zl = zmalloc(bytes); + ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); + ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); + ZIPLIST_LENGTH(zl) = 0; + zl[bytes-1] = ZIP_END; + return zl; +} + +/* Resize the ziplist to 'len' bytes, updating the stored byte count and rewriting the end marker. */ +unsigned char *ziplistResize(unsigned char *zl, size_t len) { + assert(len < UINT32_MAX); + zl = zrealloc(zl,len); + ZIPLIST_BYTES(zl) = intrev32ifbe(len); + zl[len-1] = ZIP_END; + return zl; +} + +/* When an entry is inserted, we need to set the prevlen field of the next + * entry to equal the length of the inserted entry. 
It can occur that this
+ * length cannot be encoded in 1 byte and the next entry needs to grow
+ * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
+ * because this only happens when an entry is already being inserted (which
+ * causes a realloc and memmove). However, encoding the prevlen may require
+ * that this entry is grown as well. This effect may cascade throughout
+ * the ziplist when there are consecutive entries with a size close to
+ * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
+ * every consecutive entry.
+ *
+ * Note that this effect can also happen in reverse, where the bytes required
+ * to encode the prevlen field can shrink. This effect is deliberately ignored,
+ * because it can cause a "flapping" effect where a chain prevlen fields is
+ * first grown and then shrunk again after consecutive inserts. Rather, the
+ * field is allowed to stay larger than necessary, because a large prevlen
+ * field implies the ziplist is holding large entries anyway.
+ *
+ * The pointer "p" points to the first entry that does NOT need to be
+ * updated, i.e. consecutive fields MAY need an update. */
+unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
+    zlentry cur;
+    size_t prevlen, prevlensize, prevoffset; /* Information of the last changed entry. */
+    size_t firstentrylen; /* Used to handle insert at head. */
+    size_t rawlen, curlen = intrev32ifbe(ZIPLIST_BYTES(zl));
+    size_t extra = 0, cnt = 0, offset;
+    size_t delta = 4; /* Extra bytes needed to update an entry's prevlen (5-1). */
+    unsigned char *tail = zl + intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl));
+
+    /* Empty ziplist */
+    if (p[0] == ZIP_END) return zl;
+
+    zipEntry(p, &cur); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+    firstentrylen = prevlen = cur.headersize + cur.len;
+    prevlensize = zipStorePrevEntryLength(NULL, prevlen);
+    prevoffset = p - zl;
+    p += prevlen;
+
+    /* Iterate ziplist to find out how many extra bytes do we need to update it. */
+    while (p[0] != ZIP_END) {
+        /* Decode outside of assert(): the call fills "cur" and must still
+         * run when assertions are compiled out. */
+        int res = zipEntrySafe(zl, curlen, p, &cur, 0);
+        assert(res);
+        (void)res;
+
+        /* Abort when "prevlen" has not changed. */
+        if (cur.prevrawlen == prevlen) break;
+
+        /* Abort when entry's "prevlensize" is big enough. */
+        if (cur.prevrawlensize >= prevlensize) {
+            if (cur.prevrawlensize == prevlensize) {
+                zipStorePrevEntryLength(p, prevlen);
+            } else {
+                /* This would result in shrinking, which we want to avoid.
+                 * So, set "prevlen" in the available bytes. */
+                zipStorePrevEntryLengthLarge(p, prevlen);
+            }
+            break;
+        }
+
+        /* cur.prevrawlen means cur is the former head entry. */
+        assert(cur.prevrawlen == 0 || cur.prevrawlen + delta == prevlen);
+
+        /* Update prev entry's info and advance the cursor. */
+        rawlen = cur.headersize + cur.len;
+        prevlen = rawlen + delta;
+        prevlensize = zipStorePrevEntryLength(NULL, prevlen);
+        prevoffset = p - zl;
+        p += rawlen;
+        extra += delta;
+        cnt++;
+    }
+
+    /* Extra bytes is zero all update has been done(or no need to update). */
+    if (extra == 0) return zl;
+
+    /* Update tail offset after loop. */
+    if (tail == zl + prevoffset) {
+        /* When the last entry we need to update is also the tail, update tail offset
+         * unless this is the only entry that was updated (so the tail offset didn't change). */
+        if (extra - delta != 0) {
+            ZIPLIST_TAIL_OFFSET(zl) =
+                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra-delta);
+        }
+    } else {
+        /* Update the tail offset in cases where the last entry we updated is not the tail. */
+        ZIPLIST_TAIL_OFFSET(zl) =
+            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
+    }
+
+    /* Now "p" points at the first unchanged byte in original ziplist,
+     * move data after that to new ziplist. */
+    offset = p - zl;
+    zl = ziplistResize(zl, curlen + extra);
+    p = zl + offset;
+    memmove(p + extra, p, curlen - offset - 1);
+    p += extra;
+
+    /* Iterate all entries that need to be updated tail to head. */
+    while (cnt) {
+        zipEntry(zl + prevoffset, &cur); /* no need for "safe" variant since we already iterated on all these entries above. */
+        rawlen = cur.headersize + cur.len;
+        /* Move entry to tail and reset prevlen. */
+        memmove(p - (rawlen - cur.prevrawlensize),
+                zl + prevoffset + cur.prevrawlensize,
+                rawlen - cur.prevrawlensize);
+        p -= (rawlen + delta);
+        if (cur.prevrawlen == 0) {
+            /* "cur" is the previous head entry, update its prevlen with firstentrylen. */
+            zipStorePrevEntryLength(p, firstentrylen);
+        } else {
+            /* An entry's prevlen can only increment 4 bytes. */
+            zipStorePrevEntryLength(p, cur.prevrawlen+delta);
+        }
+        /* Forward to previous entry. */
+        prevoffset -= cur.prevrawlen;
+        cnt--;
+    }
+    return zl;
+}
+
+/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
+unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
+    unsigned int i, totlen, deleted = 0;
+    size_t offset;
+    int nextdiff = 0;
+    zlentry first, tail;
+    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+
+    zipEntry(p, &first); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+    for (i = 0; p[0] != ZIP_END && i < num; i++) {
+        p += zipRawEntryLengthSafe(zl, zlbytes, p);
+        deleted++;
+    }
+
+    assert(p >= first.p);
+    totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
+    if (totlen > 0) {
+        uint32_t set_tail;
+        if (p[0] != ZIP_END) {
+            /* Storing `prevrawlen` in this entry may increase or decrease the
+             * number of bytes required compared to the current `prevrawlen`.
+             * There always is room to store this, because it was previously
+             * stored by an entry that is now being deleted. */
+            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
+
+            /* Note that there is always space when p jumps backward: if
+             * the new previous entry is large, one of the deleted elements
+             * had a 5 bytes prevlen header, so there is for sure at least
+             * 5 bytes free and we need just 4. */
+            p -= nextdiff;
+            /* NOTE(review): the statements from this assert down to the
+             * "since we asserted" comment were lost when this patch text was
+             * extracted and are restored from the upstream Redis ziplist.c
+             * this file is copied from; confirm against the committed file. */
+            assert(p >= first.p && p<zl+zlbytes-1);
+            zipStorePrevEntryLength(p,first.prevrawlen);
+
+            /* Update offset for tail */
+            set_tail = intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen;
+
+            /* When the tail contains more than one entry, we need to take
+             * "nextdiff" in account as well. Otherwise, a change in the
+             * size of prevlen doesn't have an effect on the *tail* offset. */
+            int res = zipEntrySafe(zl, zlbytes, p, &tail, 1);
+            assert(res);
+            (void)res;
+            if (p[tail.headersize+tail.len] != ZIP_END) {
+                set_tail = set_tail + nextdiff;
+            }
+
+            /* Move tail to the front of the ziplist */
+            /* since we asserted that p >= first.p. we know totlen >= 0,
+             * so we know that p > first.p and this is guaranteed not to reach
+             * beyond the allocation, even if the entries lens are corrupted. */
+            size_t bytes_to_move = zlbytes-(p-zl)-1;
+            memmove(first.p,p,bytes_to_move);
+        } else {
+            /* The entire tail was deleted. No need to move memory. */
+            set_tail = (first.p-zl)-first.prevrawlen;
+        }
+
+        /* Resize the ziplist */
+        offset = first.p-zl;
+        zlbytes -= totlen - nextdiff;
+        zl = ziplistResize(zl, zlbytes);
+        p = zl+offset;
+
+        /* Update record count */
+        ZIPLIST_INCR_LENGTH(zl,-deleted);
+
+        /* Set the tail offset computed above */
+        assert(set_tail <= zlbytes - ZIPLIST_END_SIZE);
+        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);
+
+        /* When nextdiff != 0, the raw length of the next entry has changed, so
+         * we need to cascade the update throughout the ziplist */
+        if (nextdiff != 0)
+            zl = __ziplistCascadeUpdate(zl,p);
+    }
+    return zl;
+}
+
+/* Insert item at "p". */
+unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
+    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
+    unsigned int prevlensize, prevlen = 0;
+    size_t offset;
+    int nextdiff = 0;
+    unsigned char encoding = 0;
+    long long value = 123456789; /* initialized to avoid warning. Using a value
+                                    that is easy to see if for some reason
+                                    we use it uninitialized. */
+    zlentry tail;
+
+    /* Find out prevlen for the entry that is inserted. */
+    if (p[0] != ZIP_END) {
+        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+    } else {
+        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
+        if (ptail[0] != ZIP_END) {
+            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
+        }
+    }
+
+    /* See if the entry can be encoded */
+    if (zipTryEncoding(s,slen,&value,&encoding)) {
+        /* 'encoding' is set to the appropriate integer encoding */
+        reqlen = zipIntSize(encoding);
+    } else {
+        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
+         * string length to figure out how to encode it. */
+        reqlen = slen;
+    }
+    /* We need space for both the length of the previous entry and
+     * the length of the payload. */
+    reqlen += zipStorePrevEntryLength(NULL,prevlen);
+    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
+
+    /* When the insert position is not equal to the tail, we need to
+     * make sure that the next entry can hold this entry's length in
+     * its prevlen field. */
+    int forcelarge = 0;
+    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
+    if (nextdiff == -4 && reqlen < 4) {
+        nextdiff = 0;
+        forcelarge = 1;
+    }
+
+    /* Store offset because a realloc may change the address of zl. */
+    offset = p-zl;
+    newlen = curlen+reqlen+nextdiff;
+    zl = ziplistResize(zl,newlen);
+    p = zl+offset;
+
+    /* Apply memory move when necessary and update tail offset. */
+    if (p[0] != ZIP_END) {
+        /* Subtract one because of the ZIP_END bytes */
+        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
+
+        /* Encode this entry's raw length in the next entry. */
+        if (forcelarge)
+            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
+        else
+            zipStorePrevEntryLength(p+reqlen,reqlen);
+
+        /* Update offset for tail */
+        ZIPLIST_TAIL_OFFSET(zl) =
+            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
+
+        /* When the tail contains more than one entry, we need to take
+         * "nextdiff" in account as well. Otherwise, a change in the
+         * size of prevlen doesn't have an effect on the *tail* offset. */
+        /* Check the decode result: "tail" is only filled in on success. */
+        int res = zipEntrySafe(zl, newlen, p+reqlen, &tail, 1);
+        assert(res);
+        (void)res;
+        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
+            ZIPLIST_TAIL_OFFSET(zl) =
+                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
+        }
+    } else {
+        /* This element will be the new tail. */
+        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
+    }
+
+    /* When nextdiff != 0, the raw length of the next entry has changed, so
+     * we need to cascade the update throughout the ziplist */
+    if (nextdiff != 0) {
+        offset = p-zl;
+        zl = __ziplistCascadeUpdate(zl,p+reqlen);
+        p = zl+offset;
+    }
+
+    /* Write the entry */
+    p += zipStorePrevEntryLength(p,prevlen);
+    p += zipStoreEntryEncoding(p,encoding,slen);
+    if (ZIP_IS_STR(encoding)) {
+        memcpy(p,s,slen);
+    } else {
+        zipSaveInteger(p,value,encoding);
+    }
+    ZIPLIST_INCR_LENGTH(zl,1);
+    return zl;
+}
+
+/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.
+ *
+ * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.
+ * Either 'first' or 'second' can be used for the result. The parameter not
+ * used will be free'd and set to NULL.
+ *
+ * After calling this function, the input parameters are no longer valid since
+ * they are changed and free'd in-place.
+ *
+ * The result ziplist is the contents of 'first' followed by 'second'.
+ *
+ * On failure: returns NULL if the merge is impossible.
+ * On success: returns the merged ziplist (which is expanded version of either
+ * 'first' or 'second', also frees the other unused input ziplist, and sets the
+ * input ziplist argument equal to newly reallocated ziplist return value. */
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {
+    /* If any params are null, we can't merge, so NULL. */
+    if (first == NULL || *first == NULL || second == NULL || *second == NULL)
+        return NULL;
+
+    /* Can't merge same list into itself. */
+    if (*first == *second)
+        return NULL;
+
+    size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));
+    size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));
+
+    size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));
+    size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));
+
+    int append;
+    unsigned char *source, *target;
+    size_t target_bytes, source_bytes;
+    /* Pick the largest ziplist so we can resize easily in-place.
+     * We must also track if we are now appending or prepending to
+     * the target ziplist. */
+    if (first_len >= second_len) {
+        /* retain first, append second to first. */
+        target = *first;
+        target_bytes = first_bytes;
+        source = *second;
+        source_bytes = second_bytes;
+        append = 1;
+    } else {
+        /* else, retain second, prepend first to second. */
+        target = *second;
+        target_bytes = second_bytes;
+        source = *first;
+        source_bytes = first_bytes;
+        append = 0;
+    }
+
+    /* Calculate final bytes (subtract one pair of metadata) */
+    size_t zlbytes = first_bytes + second_bytes -
+                     ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;
+    size_t zllength = first_len + second_len;
+
+    /* Combined zl length should be limited within UINT16_MAX */
+    zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;
+
+    /* larger values can't be stored into ZIPLIST_BYTES */
+    assert(zlbytes < UINT32_MAX);
+
+    /* Save offset positions before we start ripping memory apart. */
+    size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));
+    size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));
+
+    /* Extend target to new zlbytes then append or prepend source. */
+    target = zrealloc(target, zlbytes);
+    if (append) {
+        /* append == appending to target */
+        /* Copy source after target (copying over original [END]):
+         *   [TARGET - END, SOURCE - HEADER] */
+        memcpy(target + target_bytes - ZIPLIST_END_SIZE,
+               source + ZIPLIST_HEADER_SIZE,
+               source_bytes - ZIPLIST_HEADER_SIZE);
+    } else {
+        /* !append == prepending to target */
+        /* Move target *contents* exactly size of (source - [END]),
+         * then copy source into vacated space (source - [END]):
+         *   [SOURCE - END, TARGET - HEADER] */
+        memmove(target + source_bytes - ZIPLIST_END_SIZE,
+                target + ZIPLIST_HEADER_SIZE,
+                target_bytes - ZIPLIST_HEADER_SIZE);
+        memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);
+    }
+
+    /* Update header metadata. */
+    ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);
+    ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);
+    /* New tail offset is:
+     *   + N bytes of first ziplist
+     *   - 1 byte for [END] of first ziplist
+     *   + M bytes for the offset of the original tail of the second ziplist
+     *   - J bytes for HEADER because second_offset keeps no header. */
+    ZIPLIST_TAIL_OFFSET(target) = intrev32ifbe(
+                                   (first_bytes - ZIPLIST_END_SIZE) +
+                                   (second_offset - ZIPLIST_HEADER_SIZE));
+
+    /* __ziplistCascadeUpdate just fixes the prev length values until it finds a
+     * correct prev length value (then it assumes the rest of the list is okay).
+     * We tell CascadeUpdate to start at the first ziplist's tail element to fix
+     * the merge seam. */
+    target = __ziplistCascadeUpdate(target, target+first_offset);
+
+    /* Now free and NULL out what we didn't realloc */
+    if (append) {
+        zfree(*second);
+        *second = NULL;
+        *first = target;
+    } else {
+        zfree(*first);
+        *first = NULL;
+        *second = target;
+    }
+    return target;
+}
+
+/* Insert 's' at the head (where == ZIPLIST_HEAD) or otherwise at the end
+ * of the ziplist. Returns the (possibly moved) ziplist pointer. */
+unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
+    unsigned char *p;
+    p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
+    return __ziplistInsert(zl,p,s,slen);
+}
+
+/* Returns an offset to use for iterating with ziplistNext. When the given
+ * index is negative, the list is traversed back to front. When the list
+ * doesn't contain an element at the provided index, NULL is returned. */
+unsigned char *ziplistIndex(unsigned char *zl, int index) {
+    unsigned char *p;
+    unsigned int prevlensize, prevlen = 0;
+    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+    if (index < 0) {
+        index = (-index)-1;
+        p = ZIPLIST_ENTRY_TAIL(zl);
+        if (p[0] != ZIP_END) {
+            /* No need for "safe" check: when going backwards, we know the header
+             * we're parsing is in the range, we just need to assert (below) that
+             * the size we take doesn't cause p to go outside the allocation. */
+            ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+            while (prevlen > 0 && index--) {
+                p -= prevlen;
+                assert(p >= zl + ZIPLIST_HEADER_SIZE && p < zl + zlbytes - ZIPLIST_END_SIZE);
+                ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+            }
+        }
+    } else {
+        p = ZIPLIST_ENTRY_HEAD(zl);
+        while (index--) {
+            /* Use the "safe" length: When we go forward, we need to be careful
+             * not to decode an entry header if it's past the ziplist allocation. */
+            p += zipRawEntryLengthSafe(zl, zlbytes, p);
+            if (p[0] == ZIP_END)
+                break;
+        }
+    }
+    if (p[0] == ZIP_END || index > 0)
+        return NULL;
+    zipAssertValidEntry(zl, zlbytes, p);
+    return p;
+}
+
+/* Return pointer to next entry in ziplist.
+ *
+ * zl is the pointer to the ziplist
+ * p is the pointer to the current element
+ *
+ * The element after 'p' is returned, otherwise NULL if we are at the end. */
+unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
+    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+
+    /* "p" could be equal to ZIP_END, caused by ziplistDelete,
+     * and we should return NULL. Otherwise, we should return NULL
+     * when the *next* element is ZIP_END (there is no next entry). */
+    if (p[0] == ZIP_END) {
+        return NULL;
+    }
+
+    p += zipRawEntryLength(p);
+    if (p[0] == ZIP_END) {
+        return NULL;
+    }
+
+    zipAssertValidEntry(zl, zlbytes, p);
+    return p;
+}
+
+/* Return pointer to previous entry in ziplist. */
+unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
+    unsigned int prevlensize, prevlen = 0;
+
+    /* Iterating backwards from ZIP_END should return the tail. When "p" is
+     * equal to the first element of the list, we're already at the head,
+     * and should return NULL. */
+    if (p[0] == ZIP_END) {
+        p = ZIPLIST_ENTRY_TAIL(zl);
+        return (p[0] == ZIP_END) ? NULL : p;
+    } else if (p == ZIPLIST_ENTRY_HEAD(zl)) {
+        return NULL;
+    } else {
+        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+        assert(prevlen > 0);
+        p-=prevlen;
+        size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+        zipAssertValidEntry(zl, zlbytes, p);
+        return p;
+    }
+}
+
+/* Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending
+ * on the encoding of the entry. '*sstr' is always set to NULL to be able
+ * to find out whether the string pointer or the integer value was set.
+ * Return 0 if 'p' points to the end of the ziplist, 1 otherwise. */
+unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {
+    zlentry entry;
+    if (p == NULL || p[0] == ZIP_END) return 0;
+    if (sstr) *sstr = NULL;
+
+    zipEntry(p, &entry); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+    if (ZIP_IS_STR(entry.encoding)) {
+        if (sstr) {
+            *slen = entry.len;
+            *sstr = p+entry.headersize;
+        }
+    } else {
+        if (sval) {
+            *sval = zipLoadInteger(p+entry.headersize,entry.encoding);
+        }
+    }
+    return 1;
+}
+
+/* Insert an entry at "p". */
+unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
+    return __ziplistInsert(zl,p,s,slen);
+}
+
+/* Delete a single entry from the ziplist, pointed to by *p.
+ * Also update *p in place, to be able to iterate over the
+ * ziplist, while deleting entries. */
+unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
+    /* Keep the position as an offset: the delete may hand back a different
+     * "zl" pointer. After the delete, *p may end up pointing at ZIP_END
+     * (e.g. when the last entry was removed), so callers must check. */
+    size_t pos = *p-zl;
+    unsigned char *updated = __ziplistDelete(zl,*p,1);
+
+    *p = updated+pos;
+    return updated;
+}
+
+/* Delete up to "num" entries starting at "index". When there is no entry
+ * at "index" the ziplist is returned untouched. */
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
+    unsigned char *start = ziplistIndex(zl,index);
+    if (start == NULL)
+        return zl;
+    return __ziplistDelete(zl,start,num);
+}
+
+/* Replace the entry at p with 's'. Same result as a delete followed by an
+ * insert, but an entry whose encoded size does not change is overwritten
+ * in place. */
+unsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
+    /* Decode the entry currently stored at p. */
+    zlentry old;
+    zipEntry(p, &old);
+
+    /* Size of the new entry without its prevlen field. */
+    unsigned char enc = 0;
+    long long ival = 123456789; /* initialized to avoid warning. */
+    unsigned int needed;
+    if (zipTryEncoding(s,slen,&ival,&enc))
+        needed = zipIntSize(enc); /* enc now holds the integer encoding */
+    else
+        needed = slen; /* enc stays 0 */
+    needed += zipStoreEntryEncoding(NULL,enc,slen);
+
+    /* Different size: fall back to delete + insert. */
+    if (needed != old.lensize + old.len) {
+        zl = ziplistDelete(zl,&p);
+        zl = ziplistInsert(zl,p,s,slen);
+        return zl;
+    }
+
+    /* Same size: keep the prevlen bytes, rewrite encoding and payload. */
+    p += old.prevrawlensize;
+    p += zipStoreEntryEncoding(p,enc,slen);
+    if (ZIP_IS_STR(enc))
+        memcpy(p,s,slen);
+    else
+        zipSaveInteger(p,ival,enc);
+    return zl;
+}
+
+/* Compare entry pointed to by 'p' with 'sstr' of length 'slen'. */
+/* Return 1 if equal.
*/
+unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int slen) {
+    zlentry ent;
+    unsigned char senc;
+    long long stored, wanted;
+
+    if (p[0] == ZIP_END) return 0;
+
+    zipEntry(p, &ent); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+
+    /* String entry: equal only when length and bytes match. */
+    if (ZIP_IS_STR(ent.encoding))
+        return ent.len == slen && memcmp(p+ent.headersize,sstr,slen) == 0;
+
+    /* Integer entry: compare decoded values, not encodings, because
+     * different implementations may encode integers differently. */
+    if (!zipTryEncoding(sstr,slen,&wanted,&senc))
+        return 0;
+    stored = zipLoadInteger(p+ent.headersize,ent.encoding);
+    return stored == wanted;
+}
+
+/* Search forward from 'p' for an entry equal to 'vstr'/'vlen'. After each
+ * comparison 'skip' entries are passed over before comparing again.
+ * Returns the matching entry, or NULL when ZIP_END is reached first. */
+unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
+    int toskip = 0;
+    unsigned char venc = 0;
+    long long vint = 0;
+    size_t total = ziplistBlobLen(zl);
+
+    for (; p[0] != ZIP_END; ) {
+        struct zlentry e;
+        unsigned char *payload;
+        int ok = zipEntrySafe(zl, total, p, &e, 1);
+        assert(ok);
+        (void)ok;
+
+        payload = p + e.prevrawlensize + e.lensize;
+
+        if (toskip != 0) {
+            /* Still skipping: do not compare this entry. */
+            toskip--;
+        } else {
+            if (ZIP_IS_STR(e.encoding)) {
+                if (e.len == vlen && memcmp(payload, vstr, vlen) == 0)
+                    return p;
+            } else {
+                /* Try to encode the searched value as an integer only once.
+                 * venc becomes non-zero afterwards; UCHAR_MAX records that
+                 * no integer encoding exists so we never retry. */
+                if (venc == 0) {
+                    if (!zipTryEncoding(vstr, vlen, &vint, &venc))
+                        venc = UCHAR_MAX;
+                    /* Must be non-zero by now */
+                    assert(venc);
+                }
+
+                /* A value with no integer encoding cannot equal an
+                 * integer entry, so only compare when one was found. */
+                if (venc != UCHAR_MAX) {
+                    long long cur = zipLoadInteger(payload, e.encoding);
+                    if (cur == vint)
+                        return p;
+                }
+            }
+
+            /* Restart the skip counter after a comparison. */
+            toskip = skip;
+        }
+
+        /* Step over the payload to the next entry. */
+        p = payload + e.len;
+    }
+
+    return NULL;
+}
+
+/* Return the number of entries. The header count is used when it is below
+ * UINT16_MAX; otherwise the entries are walked and counted, and the header
+ * is refreshed if the real count fits. */
+unsigned int ziplistLen(unsigned char *zl) {
+    unsigned int count = 0;
+    unsigned char *cursor;
+    size_t zlbytes;
+
+    if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX)
+        return intrev16ifbe(ZIPLIST_LENGTH(zl));
+
+    cursor = zl+ZIPLIST_HEADER_SIZE;
+    zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+    for (; *cursor != ZIP_END; count++)
+        cursor += zipRawEntryLengthSafe(zl, zlbytes, cursor);
+
+    /* Re-store length if small enough */
+    if (count < UINT16_MAX) ZIPLIST_LENGTH(zl) = intrev16ifbe(count);
+    return count;
+}
+
+/* Return ziplist blob size in bytes.
*/
+size_t ziplistBlobLen(unsigned char *zl) {
+    return intrev32ifbe(ZIPLIST_BYTES(zl));
+}
+
+/* Print the ziplist header and every entry to stdout (debugging aid). */
+void ziplistRepr(unsigned char *zl) {
+    unsigned char *p;
+    int index = 0;
+    zlentry entry;
+    size_t zlbytes = ziplistBlobLen(zl);
+
+    printf(
+        "{total bytes %u} "
+        "{num entries %u}\n"
+        "{tail offset %u}\n",
+        intrev32ifbe(ZIPLIST_BYTES(zl)),
+        intrev16ifbe(ZIPLIST_LENGTH(zl)),
+        intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
+    p = ZIPLIST_ENTRY_HEAD(zl);
+    while(*p != ZIP_END) {
+        /* "entry" is only filled in when decoding succeeds, so check the
+         * result before printing from it. */
+        int res = zipEntrySafe(zl, zlbytes, p, &entry, 1);
+        assert(res);
+        (void)res;
+        printf(
+            "{\n"
+                "\taddr 0x%08lx,\n"
+                "\tindex %2d,\n"
+                "\toffset %5lu,\n"
+                "\thdr+entry len: %5u,\n"
+                "\thdr len%2u,\n"
+                "\tprevrawlen: %5u,\n"
+                "\tprevrawlensize: %2u,\n"
+                "\tpayload %5u\n",
+            (long unsigned)p,
+            index,
+            (unsigned long) (p-zl),
+            entry.headersize+entry.len,
+            entry.headersize,
+            entry.prevrawlen,
+            entry.prevrawlensize,
+            entry.len);
+        printf("\tbytes: ");
+        for (unsigned int i = 0; i < entry.headersize+entry.len; i++) {
+            printf("%02x|",p[i]);
+        }
+        printf("\n");
+        p += entry.headersize;
+        if (ZIP_IS_STR(entry.encoding)) {
+            printf("\t[str]");
+            /* Only the first 40 bytes of long strings are shown. */
+            if (entry.len > 40) {
+                if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
+                printf("...");
+            } else {
+                if (entry.len &&
+                    fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
+            }
+        } else {
+            printf("\t[int]%lld", (long long) zipLoadInteger(p,entry.encoding));
+        }
+        printf("\n}\n");
+        p += entry.len;
+        index++;
+    }
+    printf("{end}\n\n");
+}
+
+/* Validate the integrity of the data structure.
+ * when `deep` is 0, only the integrity of the header is validated.
+ * when `deep` is 1, we scan all the entries one by one.
+ * Returns 1 when every check passes, 0 otherwise. */
+int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
+    ziplistValidateEntryCB entry_cb, void *cb_userdata) {
+    /* check that we can actually read the header. (and ZIP_END) */
+    if (size < ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE)
+        return 0;
+
+    /* check that the encoded size in the header must match the allocated size. */
+    size_t bytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+    if (bytes != size)
+        return 0;
+
+    /* the last byte must be the terminator. */
+    if (zl[size - ZIPLIST_END_SIZE] != ZIP_END)
+        return 0;
+
+    /* make sure the tail offset isn't reaching outside the allocation. */
+    if (intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) > size - ZIPLIST_END_SIZE)
+        return 0;
+
+    if (!deep)
+        return 1;
+
+    unsigned int count = 0;
+    unsigned char *p = ZIPLIST_ENTRY_HEAD(zl);
+    unsigned char *prev = NULL;
+    size_t prev_raw_size = 0;
+    while(*p != ZIP_END) {
+        struct zlentry e;
+        /* Decode the entry headers and fail if invalid or reaches outside the allocation */
+        if (!zipEntrySafe(zl, size, p, &e, 1))
+            return 0;
+
+        /* Make sure the record stating the prev entry size is correct. */
+        if (e.prevrawlen != prev_raw_size)
+            return 0;
+
+        /* Optionally let the caller validate the entry too. */
+        if (entry_cb && !entry_cb(p, cb_userdata))
+            return 0;
+
+        /* Move to the next entry */
+        prev_raw_size = e.headersize + e.len;
+        prev = p;
+        p += e.headersize + e.len;
+        count++;
+    }
+
+    /* Make sure 'p' really does point to the end of the ziplist. */
+    if (p != zl + bytes - ZIPLIST_END_SIZE)
+        return 0;
+
+    /* Make sure the tail offset really does point to the start of the last entry. */
+    if (prev != NULL && prev != ZIPLIST_ENTRY_TAIL(zl))
+        return 0;
+
+    /* Check that the count in the header is correct */
+    unsigned int header_count = intrev16ifbe(ZIPLIST_LENGTH(zl));
+    if (header_count != UINT16_MAX && count != header_count)
+        return 0;
+
+    return 1;
+}
+
+/* Randomly select a pair of key and value.
+ * total_count is a pre-computed length/2 of the ziplist (to avoid calls to ziplistLen)
+ * 'key' and 'val' are used to store the result key value pair.
+ * 'val' can be NULL if the value is not needed.
*/
+void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) {
+    int ret;
+    unsigned char *p;
+
+    /* Avoid div by zero on corrupt ziplist */
+    assert(total_count);
+
+    /* Generate even numbers, because ziplist saved K-V pair */
+    int r = (rand() % total_count) * 2;
+    p = ziplistIndex(zl, r);
+    ret = ziplistGet(p, &key->sval, &key->slen, &key->lval);
+    assert(ret != 0);
+    (void)ret;
+    if (!val)
+        return;
+    p = ziplistNext(zl, p);
+    ret = ziplistGet(p, &val->sval, &val->slen, &val->lval);
+    assert(ret != 0);
+    (void)ret; /* keeps 'ret' used when assertions are compiled out */
+}
+
+/* unsigned int compare for qsort: returns <0, 0 or >0.
+ * The values are compared instead of subtracted, because an unsigned
+ * difference converted to int has the wrong sign once the operands are
+ * more than INT_MAX apart. */
+int uintCompare(const void *a, const void *b) {
+    unsigned int lhs = *(const unsigned int *) a;
+    unsigned int rhs = *(const unsigned int *) b;
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Helper method to store a string from val or an integer from lval into dest */
+static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) {
+    dest->sval = val;
+    dest->slen = len;
+    dest->lval = lval;
+}
+
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The order of the picked entries is random, and the selections
+ * are non-unique (repetitions are possible).
+ * The 'vals' arg can be NULL in which case we skip these. */
+void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
+    unsigned char *p, *key, *value;
+    unsigned int klen = 0, vlen = 0;
+    long long klval = 0, vlval = 0;
+
+    /* Notice: the index member must be first due to the use in uintCompare */
+    typedef struct {
+        unsigned int index;
+        unsigned int order;
+    } rand_pick;
+    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
+    unsigned int total_size = ziplistLen(zl)/2;
+
+    /* Avoid div by zero on corrupt ziplist */
+    assert(total_size);
+
+    /* create a pool of random indexes (some may be duplicate). */
+    for (unsigned int i = 0; i < count; i++) {
+        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
+        /* keep track of the order we picked them */
+        picks[i].order = i;
+    }
+
+    /* sort by indexes. */
+    qsort(picks, count, sizeof(rand_pick), uintCompare);
+
+    /* fetch the elements from the ziplist into an output array respecting the original order. */
+    unsigned int zipindex = 0, pickindex = 0;
+    p = ziplistIndex(zl, 0);
+    while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
+        p = ziplistNext(zl, p);
+        /* Fetch the value outside of assert(): the call fills
+         * value/vlen/vlval and must still run when assertions are
+         * compiled out. */
+        int got = ziplistGet(p, &value, &vlen, &vlval);
+        assert(got);
+        (void)got;
+        while (pickindex < count && zipindex == picks[pickindex].index) {
+            unsigned int storeorder = picks[pickindex].order;
+            ziplistSaveValue(key, klen, klval, &keys[storeorder]);
+            if (vals)
+                ziplistSaveValue(value, vlen, vlval, &vals[storeorder]);
+            pickindex++;
+        }
+        zipindex += 2;
+        p = ziplistNext(zl, p);
+    }
+
+    zfree(picks);
+}
+
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The selections are unique (no repetitions), and the order of
+ * the picked entries is NOT-random.
+ * The 'vals' arg can be NULL in which case we skip these.
+ * The return value is the number of items picked which can be lower than the
+ * requested count if the ziplist doesn't hold enough pairs.
*/
+unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
+    unsigned char *p, *key;
+    unsigned int klen = 0;
+    long long klval = 0;
+    unsigned int total_size = ziplistLen(zl)/2;
+    unsigned int index = 0;
+    int got;
+    if (count > total_size)
+        count = total_size;
+
+    /* To only iterate once, every time we try to pick a member, the probability
+     * we pick it is the quotient of the count left we want to pick and the
+     * count still we haven't visited in the dict, this way, we could make every
+     * member be equally picked.*/
+    p = ziplistIndex(zl, 0);
+    unsigned int picked = 0, remaining = count;
+    while (picked < count && p) {
+        double randomDouble = ((double)rand()) / RAND_MAX;
+        double threshold = ((double)remaining) / (total_size - index);
+        if (randomDouble <= threshold) {
+            /* ziplistGet() is called outside of assert(): it fills
+             * key/klen/klval and must still run when assertions are
+             * compiled out. */
+            got = ziplistGet(p, &key, &klen, &klval);
+            assert(got);
+            ziplistSaveValue(key, klen, klval, &keys[picked]);
+            p = ziplistNext(zl, p);
+            assert(p);
+            if (vals) {
+                got = ziplistGet(p, &key, &klen, &klval);
+                assert(got);
+                ziplistSaveValue(key, klen, klval, &vals[picked]);
+            }
+            (void)got;
+            remaining--;
+            picked++;
+        } else {
+            /* Not picked: step from the key to its value. */
+            p = ziplistNext(zl, p);
+            assert(p);
+        }
+        /* Step from the value to the next key (NULL at the end). */
+        p = ziplistNext(zl, p);
+        index++;
+    }
+    return picked;
+}
+
+#ifdef REDIS_TEST
+#include <sys/time.h>
+#include "adlist.h"
+#include "sds.h"
+
+#define debug(f, ...)
{ if (DEBUG) printf(f, __VA_ARGS__); } + +static unsigned char *createList() { + unsigned char *zl = ziplistNew(); + zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL); + zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL); + zl = ziplistPush(zl, (unsigned char*)"hello", 5, ZIPLIST_HEAD); + zl = ziplistPush(zl, (unsigned char*)"1024", 4, ZIPLIST_TAIL); + return zl; +} + +static unsigned char *createIntList() { + unsigned char *zl = ziplistNew(); + char buf[32]; + + sprintf(buf, "100"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "128000"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "-100"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_HEAD); + sprintf(buf, "4294967296"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_HEAD); + sprintf(buf, "non integer"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "much much longer non integer"); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL); + return zl; +} + +static long long usec(void) { + struct timeval tv; + gettimeofday(&tv,NULL); + return (((long long)tv.tv_sec)*1000000)+tv.tv_usec; +} + +static void stress(int pos, int num, int maxsize, int dnum) { + int i,j,k; + unsigned char *zl; + char posstr[2][5] = { "HEAD", "TAIL" }; + long long start; + for (i = 0; i < maxsize; i+=dnum) { + zl = ziplistNew(); + for (j = 0; j < i; j++) { + zl = ziplistPush(zl,(unsigned char*)"quux",4,ZIPLIST_TAIL); + } + + /* Do num times a push+pop from pos */ + start = usec(); + for (k = 0; k < num; k++) { + zl = ziplistPush(zl,(unsigned char*)"quux",4,pos); + zl = ziplistDeleteRange(zl,0,1); + } + printf("List size: %8d, bytes: %8d, %dx push+pop (%s): %6lld usec\n", + i,intrev32ifbe(ZIPLIST_BYTES(zl)),num,posstr[pos],usec()-start); + zfree(zl); + } +} + +static unsigned char *pop(unsigned char *zl, int where) { + unsigned char *p, *vstr; + 
unsigned int vlen; + long long vlong; + + p = ziplistIndex(zl,where == ZIPLIST_HEAD ? 0 : -1); + if (ziplistGet(p,&vstr,&vlen,&vlong)) { + if (where == ZIPLIST_HEAD) + printf("Pop head: "); + else + printf("Pop tail: "); + + if (vstr) { + if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite"); + } + else { + printf("%lld", vlong); + } + + printf("\n"); + return ziplistDelete(zl,&p); + } else { + printf("ERROR: Could not pop\n"); + exit(1); + } +} + +static int randstring(char *target, unsigned int min, unsigned int max) { + int p = 0; + int len = min+rand()%(max-min+1); + int minval, maxval; + switch(rand() % 3) { + case 0: + minval = 0; + maxval = 255; + break; + case 1: + minval = 48; + maxval = 122; + break; + case 2: + minval = 48; + maxval = 52; + break; + default: + assert(NULL); + } + + while(p < len) + target[p++] = minval+rand()%(maxval-minval+1); + return len; +} + +static void verify(unsigned char *zl, zlentry *e) { + int len = ziplistLen(zl); + zlentry _e; + + ZIPLIST_ENTRY_ZERO(&_e); + + for (int i = 0; i < len; i++) { + memset(&e[i], 0, sizeof(zlentry)); + zipEntry(ziplistIndex(zl, i), &e[i]); + + memset(&_e, 0, sizeof(zlentry)); + zipEntry(ziplistIndex(zl, -len+i), &_e); + + assert(memcmp(&e[i], &_e, sizeof(zlentry)) == 0); + } +} + +static unsigned char *insertHelper(unsigned char *zl, char ch, size_t len, unsigned char *pos) { + assert(len <= ZIP_BIG_PREVLEN); + unsigned char data[ZIP_BIG_PREVLEN] = {0}; + memset(data, ch, len); + return ziplistInsert(zl, pos, data, len); +} + +static int compareHelper(unsigned char *zl, char ch, size_t len, int index) { + assert(len <= ZIP_BIG_PREVLEN); + unsigned char data[ZIP_BIG_PREVLEN] = {0}; + memset(data, ch, len); + unsigned char *p = ziplistIndex(zl, index); + assert(p != NULL); + return ziplistCompare(p, data, len); +} + +static size_t strEntryBytesSmall(size_t slen) { + return slen + zipStorePrevEntryLength(NULL, 0) + zipStoreEntryEncoding(NULL, 0, slen); +} + +static size_t 
strEntryBytesLarge(size_t slen) { + return slen + zipStorePrevEntryLength(NULL, ZIP_BIG_PREVLEN) + zipStoreEntryEncoding(NULL, 0, slen); +} + +/* ./redis-server test ziplist --accurate */ +int ziplistTest(int argc, char **argv, int accurate) { + unsigned char *zl, *p; + unsigned char *entry; + unsigned int elen; + long long value; + int iteration; + + /* If an argument is given, use it as the random seed. */ + if (argc >= 4) + srand(atoi(argv[3])); + + zl = createIntList(); + ziplistRepr(zl); + + zfree(zl); + + zl = createList(); + ziplistRepr(zl); + + zl = pop(zl,ZIPLIST_TAIL); + ziplistRepr(zl); + + zl = pop(zl,ZIPLIST_HEAD); + ziplistRepr(zl); + + zl = pop(zl,ZIPLIST_TAIL); + ziplistRepr(zl); + + zl = pop(zl,ZIPLIST_TAIL); + ziplistRepr(zl); + + zfree(zl); + + printf("Get element at index 3:\n"); + { + zl = createList(); + p = ziplistIndex(zl, 3); + if (!ziplistGet(p, &entry, &elen, &value)) { + printf("ERROR: Could not access index 3\n"); + return 1; + } + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + printf("\n"); + } else { + printf("%lld\n", value); + } + printf("\n"); + zfree(zl); + } + + printf("Get element at index 4 (out of range):\n"); + { + zl = createList(); + p = ziplistIndex(zl, 4); + if (p == NULL) { + printf("No entry\n"); + } else { + printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl)); + return 1; + } + printf("\n"); + zfree(zl); + } + + printf("Get element at index -1 (last element):\n"); + { + zl = createList(); + p = ziplistIndex(zl, -1); + if (!ziplistGet(p, &entry, &elen, &value)) { + printf("ERROR: Could not access index -1\n"); + return 1; + } + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + printf("\n"); + } else { + printf("%lld\n", value); + } + printf("\n"); + zfree(zl); + } + + printf("Get element at index -4 (first element):\n"); + { + zl = createList(); + p = ziplistIndex(zl, -4); + if (!ziplistGet(p, &entry, &elen, 
&value)) { + printf("ERROR: Could not access index -4\n"); + return 1; + } + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + printf("\n"); + } else { + printf("%lld\n", value); + } + printf("\n"); + zfree(zl); + } + + printf("Get element at index -5 (reverse out of range):\n"); + { + zl = createList(); + p = ziplistIndex(zl, -5); + if (p == NULL) { + printf("No entry\n"); + } else { + printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl)); + return 1; + } + printf("\n"); + zfree(zl); + } + + printf("Iterate list from 0 to end:\n"); + { + zl = createList(); + p = ziplistIndex(zl, 0); + while (ziplistGet(p, &entry, &elen, &value)) { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + } else { + printf("%lld", value); + } + p = ziplistNext(zl,p); + printf("\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Iterate list from 1 to end:\n"); + { + zl = createList(); + p = ziplistIndex(zl, 1); + while (ziplistGet(p, &entry, &elen, &value)) { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + } else { + printf("%lld", value); + } + p = ziplistNext(zl,p); + printf("\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Iterate list from 2 to end:\n"); + { + zl = createList(); + p = ziplistIndex(zl, 2); + while (ziplistGet(p, &entry, &elen, &value)) { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + } else { + printf("%lld", value); + } + p = ziplistNext(zl,p); + printf("\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Iterate starting out of range:\n"); + { + zl = createList(); + p = ziplistIndex(zl, 4); + if (!ziplistGet(p, &entry, &elen, &value)) { + printf("No entry\n"); + } else { + printf("ERROR\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Iterate from back to front:\n"); + { + zl = createList(); + p = ziplistIndex(zl, -1); 
+ while (ziplistGet(p, &entry, &elen, &value)) { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + } else { + printf("%lld", value); + } + p = ziplistPrev(zl,p); + printf("\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Iterate from back to front, deleting all items:\n"); + { + zl = createList(); + p = ziplistIndex(zl, -1); + while (ziplistGet(p, &entry, &elen, &value)) { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite"); + } else { + printf("%lld", value); + } + zl = ziplistDelete(zl,&p); + p = ziplistPrev(zl,p); + printf("\n"); + } + printf("\n"); + zfree(zl); + } + + printf("Delete inclusive range 0,0:\n"); + { + zl = createList(); + zl = ziplistDeleteRange(zl, 0, 1); + ziplistRepr(zl); + zfree(zl); + } + + printf("Delete inclusive range 0,1:\n"); + { + zl = createList(); + zl = ziplistDeleteRange(zl, 0, 2); + ziplistRepr(zl); + zfree(zl); + } + + printf("Delete inclusive range 1,2:\n"); + { + zl = createList(); + zl = ziplistDeleteRange(zl, 1, 2); + ziplistRepr(zl); + zfree(zl); + } + + printf("Delete with start index out of range:\n"); + { + zl = createList(); + zl = ziplistDeleteRange(zl, 5, 1); + ziplistRepr(zl); + zfree(zl); + } + + printf("Delete with num overflow:\n"); + { + zl = createList(); + zl = ziplistDeleteRange(zl, 1, 5); + ziplistRepr(zl); + zfree(zl); + } + + printf("Delete foo while iterating:\n"); + { + zl = createList(); + p = ziplistIndex(zl,0); + while (ziplistGet(p,&entry,&elen,&value)) { + if (entry && strncmp("foo",(char*)entry,elen) == 0) { + printf("Delete foo\n"); + zl = ziplistDelete(zl,&p); + } else { + printf("Entry: "); + if (entry) { + if (elen && fwrite(entry,elen,1,stdout) == 0) + perror("fwrite"); + } else { + printf("%lld",value); + } + p = ziplistNext(zl,p); + printf("\n"); + } + } + printf("\n"); + ziplistRepr(zl); + zfree(zl); + } + + printf("Replace with same size:\n"); + { + zl = createList(); /* "hello", 
"foo", "quux", "1024" */ + unsigned char *orig_zl = zl; + p = ziplistIndex(zl, 0); + zl = ziplistReplace(zl, p, (unsigned char*)"zoink", 5); + p = ziplistIndex(zl, 3); + zl = ziplistReplace(zl, p, (unsigned char*)"yy", 2); + p = ziplistIndex(zl, 1); + zl = ziplistReplace(zl, p, (unsigned char*)"65536", 5); + p = ziplistIndex(zl, 0); + assert(!memcmp((char*)p, + "\x00\x05zoink" + "\x07\xf0\x00\x00\x01" /* 65536 as int24 */ + "\x05\x04quux" "\x06\x02yy" "\xff", + 23)); + assert(zl == orig_zl); /* no reallocations have happened */ + zfree(zl); + printf("SUCCESS\n\n"); + } + + printf("Replace with different size:\n"); + { + zl = createList(); /* "hello", "foo", "quux", "1024" */ + p = ziplistIndex(zl, 1); + zl = ziplistReplace(zl, p, (unsigned char*)"squirrel", 8); + p = ziplistIndex(zl, 0); + assert(!memcmp((char*)p, /* byte-wise compare: the image starts with \x00, where a string compare would stop */ + "\x00\x05hello" "\x07\x08squirrel" "\x0a\x04quux" + "\x06\xc0\x00\x04" "\xff", + 28)); /* 28 = 7 (hello) + 10 (squirrel) + 6 (quux) + 4 (1024 as int16) + 1 (end marker) */ + zfree(zl); + printf("SUCCESS\n\n"); + } + + printf("Regression test for >255 byte strings:\n"); + { + char v1[257] = {0}, v2[257] = {0}; + memset(v1,'x',256); + memset(v2,'y',256); + zl = ziplistNew(); + zl = ziplistPush(zl,(unsigned char*)v1,strlen(v1),ZIPLIST_TAIL); + zl = ziplistPush(zl,(unsigned char*)v2,strlen(v2),ZIPLIST_TAIL); + + /* Pop values again and compare their value. 
*/ + p = ziplistIndex(zl,0); + assert(ziplistGet(p,&entry,&elen,&value)); + assert(strncmp(v1,(char*)entry,elen) == 0); + p = ziplistIndex(zl,1); + assert(ziplistGet(p,&entry,&elen,&value)); + assert(strncmp(v2,(char*)entry,elen) == 0); + printf("SUCCESS\n\n"); + zfree(zl); + } + + printf("Regression test deleting next to last entries:\n"); + { + char v[3][257] = {{0}}; + zlentry e[3] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0, + .len = 0, .headersize = 0, .encoding = 0, .p = NULL}}; + size_t i; + + for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) { + memset(v[i], 'a' + i, sizeof(v[0])); + } + + v[0][256] = '\0'; + v[1][ 1] = '\0'; + v[2][256] = '\0'; + + zl = ziplistNew(); + for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) { + zl = ziplistPush(zl, (unsigned char *) v[i], strlen(v[i]), ZIPLIST_TAIL); + } + + verify(zl, e); + + assert(e[0].prevrawlensize == 1); + assert(e[1].prevrawlensize == 5); + assert(e[2].prevrawlensize == 1); + + /* Deleting entry 1 will increase `prevrawlensize` for entry 2 */ + unsigned char *p = e[1].p; + zl = ziplistDelete(zl, &p); + + verify(zl, e); + + assert(e[0].prevrawlensize == 1); + assert(e[1].prevrawlensize == 5); + + printf("SUCCESS\n\n"); + zfree(zl); + } + + printf("Create long list and check indices:\n"); + { + unsigned long long start = usec(); + zl = ziplistNew(); + char buf[32]; + int i,len; + for (i = 0; i < 1000; i++) { + len = sprintf(buf,"%d",i); + zl = ziplistPush(zl,(unsigned char*)buf,len,ZIPLIST_TAIL); + } + for (i = 0; i < 1000; i++) { + p = ziplistIndex(zl,i); + assert(ziplistGet(p,NULL,NULL,&value)); + assert(i == value); + + p = ziplistIndex(zl,-i-1); + assert(ziplistGet(p,NULL,NULL,&value)); + assert(999-i == value); + } + printf("SUCCESS. 
usec=%lld\n\n", usec()-start); + zfree(zl); + } + + printf("Compare strings with ziplist entries:\n"); + { + zl = createList(); + p = ziplistIndex(zl,0); + if (!ziplistCompare(p,(unsigned char*)"hello",5)) { + printf("ERROR: not \"hello\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"hella",5)) { + printf("ERROR: \"hella\"\n"); + return 1; + } + + p = ziplistIndex(zl,3); + if (!ziplistCompare(p,(unsigned char*)"1024",4)) { + printf("ERROR: not \"1024\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"1025",4)) { + printf("ERROR: \"1025\"\n"); + return 1; + } + printf("SUCCESS\n\n"); + zfree(zl); + } + + printf("Merge test:\n"); + { + /* create list gives us: [hello, foo, quux, 1024] */ + zl = createList(); + unsigned char *zl2 = createList(); + + unsigned char *zl3 = ziplistNew(); + unsigned char *zl4 = ziplistNew(); + + if (ziplistMerge(&zl4, &zl4)) { + printf("ERROR: Allowed merging of one ziplist into itself.\n"); + return 1; + } + + /* Merge two empty ziplists, get empty result back. 
*/ + zl4 = ziplistMerge(&zl3, &zl4); + ziplistRepr(zl4); + if (ziplistLen(zl4)) { + printf("ERROR: Merging two empty ziplists created entries.\n"); + return 1; + } + zfree(zl4); + + zl2 = ziplistMerge(&zl, &zl2); + /* merge gives us: [hello, foo, quux, 1024, hello, foo, quux, 1024] */ + ziplistRepr(zl2); + + if (ziplistLen(zl2) != 8) { + printf("ERROR: Merged length not 8, but: %u\n", ziplistLen(zl2)); + return 1; + } + + p = ziplistIndex(zl2,0); + if (!ziplistCompare(p,(unsigned char*)"hello",5)) { + printf("ERROR: not \"hello\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"hella",5)) { + printf("ERROR: \"hella\"\n"); + return 1; + } + + p = ziplistIndex(zl2,3); + if (!ziplistCompare(p,(unsigned char*)"1024",4)) { + printf("ERROR: not \"1024\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"1025",4)) { + printf("ERROR: \"1025\"\n"); + return 1; + } + + p = ziplistIndex(zl2,4); + if (!ziplistCompare(p,(unsigned char*)"hello",5)) { + printf("ERROR: not \"hello\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"hella",5)) { + printf("ERROR: \"hella\"\n"); + return 1; + } + + p = ziplistIndex(zl2,7); + if (!ziplistCompare(p,(unsigned char*)"1024",4)) { + printf("ERROR: not \"1024\"\n"); + return 1; + } + if (ziplistCompare(p,(unsigned char*)"1025",4)) { + printf("ERROR: \"1025\"\n"); + return 1; + } + printf("SUCCESS\n\n"); + zfree(zl); + } + + printf("Stress with random payloads of different encoding:\n"); + { + unsigned long long start = usec(); + int i,j,len,where; + unsigned char *p; + char buf[1024]; + int buflen; + list *ref; + listNode *refnode; + + /* Hold temp vars from ziplist */ + unsigned char *sstr; + unsigned int slen; + long long sval; + + iteration = accurate ? 20000 : 20; + for (i = 0; i < iteration; i++) { + zl = ziplistNew(); + ref = listCreate(); + listSetFreeMethod(ref,(void (*)(void*))sdsfree); + len = rand() % 256; + + /* Create lists */ + for (j = 0; j < len; j++) { + where = (rand() & 1) ? 
ZIPLIST_HEAD : ZIPLIST_TAIL; + if (rand() % 2) { + buflen = randstring(buf,1,sizeof(buf)-1); + } else { + switch(rand() % 3) { + case 0: + buflen = sprintf(buf,"%lld",(0LL + rand()) >> 20); + break; + case 1: + buflen = sprintf(buf,"%lld",(0LL + rand())); + break; + case 2: + buflen = sprintf(buf,"%lld",(0LL + rand()) << 20); + break; + default: + assert(NULL); + } + } + + /* Add to ziplist */ + zl = ziplistPush(zl, (unsigned char*)buf, buflen, where); + + /* Add to reference list */ + if (where == ZIPLIST_HEAD) { + listAddNodeHead(ref,sdsnewlen(buf, buflen)); + } else if (where == ZIPLIST_TAIL) { + listAddNodeTail(ref,sdsnewlen(buf, buflen)); + } else { + assert(NULL); + } + } + + assert(listLength(ref) == ziplistLen(zl)); + for (j = 0; j < len; j++) { + /* Naive way to get elements, but similar to the stresser + * executed from the Tcl test suite. */ + p = ziplistIndex(zl,j); + refnode = listIndex(ref,j); + + assert(ziplistGet(p,&sstr,&slen,&sval)); + if (sstr == NULL) { + buflen = sprintf(buf,"%lld",sval); + } else { + buflen = slen; + memcpy(buf,sstr,buflen); + buf[buflen] = '\0'; + } + assert(memcmp(buf,listNodeValue(refnode),buflen) == 0); + } + zfree(zl); + listRelease(ref); + } + printf("Done. usec=%lld\n\n", usec()-start); + } + + printf("Stress with variable ziplist size:\n"); + { + unsigned long long start = usec(); + int maxsize = accurate ? 16384 : 16; + stress(ZIPLIST_HEAD,100000,maxsize,256); + stress(ZIPLIST_TAIL,100000,maxsize,256); + printf("Done. usec=%lld\n\n", usec()-start); + } + + /* Benchmarks */ + { + zl = ziplistNew(); + iteration = accurate ? 100000 : 100; + for (int i=0; i + * Copyright (c) 2009-2012, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ZIPLIST_H +#define _ZIPLIST_H + +#define ZIPLIST_HEAD 0 +#define ZIPLIST_TAIL 1 + +/* Each entry in the ziplist is either a string or an integer. */ +typedef struct { + /* When string is used, it is provided with the length (slen). */ + unsigned char *sval; + unsigned int slen; + /* When integer is used, 'sval' is NULL, and lval holds the value. 
*/ + long long lval; +} ziplistEntry; + +unsigned char *ziplistNew(void); +unsigned char *ziplistMerge(unsigned char **first, unsigned char **second); +unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where); +unsigned char *ziplistIndex(unsigned char *zl, int index); +unsigned char *ziplistNext(unsigned char *zl, unsigned char *p); +unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p); +unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval); +unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen); +unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); +unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num); +unsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen); +unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen); +unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); +unsigned int ziplistLen(unsigned char *zl); +size_t ziplistBlobLen(unsigned char *zl); +void ziplistRepr(unsigned char *zl); +typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata); +int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep, + ziplistValidateEntryCB entry_cb, void *cb_userdata); +void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val); +void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals); +unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals); +int ziplistSafeToAdd(unsigned char* zl, size_t add); + +#ifdef REDIS_TEST +int ziplistTest(int argc, char *argv[], int accurate); +#endif + +#endif /* _ZIPLIST_H */ diff --git a/src/server/rdb_save.cc 
b/src/server/rdb_save.cc index 93852bb..ca4473d 100644 --- a/src/server/rdb_save.cc +++ b/src/server/rdb_save.cc @@ -10,8 +10,11 @@ extern "C" { #include "redis/intset.h" +#include "redis/listpack.h" +#include "redis/ziplist.h" #include "redis/rdb.h" #include "redis/util.h" +#include "redis/zmalloc.h" } #include "base/logging.h" @@ -132,7 +135,7 @@ uint8_t RdbObjectType(const robj* o) { return RDB_TYPE_ZSET_2; break; case OBJ_HASH: - if (o->encoding == OBJ_ENCODING_ZIPLIST) + if (o->encoding == OBJ_ENCODING_LISTPACK) return RDB_TYPE_HASH_ZIPLIST; else if (o->encoding == OBJ_ENCODING_HT) return RDB_TYPE_HASH; @@ -206,6 +209,10 @@ error_code RdbSerializer::SaveObject(const robj* o) { return SaveSetObject(o); } + if (o->type == OBJ_HASH) { + return SaveHSetObject(o); + } + LOG(FATAL) << "Not implemented " << o->type; return error_code{}; } @@ -252,7 +259,7 @@ error_code RdbSerializer::SaveSetObject(const robj* obj) { dictIterator* di = dictGetIterator(set); dictEntry* de; - auto key_cleanup = absl::MakeCleanup([di] { dictReleaseIterator(di); }); + auto cleanup = absl::MakeCleanup([di] { dictReleaseIterator(di); }); while ((de = dictNext(di)) != NULL) { sds ele = (sds)de->key; @@ -270,6 +277,52 @@ error_code RdbSerializer::SaveSetObject(const robj* obj) { return error_code{}; } +error_code RdbSerializer::SaveHSetObject(const robj* obj) { + DCHECK_EQ(OBJ_HASH, obj->type); + if (obj->encoding == OBJ_ENCODING_HT) { + dict* set = (dict*)obj->ptr; + + RETURN_ON_ERR(SaveLen(dictSize(set))); + + dictIterator* di = dictGetIterator(set); + dictEntry* de; + auto cleanup = absl::MakeCleanup([di] { dictReleaseIterator(di); }); + + while ((de = dictNext(di)) != NULL) { + sds key = (sds)de->key; + sds value = (sds)de->v.val; + + RETURN_ON_ERR(SaveString(string_view{key, sdslen(key)})); + RETURN_ON_ERR(SaveString(string_view{value, sdslen(value)})); + } + } else if (obj->encoding == OBJ_ENCODING_LISTPACK) { + // convert to ziplist first. 
+ uint8_t* lp = (uint8_t*)obj->ptr; + + size_t lplen = lpLength(lp); + CHECK(lplen > 0 && lplen % 2 == 0); // has (key,value) pairs. + + uint8_t* lpfield = lpFirst(lp); + uint8_t* zl = ziplistNew(); + int64_t entry_len; + uint8_t* entry; + uint8_t buf[32]; + + while (lpfield) { + entry = lpGet(lpfield, &entry_len, buf); + zl = ziplistPush(zl, entry, entry_len, ZIPLIST_TAIL); + lpfield = lpNext(lp, lpfield); + } + size_t ziplen = ziplistBlobLen(zl); + auto cleanup = absl::MakeCleanup([zl] { zfree(zl); }); + RETURN_ON_ERR(SaveString(string_view{reinterpret_cast<const char*>(zl), ziplen})); // the whole ziplist blob is saved as a single string. + } else { + LOG(FATAL) << "Unknown hset encoding " << obj->encoding; + } + + return error_code{}; +} + /* Save a long long value as either an encoded string or a string. */ error_code RdbSerializer::SaveLongLongAsString(int64_t value) { uint8_t buf[32]; diff --git a/src/server/rdb_save.h b/src/server/rdb_save.h index 37820d3..5957213 100644 --- a/src/server/rdb_save.h +++ b/src/server/rdb_save.h @@ -49,6 +49,7 @@ class RdbSerializer { std::error_code SaveStringObject(const robj* obj); std::error_code SaveListObject(const robj* obj); std::error_code SaveSetObject(const robj* obj); + std::error_code SaveHSetObject(const robj* obj); std::error_code SaveLongLongAsString(int64_t value); ::io::Sink* sink_ = nullptr; diff --git a/src/server/rdb_test.cc b/src/server/rdb_test.cc index 98868a5..d53788b 100644 --- a/src/server/rdb_test.cc +++ b/src/server/rdb_test.cc @@ -82,9 +82,12 @@ TEST_F(RdbTest, LoadSmall) { TEST_F(RdbTest, Save) { Run({"set", "string_key", "val"}); - Run({"sadd", "set_key1", "val"}); + Run({"sadd", "set_key1", "val1", "val2"}); Run({"sadd", "set_key2", "1", "2", "3"}); - Run({"rpush", "list_key", "val"}); + + // Run({"rpush", "list_key", "val"}); // TODO: invalid encoding when reading by redis 6. + // Run({"rpush", "list_key", "val"}); + Run({"hset", "hset_key", "field1", "val1", "field2", "val2"}); Run({"save"}); }