You are on page 1of 8

redis ziplist​

ziplist在redis中的使用​
redis数据结构hash, zset, list都会使用到ziplist。​
hash的entries <= hash_max_ziplist_entries配置时,使用ziplist。entries >
hash_max_ziplist_entries时,会将ziplist的数据转存到hashtable,同时将存储编码格式设置
为HT,转换之后不会再换回ziplist。hash中ziplist的存储是把field和value拆成一对entry相邻存储。例如
<entry.field1><entry.value1><entry.field2><entry.value2>
list使用quicklist底层存储使用了ziplist。​
zset在entries > zset_max_ziplist_entries时,会将数据转存为skiplist + hashtable的存储结构。
entries <= zset_max_ziplist_entries时,又会将数据转存为ziplist来存储。zset在使用ziplist时,是
将value与score组成一对,例如<entry.value1><entry.score1><entry.value2><entry.score2>​
存储数据结构​

••
<zlbytes> <zltail> <zllen> < entry> <entry>... <entry> <zlend>​
all fields are stored in little endian, if not specified otherwise.​

zlbytes : ziplist占用的总字节数(uint32,4字节)
zltail : ziplist最后一个entry距ziplist起始地址的字节偏移量(uint32,4字节)
zllen : ziplist中entry的数量(uint16,2字节)
zlend : 结束标记,固定为0xFF(1字节)
entry存储结构​


<prevlen> <encoding> <entry-data>​
prevlen : 前一个entry占用的字节数。字节数<254时占1个字节(无符号int);字节数>=254
时占5个字节,第一个字节固定为FE,其余4个字节表示长度
encoding : 编码格式​
entry-data : 真实数据​
entry编码格式​
字符串 ​
长度位
bit位00开头,占1byte,表示长度<=63(2^6 - 1)byte的字符串
bit位01开头,占2byte,表示长度<=16383(2^14 - 1)byte的字符串(big endian)
bit位10开头,占5byte,表示长度>=16384byte的字符串(big endian)
有符号int​
1byte 2byte 3byte 4byte 5byte 6byte 7byte 8byte 9byte​
11000000 xxxxxxxx xxxxxxxx​
11010000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx​
11100000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx​
11110000 xxxxxxxx xxxxxxxx xxxxxxxx​
11111110 xxxxxxxx​
x表示0/1
特殊int ​
1111xxxx,因11110000, 11111110, 11111111 被占用,所以xxxx只能取0001-1101,可以表示0-12共13个无符号int。
因0000被占用,因此11110001表示0,11110010表示1,依次类推(值 = xxxx - 1)。
举例​
ziplist存入字符串2, 5​
zlbytes zltail zllen entry entry zlend ​
[0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff]​
zlbytes = 15​
zltail = 12(0x0c,即最后一个entry距ziplist起始位置的偏移量)
zllen = 2​
entry.1 : 00f3, 00表示前一个entry不存在,f3为11110011表示2​
entry.2 : 02f6, 02表示前一个entry长度为2字节, f6 为11110110表示5​
在2,5后面再加一个Hello world​
对应的entry​
prevlen encoding entry-data​
[02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64]​
prevlen表示前一个entry(entry.2)占用2字节​
encoding为0b,即二进制00001011:高2位00表示长度<=63的字符串,低6位001011表示字符串长度为11
entry-data为Hello world对应的ascii码​
运行时数据结构​
/* Runtime (decoded) representation of a single ziplist entry. */
typedef struct zlentry {
    unsigned int prevrawlensize; /* Bytes needed to encode the previous entry's length (prevrawlen). */
    unsigned int prevrawlen;     /* Length of the previous entry. */
    unsigned int lensize;        /* Bytes needed to encode this entry's length (len). */
    unsigned int len;            /* Length of this entry's value. */
    unsigned int headersize;     /* Header size of this entry = lensize + prevrawlensize. */
    unsigned char encoding;      /* Encoding of this entry. */
    unsigned char *p;            /* Pointer to this entry. */
} zlentry;

redis ziplist插入,删除​
级联更新​
当插入或者删除某个entry,造成其后entry的prev_entry_length域由1个字节扩展成5个字节,
并且其后多个连续entry的长度本来都介于250-253字节之间时,每个entry扩展4字节后长度都会达到254字节以上,
于是这些连续entry都需要依次扩展prev_entry_length域,以存储前一个entry实际占用的字节数
1 /**
2 * p 为发生变更的点
3 */
4 unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
5 size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
6 size_t offset, noffset, extra;
7 unsigned char *np;
8 zlentry cur, next;
9
10 while (p[0] != ZIP_END) {
11 会解压存储数据结构生成运行时数据结构
//zipEntry zlentry​
12 //获取当前的 zlentry​
13 zipEntry(p, &cur);
14 rawlen = cur.headersize + cur.len;
15 rawlensize = zipStorePrevEntryLength(NULL,rawlen);
16
17 /* 没有后继,结束级联更新 */​
18 if (p[rawlen] == ZIP_END) break;
19 //获取当前下一个 zlentry​
20 zipEntry(p+rawlen, &next);
21
22 /* 后继 的
zlentry prevrawlen == rawlen ,结束级联更新 */​
23 if (next.prevrawlen == rawlen) break;
24 /* 当前节点长度所占字节数 后继 > 中前驱节点长度所占字节数
zlentry */​
25 /* 更新这些节点的字节数 ​
26 if (next.prevrawlensize < rawlensize) {
27 /* The "prevlen" field of "next" needs more bytes to hold
28 * the raw length of "cur". */
29 offset = p-zl;
30 extra = rawlensize-next.prevrawlensize;
31 zl = ziplistResize(zl,curlen+extra);
32 p = zl+offset;
33
34 /* Current pointer and offset for next element. */
35 np = p+rawlen;
36 noffset = np-zl;
37
38 /* Update tail offset when next element is not the tail element. */
39 if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
40 ZIPLIST_TAIL_OFFSET(zl) =
41 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
42 }
43
44 /* 将后继及之后的数据 到新的位置
copy np + rawlensize */​
45 memmove(np+rawlensize,
46 np+next.prevrawlensize,
47 curlen-noffset-next.prevrawlensize-1);
48 zipStorePrevEntryLength(np,rawlen);
49
50 /* 移动 的指针
p */​
51 p += rawlen;
52 curlen += extra;
53 } else {
54 if (next.prevrawlensize > rawlensize) {
55 /* This would result in shrinking, which we want to avoid.
56 * So, set "rawlen" in the available bytes. */
57 zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
58 } else {
59 zipStorePrevEntryLength(p+rawlen,rawlen);
60 }
61
62 /* Stop here, as the raw length of "next" has not changed. */
63 break;
64 }
65 }
66 return zl;
67 }

插入操作​
1. 获取插入位置的前一个entry的长度​
2. 计算要插入的entry占用的字节数;​
3. 计算prev_entry_length的差值;​
4. 为ziplist重新分配内存空间;​
5. 如果不是在末尾插入,移动ziplist在p之后的字节为p的插入腾出足够的空间,并更新zltail的值;​
6. 如果prev_entry_length域的差值不为0,进行必要的连锁更新;​
7. 将新插入的entry三部分的值拷贝到对应的内存空间;​
1 /* 在 位置插入
p */​
2 unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char
*s, unsigned int slen) {
3 size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
4 unsigned int prevlensize, prevlen = 0;
5 size_t offset;
6 int nextdiff = 0;
7 unsigned char encoding = 0;
8 long long value = 123456789; /* initialized to avoid warning. Using a value
9 that is easy to see if for some reason
10 we use it uninitialized. */
11 zlentry tail;
12
13 /* Find out prevlen for the entry that is inserted. */
14 if (p[0] != ZIP_END) {
15 ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
16 } else {
17 unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
18 if (ptail[0] != ZIP_END) {
19 prevlen = zipRawEntryLength(ptail);
20 }
21 }
22
23 /* See if the entry can be encoded */
24 if (zipTryEncoding(s,slen,&value,&encoding)) {
25 /* 'encoding' is set to the appropriate integer encoding */
26 reqlen = zipIntSize(encoding);
27 } else {
28 /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
29 * string length to figure out how to encode it. */
30 reqlen = slen;
31 }
32 /* We need space for both the length of the previous entry and
33 * the length of the payload. */
34 reqlen += zipStorePrevEntryLength(NULL,prevlen);
35 reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
36
37 /* When the insert position is not equal to the tail, we need to
38 * make sure that the next entry can hold this entry's length in
39 * its prevlen field. */
40 int forcelarge = 0;
41 nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
42 if (nextdiff == -4 && reqlen < 4) {
43 nextdiff = 0;
44 forcelarge = 1;
45 }
46
47 /* Store offset because a realloc may change the address of zl. */
48 offset = p-zl;
49 zl = ziplistResize(zl,curlen+reqlen+nextdiff);
50 p = zl+offset;
51
52 /* Apply memory move when necessary and update tail offset. */
53 if (p[0] != ZIP_END) {
54 /* Subtract one because of the ZIP_END bytes */
55 memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
56
57 /* Encode this entry's raw length in the next entry. */
58 if (forcelarge)
59 zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
60 else
61 zipStorePrevEntryLength(p+reqlen,reqlen);
62
63 /* Update offset for tail */
64 ZIPLIST_TAIL_OFFSET(zl) =
65 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
66
67 /* When the tail contains more than one entry, we need to take
68 * "nextdiff" in account as well. Otherwise, a change in the
69 * size of prevlen doesn't have an effect on the *tail* offset. */
70 zipEntry(p+reqlen, &tail);
71 if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
72 ZIPLIST_TAIL_OFFSET(zl) =
73 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
74 }
75 } else {
76 /* This element will be the new tail. */
77 ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
78 }
79
80 /* When nextdiff != 0, the raw length of the next entry has changed, so
81 * we need to cascade the update throughout the ziplist */
82 if (nextdiff != 0) {
83 offset = p-zl;
84 zl = __ziplistCascadeUpdate(zl,p+reqlen);
85 p = zl+offset;
86 }
87
88 /* Write the entry */
89 p += zipStorePrevEntryLength(p,prevlen);
90 p += zipStoreEntryEncoding(p,encoding,slen);
91 if (ZIP_IS_STR(encoding)) {
92 memcpy(p,s,slen);
93 } else {
94 zipSaveInteger(p,value,encoding);
95 }
96 ZIPLIST_INCR_LENGTH(zl,1);
97 return zl;
98 }

删除操作​
1. 获取要删除的起始位置的entry(记为first),然后根据要删除的元素数目以及ziplist中zlend的限
制,确定删除的结束位置p,计算出总共要删除的字节数totlen=p-first.p
2. 如果totlen>0,进行删除的操作,否则不执行任何的操作;​
3. 如果结束的位置p是zlend的位置,只需更新ziplist的zltail​
4. 如果结束的位置p不是zlend的位置,计算nextdiff = 存储first.prevrawlen所需的字节数 - p当前prev_entry_len域占用的字节数,根据
nextdiff写入新后继的prev_entry_len并计算新的zltail
5. 计算新的zlbytes,重新分配内存空间;​
6. 更新ziplist中的zllen​
7. 如果nextdiff != 0,从删除结束位置p开始进行级联更新​
1 unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int
num) {
2 unsigned int i, totlen, deleted = 0;
3 size_t offset;
4 int nextdiff = 0;
5 zlentry first, tail;
6
7 zipEntry(p, &first);
8 for (i = 0; p[0] != ZIP_END && i < num; i++) {
9 p += zipRawEntryLength(p);
10 deleted++;
11 }
12
13 totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
14 if (totlen > 0) {
15 if (p[0] != ZIP_END) {
16 /* Storing `prevrawlen` in this entry may increase or decrease the
17 * number of bytes required compare to the current `prevrawlen`.
18 * There always is room to store this, because it was previously
19 * stored by an entry that is now being deleted. */
20 nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
21
22 /* Note that there is always space when p jumps backward: if
23 * the new previous entry is large, one of the deleted elements
24 * had a 5 bytes prevlen header, so there is for sure at least
25 * 5 bytes free and we need just 4. */
26 p -= nextdiff;
27 zipStorePrevEntryLength(p,first.prevrawlen);
28
29 /* Update offset for tail */
30 ZIPLIST_TAIL_OFFSET(zl) =
31 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);
32
33 /* When the tail contains more than one entry, we need to take
34 * "nextdiff" in account as well. Otherwise, a change in the
35 * size of prevlen doesn't have an effect on the *tail* offset. */
36 zipEntry(p, &tail);
37 if (p[tail.headersize+tail.len] != ZIP_END) {
38 ZIPLIST_TAIL_OFFSET(zl) =
39 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
40 }
41
42 /* Move tail to the front of the ziplist */
43 memmove(first.p,p,
44 intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1);
45 } else {
46 /* The entire tail was deleted. No need to move memory. */
47 ZIPLIST_TAIL_OFFSET(zl) =
48 intrev32ifbe((first.p-zl)-first.prevrawlen);
49 }
50
51 /* Resize and update length */
52 offset = first.p-zl;
53 zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);
54 ZIPLIST_INCR_LENGTH(zl,-deleted);
55 p = zl+offset;
56
57 /* When nextdiff != 0, the raw length of the next entry has changed, so
58 * we need to cascade the update throughout the ziplist */
59 if (nextdiff != 0)
60 zl = __ziplistCascadeUpdate(zl,p);
61 }
62 return zl;
63 }

You might also like