You are on page 1 of 12

redis evict机制​

redis evict支持的类型​
• volatile-lru:从已设置过期时间的数据集(server.db[i].expires)中挑选最近最少使用的数据淘汰​
• volatile-lfu:从已设置过期时间的数据集(server.db[i].expires)中挑选最近最不常使用的数据淘汰​
• volatile-ttl:从已设置过期时间的数据集(server.db[i].expires)中挑选将要过期的数据淘汰​
• volatile-random:从已设置过期时间的数据集(server.db[i].expires)中任意选择数据淘汰​
• allkeys-lru:从数据集(server.db[i].dict)中挑选最近最少使用的数据淘汰​
• allkeys-lfu:从数据集(server.db[i].dict)中挑选最近最不常使用的数据淘汰​
• allkeys-random:从数据集(server.db[i].dict)中任意选择数据淘汰​
• noeviction(禁止驱逐):禁止驱逐数据​

evict机制执行流程​

redis是一个内存数据库并且在使用时会设置内存上限maxmemory,在使用内存超过maxmemory
时会发生key的逐出,也就是evict机制,redis在执行command时会检测当前使用的内存是否超过
maxmemory,通过配置的逐出机制逐出一部分key来释放内存。​
redis采用的方案是使用一种近似的lru和lfu策略来进行key的逐出,为什么是近似策略,因为如果
redis中存在几亿的key, redis要逐出这些key需要维护一个几亿长度的链表,这样非常浪费内存空
间。redis每次只是针对dict中一组采样的bucket来进行逐出的,并不是针对全部的key来进行逐出
的。而且redis也没有维护一个lru链表,而是通过在每一个key上记录一个lru值的方式来记录当前
redis key的访问时间的。​
来看一下redis key在redisDb的dict中是怎么存储的​
1 typedef struct redisObject {
2     unsigned type:4;
3     unsigned encoding:4;
4     unsigned lru:LRU_BITS; /* lru为一个24位的无符号int型值,既可能存储的是相对于lru时钟的lru时间,也可能是lfu数据 */
5     int refcount;   //引用计数
6     void *ptr;      //数据
7 } robj;

redis在创建一个robj对象时会初始化lru值,并且在访问robj对象时也会修改lru值。来看一下redis操
作lru的代码​
1 //in object.c 创建 robj对象 ​
2 robj *createObject(int type, void *ptr) {
3 robj *o = zmalloc(sizeof(*o));
4 o->type = type;
5 o->encoding = OBJ_ENCODING_RAW;
6 o->ptr = ptr;
7 o->refcount = 1;
8
9 /* Set the LRU to the current lruclock (minutes resolution), or
10 * alternatively the LFU counter. */
11         // 初始化lru值​
12 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
13 o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
14 } else {
15 o->lru = LRU_CLOCK();
16 }
17 return o;
18 }
19 //in db.c
20 robj *lookupKey(redisDb *db, robj *key, int flags) {
21 dictEntry *de = dictFind(db->dict,key->ptr);
22 if (de) {
23 robj *val = dictGetVal(de);
24
25 /* Update the access time for the ageing algorithm.
26 * Don't do it if we have a saving child, as this will trigger
27 * a copy on write madness. */
28 if (server.rdb_child_pid == -1 &&
29 server.aof_child_pid == -1 &&
30 !(flags & LOOKUP_NOTOUCH))
31 {
32                 //更新lru值​
33 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
34 updateLFU(val);
35 } else {
36 val->lru = LRU_CLOCK();
37 }
38 }
39 return val;
40 } else {
41 return NULL;
42 }
43 }

具体执行逐出代码如下:​
1 int processCommand(client *c) {
2 /* The QUIT command is handled separately. Normal command procs will
3 * go through checking for replication and QUIT will cause trouble
4 * when FORCE_REPLICATION is enabled and would be implemented in
5 * a regular command proc. */
6 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
7 addReply(c,shared.ok);
8 c->flags |= CLIENT_CLOSE_AFTER_REPLY;
9 return C_ERR;
10 }
11 ......
12 /* 检测内存,并做内存逐出 */​
13 if (server.maxmemory && !server.lua_timedout) {
14         // 核心函数 freeMemoryIfNeededAndSafe()​
15 int out_of_memory = freeMemoryIfNeededAndSafe() == C_ERR;
16 /* freeMemoryIfNeeded may flush slave output buffers. This may result
17 * into a slave, that may be the active client, to be freed. */
18 if (server.current_client == NULL) return C_ERR;
19
20 /* It was impossible to free enough memory, and the command the client
21 * is trying to execute is denied during OOM conditions or the client
22 * is in MULTI/EXEC context? Error. */
23 if (out_of_memory &&
24 (c->cmd->flags & CMD_DENYOOM ||
25 (c->flags & CLIENT_MULTI && c->cmd->proc != execCommand))) {
26 flagTransaction(c);
27 addReply(c, shared.oomerr);
28 return C_OK;
29 }
30 }
31 ......
32 }
33 /* freeMemoryIfNeeded 的逻辑如下:​
34  * 1. 计算需要释放多少内存​
35  * 2. 如果逐出策略是lru、lfu或者是基于过期时间的策略,则在每个db中随机采样一组key,根据key的lru值或lfu data值、ttl值进行排序,然后选出最该逐出的key,然后将这个key删除。如果逐出策略是随机逐出,则每次在一个db中随机选择一个key,进行删除。​
36  * 3. 如果删除后不能满足需要释放的内存,则继续进行第2步。 */​
37 int freeMemoryIfNeeded(void) {
38 /* By default replicas should ignore maxmemory
39 * and just be masters exact copies. */
40 if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;
41
42 size_t mem_reported, mem_tofree, mem_freed;
43 mstime_t latency, eviction_latency;
44 long long delta;
45 int slaves = listLength(server.slaves);
46
47 /* When clients are paused the dataset should be static not just from the
48 * POV of clients not being able to write, but also from the POV of
49 * expires and evictions of keys not being performed. */
50 if (clientsArePaused()) return C_OK;
51 // 计算需要释放的内存。 ​
52 if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
53 return C_OK;
54
55 mem_freed = 0;
56     // 如果逐出策略为 no eviction,不释放​
57 if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
58 goto cant_free; /* We need to free memory, but policy forbids. */
59
60 latencyStartMonitor(latency);
61     // 一直逐出到安全线​
62 while (mem_freed < mem_tofree) {
63 int j, k, i, keys_freed = 0;
64 static unsigned int next_db = 0;
65 sds bestkey = NULL;
66 int bestdbid;
67 redisDb *db;
68 dict *dict;
69 dictEntry *de;
70         // 内存逐出策略如果是LRU、LFU或基于过期时间的策略时执行下面的逻辑​
71 if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
72 server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
73 {
74 struct evictionPoolEntry *pool = EvictionPoolLRU;
75             // 找出要逐出的最佳的key​
76 while(bestkey == NULL) {
77 unsigned long total_keys = 0, keys;
78
79 /*遍历每一个 db */​
80 for (i = 0; i < server.dbnum; i++) {
81 db = server.db+i;
82 dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
83 db->dict : db->expires;
84 if ((keys = dictSize(dict)) != 0) {
85                         // evictionPoolPopulate函数的作用是对dict中的key进行采样,找出一组key,并对这组key根据lru值或者lfu count进行排序​
86 evictionPoolPopulate(i, dict, db->dict, pool);
87 total_keys += keys;
88 }
89 }
90 if (!total_keys) break; /* No keys to evict. */
91
92 /* 根据排序的队列找出最该删除的 key */​
93 for (k = EVPOOL_SIZE-1; k >= 0; k--) {
94 if (pool[k].key == NULL) continue;
95 bestdbid = pool[k].dbid;
96
97 if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
98 de = dictFind(server.db[pool[k].dbid].dict,
99 pool[k].key);
100 } else {
101 de = dictFind(server.db[pool[k].dbid].expires,
102 pool[k].key);
103 }
104
105 /* Remove the entry from the pool. */
106 if (pool[k].key != pool[k].cached)
107 sdsfree(pool[k].key);
108 pool[k].key = NULL;
109 pool[k].idle = 0;
110
111 /* If the key exists, is our pick. Otherwise it is
112 * a ghost and we need to try the next element. */
113 if (de) {
114 bestkey = dictGetKey(de);
115 break;
116 } else {
117 /* Ghost... Iterate again. */
118 }
119 }
120 }
121 }
122
123 /* volatile-random and allkeys-random policy */
124 else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
125 server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
126 {
127 /* When evicting a random key, we try to evict a key for
128 * each DB, so we use the static 'next_db' variable to
129 * incrementally visit all DBs. */
130 for (i = 0; i < server.dbnum; i++) {
131 j = (++next_db) % server.dbnum;
132 db = server.db+j;
133 dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
134 db->dict : db->expires;
135 if (dictSize(dict) != 0) {
136 de = dictGetRandomKey(dict);
137 bestkey = dictGetKey(de);
138 bestdbid = j;
139 break;
140 }
141 }
142 }
143
144 /* Finally remove the selected key. */
145 if (bestkey) {
146 db = server.db+bestdbid;
147 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
148 propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
149 /* We compute the amount of memory freed by db*Delete() alone.
150 * It is possible that actually the memory needed to propagate
151 * the DEL in AOF and replication link is greater than the one
152 * we are freeing removing the key, but we can't account for
153 * that otherwise we would never exit the loop.
154 *
155 * AOF and Output buffer memory will be freed eventually so
156 * we only care about memory used by the key space. */
157 delta = (long long) zmalloc_used_memory();
158 latencyStartMonitor(eviction_latency);
159 if (server.lazyfree_lazy_eviction)
160 dbAsyncDelete(db,keyobj);
161 else
162 dbSyncDelete(db,keyobj);
163 latencyEndMonitor(eviction_latency);
164 latencyAddSampleIfNeeded("eviction-del",eviction_latency);
165 latencyRemoveNestedEvent(latency,eviction_latency);
166 delta -= (long long) zmalloc_used_memory();
167 mem_freed += delta;
168 server.stat_evictedkeys++;
169 notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
170 keyobj, db->id);
171 decrRefCount(keyobj);
172 keys_freed++;
173
174 /* When the memory to free starts to be big enough, we may
175 * start spending so much time here that is impossible to
176 * deliver data to the slaves fast enough, so we force the
177 * transmission here inside the loop. */
178 if (slaves) flushSlavesOutputBuffers();
179
180 /* Normally our stop condition is the ability to release
181 * a fixed, pre-computed amount of memory. However when we
182 * are deleting objects in another thread, it's better to
183 * check, from time to time, if we already reached our target
184 * memory, since the "mem_freed" amount is computed only
185 * across the dbAsyncDelete() call, while the thread can
186 * release the memory all the time. */
187 if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
188 if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
189 /* Let's satisfy our stop condition. */
190 mem_freed = mem_tofree;
191 }
192 }
193 }
194
195 if (!keys_freed) {
196 latencyEndMonitor(latency);
197 latencyAddSampleIfNeeded("eviction-cycle",latency);
198 goto cant_free; /* nothing to free... */
199 }
200 }
201 latencyEndMonitor(latency);
202 latencyAddSampleIfNeeded("eviction-cycle",latency);
203 return C_OK;
204
205 cant_free:
206 /* We are here if we are not able to reclaim memory. There is only one
207 * last thing we can try: check if the lazyfree thread has jobs in queue
208 * and wait... */
209 while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
210 if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
211 break;
212 usleep(1000);
213 }
214 return C_ERR;
215 }
216 //针对lru, lfu, ttl的key排序​
217 void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct
evictionPoolEntry *pool) {
218 int j, k, count;
219 dictEntry *samples[server.maxmemory_samples];
220     // 进行采样,采样得到一组robj列表samples​
221 count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
222 for (j = 0; j < count; j++) {
223 unsigned long long idle; //idle 值相对于三种算法相当于一个结果打分 ​
224 sds key;
225 robj *o;
226 dictEntry *de;
227
228 de = samples[j];
229 key = dictGetKey(de);
230
231 /* If the dictionary we are sampling from is not the main
232 * dictionary (but the expires one) we need to lookup the key
233 * again in the key dictionary to obtain the value object. */
234 if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
235 if (sampledict != keydict) de = dictFind(keydict, key);
236 o = dictGetVal(de);
237 }
238
239         /* 根据lru值或lfu值或ttl值计算每一个robj的idle值 */​
240 if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
241 idle = estimateObjectIdleTime(o);
242 } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
243 /* When we use an LRU policy, we sort the keys by idle time
244 * so that we expire keys starting from greater idle time.
245 * However when the policy is an LFU one, we have a frequency
246 * estimation, and we want to evict keys with lower frequency
247 * first. So inside the pool we put objects using the inverted
248 * frequency subtracting the actual frequency to the maximum
249 * frequency of 255. */
250 idle = 255-LFUDecrAndReturn(o);
251 } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
252 /* In this case the sooner the expire the better. */
253 idle = ULLONG_MAX - (long)dictGetVal(de);
254 } else {
255 serverPanic("Unknown eviction policy in evictionPoolPopulate()");
256 }
257
258         /* 将key按idle值从小到大排列(也就是按lru差值越大的,lfu count值越小的,ttl值越小的排的越往后),并始终保持EVPOOL_SIZE个key */​
259 k = 0;
260 while (k < EVPOOL_SIZE &&
261 pool[k].key &&
262 pool[k].idle < idle) k++;
263 if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
264 /* Can't insert if the element is < the worst element we have
265 * and there are no empty buckets. */
266 continue;
267 } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
268 /* Inserting into empty position. No setup needed before insert. */
269 } else {
270 /* Inserting in the middle. Now k points to the first element
271 * greater than the element to insert. */
272 if (pool[EVPOOL_SIZE-1].key == NULL) {
273 /* Free space on the right? Insert at k shifting
274 * all the elements from k to end to the right. */
275
276 /* Save SDS before overwriting. */
277 sds cached = pool[EVPOOL_SIZE-1].cached;
278 memmove(pool+k+1,pool+k,
279 sizeof(pool[0])*(EVPOOL_SIZE-k-1));
280 pool[k].cached = cached;
281 } else {
282 /* No free space on right? Insert at k-1 */
283 k--;
284 /* Shift all elements on the left of k (included) to the
285 * left, so we discard the element with smaller idle time. */
286 sds cached = pool[0].cached; /* Save SDS before overwriting. */
287 if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
288 memmove(pool,pool+1,sizeof(pool[0])*k);
289 pool[k].cached = cached;
290 }
291 }
292
293 /* Try to reuse the cached SDS string allocated in the pool entry,
294 * because allocating and deallocating this object is costly
295 * (according to the profiler, not my fantasy. Remember:
296 * premature optimizbla bla bla bla. */
297 int klen = sdslen(key);
298 if (klen > EVPOOL_CACHED_SDS_SIZE) {
299 pool[k].key = sdsdup(key);
300 } else {
301 memcpy(pool[k].cached,key,klen+1);
302 sdssetlen(pool[k].cached,klen);
303 pool[k].key = pool[k].cached;
304 }
305 pool[k].idle = idle;
306 pool[k].dbid = dbid;
307 }
308 }

LRU与LFU​
redis中为了节省内存,在robj对象中使用了一个lru字段来标识lru算法的最后访问时间或lfu算法的使用频率,下面看一个只有24位的值是如何标识这些数据的。​


LRU​
lru算法的值比较简单,redis中维护了一个lru时钟,lru的值为当前秒级的时间的低24位,计算lru
差值则是使用当前lru时钟 - robj中lru值,不足时借一个最大的lru时钟值,lru值是相对的。​
1 //LRU_CLOCK_RESOLUTION 可以理解为精度。​
2 #define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
3
4 unsigned int getLRUClock(void) {
5 return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
6 }
7 // 如果在精度范围内,直接使用server.lruclock的值,减少mstime系统调用和计算。这个值是在
redis 定时任务中定时维护的​
8 unsigned int LRU_CLOCK(void) {
9 unsigned int lruclock;
10 if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
11 atomicGet(server.lruclock,lruclock);
12 } else {
13 lruclock = getLRUClock();
14 }
15 return lruclock;
16 }

17 // evictionPoolPopulate 函数中计算idle值的函数​
18 unsigned long long estimateObjectIdleTime(robj *o) {
19 unsigned long long lruclock = LRU_CLOCK();
20 if (lruclock >= o->lru) {
21 return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
22 } else {
23 return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
24 LRU_CLOCK_RESOLUTION;
25 }
26 }


LFU​
lfu算法是为了解决lru算法的如下缺陷:​
1 ~~~~~A~~~~~A~~~~~A~~~~A~~~~~A~~~~~A~~|
2 ~~B~~B~~B~~B~~B~~B~~B~~B~~B~~B~~B~~B~|
3 ~~~~~~~~~~C~~~~~~~~~C~~~~~~~~~C~~~~~~|
4 ~~~~~D~~~~~~~~~~D~~~~~~~~~D~~~~~~~~~D|

使用lru算法,数据D可能会被误认为将来最有可能访问到的数据,其实D的使用频率并不高,却有可能被保留下来。淘汰算法的本意是保留那些将来最有可能被再次访问的数据,而lru算法只是预测最近被访问的数据将来最有可能被访问到,lfu算法(Least Frequently Used)则认为最频繁被访问的数据将来最有可能被访问到。
redis中lfu算法维护了两个值。分别为计数器counter和最近一次计数器下降的时间last_decr_time​
1 16 bits 8 bits
2 +----------------+--------+
3 + Last decr time | LOG_C |
4 +----------------+--------+

高16 bits用来记录最近一次计数器降低的时间last_decr_time,单位是分钟,低8 bits记录计数器数


值counter。​
redis中有两个参数可以调节lfu的值​
1 lfu-log-factor 10
2 lfu-decay-time 1

lfu-log-factor 可以调整计数器 counter 的增长速度, lfu-log-factor 越大,


counter 增长的越慢。

lfu-decay-time 是一个以分钟为单位的数值,可以调整 counter 的减少速度。

在上面修改robj lru属性的代码中使用了更新lfu的函数,代码如下​
1 void updateLFU(robj *val) {
2     //先根据last_decr_time对counter做一次下降​
3 unsigned long counter = LFUDecrAndReturn(val);
4 //再对counter做增加操作​
5 counter = LFULogIncr(counter);
6
7 val->lru = (LFUGetTimeInMinutes()<<8) | counter;
8 }
9
10 unsigned long LFUDecrAndReturn(robj *o) {
11 unsigned long ldt = o->lru >> 8;
12 unsigned long counter = o->lru & 255;
13     unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; //counter下降多少依赖于配置中的lfu-decay-time,lfu-decay-time越大,下降的越慢​
14 if (num_periods)
15 counter = (num_periods > counter) ? 0 : counter - num_periods;
16 return counter;
17 }
18
19 // 计算当前时间与last_decr_time的差值​
20 unsigned long LFUTimeElapsed(unsigned long ldt) {
21 unsigned long now = LFUGetTimeInMinutes();
22 if (now >= ldt) return now-ldt;
23 return 65535-ldt+now;
24 }
25
26 // 获取当前时间,单位分钟​
27 unsigned long LFUGetTimeInMinutes(void) {
28 return (server.unixtime/60) & 65535;
29 }
30
31 //counter 计数增加​
32 uint8_t LFULogIncr(uint8_t counter) {
33 if (counter == 255) return 255;
34 double r = (double)rand()/RAND_MAX;
35 double baseval = counter - LFU_INIT_VAL;
36 if (baseval < 0) baseval = 0;
37 double p = 1.0/(baseval*server.lfu_log_factor+1);
38 if (r < p) counter++;
39 return counter;
40 }
41 //counter并不是简单的访问一次就+1,而是采用了一个0-1之间的p因子控制增长。counter最大值为255。取一个0-1之间的随机数r与p比较,当r<p时,才增加counter​
42 //p取决于当前counter值与lfu_log_factor因子,类似于做平滑处理。counter值与
lfu_log_factor因子越大,p越小,r<p的概率也越小,counter增长的概率也就越小。​
43 //增长情况如下:​
44 +--------+------------+------------+------------+------------+------------+
45 | factor | 100 hits   | 1000 hits  | 100K hits  | 1M hits    | 10M hits   |
46 +--------+------------+------------+------------+------------+------------+
47 | 0      | 104        | 255        | 255        | 255        | 255        |
48 | 1      | 18         | 49         | 255        | 255        | 255        |
49 | 10     | 10         | 18         | 142        | 255        | 255        |
50 | 100    | 8          | 11         | 49         | 143        | 255        |
51 +--------+------------+------------+------------+------------+------------+
52
53 //最终在 evictionPoolPopulate函数中计算idle值​
54 idle = 255 - LFUDecrAndReturn(o);

You might also like