Wednesday, July 8, 2009

Analysis of shrink_slab function in mm/vmscan.c

The code snippet below is taken from kernel 2.6.31-rc2.

184 #define SHRINK_BATCH 128
185 /*
186 * Call the shrink functions to age shrinkable caches
187 *
188 * Here we assume it costs one seek to replace a lru page and that it also
189 * takes a seek to recreate a cache object. With this in mind we age equal
190 * percentages of the lru and ageable caches. This should balance the seeks
191 * generated by these structures.
192 *
193 * If the vm encountered mapped pages on the LRU it increase the pressure on
194 * slab to avoid swapping.
195 *
196 * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
197 *
198 * `lru_pages' represents the number of on-LRU pages in all the zones which
199 * are eligible for the caller's allocation attempt. It is used for balancing
200 * slab reclaim versus page reclaim.
201 *
202 * Returns the number of slab objects which we shrunk.
203 */
204 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
205 unsigned long lru_pages)
206 {
207 struct shrinker *shrinker;
208 unsigned long ret = 0;
209
210 if (scanned == 0)
211 scanned = SWAP_CLUSTER_MAX;
212
213 if (!down_read_trylock(&shrinker_rwsem))
214 return 1; /* Assume we'll be able to shrink next time */
215
216 list_for_each_entry(shrinker, &shrinker_list, list) {
217 unsigned long long delta;
218 unsigned long total_scan;
219 unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
220
221 delta = (4 * scanned) / shrinker->seeks;
222 delta *= max_pass;
223 do_div(delta, lru_pages + 1);
224 shrinker->nr += delta;
225 if (shrinker->nr < 0) {
226 printk(KERN_ERR "shrink_slab: %pF negative objects to "
227 "delete nr=%ld\n",
228 shrinker->shrink, shrinker->nr);
229 shrinker->nr = max_pass;
230 }
231
232 /*
233 * Avoid risking looping forever due to too large nr value:
234 * never try to free more than twice the estimate number of
235 * freeable entries.
236 */
237 if (shrinker->nr > max_pass * 2)
238 shrinker->nr = max_pass * 2;
239
240 total_scan = shrinker->nr;
241 shrinker->nr = 0;
242
243 while (total_scan >= SHRINK_BATCH) {
244 long this_scan = SHRINK_BATCH;
245 int shrink_ret;
246 int nr_before;
247
248 nr_before = (*shrinker->shrink)(0, gfp_mask);
249 shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
250 if (shrink_ret == -1)
251 break;
252 if (shrink_ret < nr_before)
253 ret += nr_before - shrink_ret;
254 count_vm_events(SLABS_SCANNED, this_scan);
255 total_scan -= this_scan;
256
257 cond_resched();
258 }
259
260 shrinker->nr += total_scan;
261 }
262 up_read(&shrinker_rwsem);
263 return ret;
264 }



Line 204: shrink_slab is called from several places; finding its callers with cscope (Ctrl+\ then c, "find functions calling this function"), we get:


1   61 fs/drop_caches.c <<drop_slab>>
             nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
2 1697 mm/vmscan.c <<do_try_to_free_pages>>
             shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
3 1937 mm/vmscan.c <<balance_pgdat>>
             nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
4 2193 mm/vmscan.c <<shrink_all_memory>>
             shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
5 2229 mm/vmscan.c <<shrink_all_memory>>
             shrink_slab(sc.nr_scanned, sc.gfp_mask,
6 2247 mm/vmscan.c <<shrink_all_memory>>
             shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
7 2454 mm/vmscan.c <<__zone_reclaim>>
             while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&

Tracing back into the calling functions, we can see that the scanned parameter refers to the number of LRU pages just scanned (sc->nr_scanned in most callers), while lru_pages refers to the total number of LRU pages in the zones eligible for the allocation.
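Of these callers, the simplest is drop_slab() in fs/drop_caches.c (reached by writing to /proc/sys/vm/drop_caches). In this kernel version it just applies a fixed artificial pressure until the shrinkers stop giving much back:

static void drop_slab(void)
{
	int nr_objects;

	do {
		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
	} while (nr_objects > 10);
}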

Lines 216 - 261 loop over every shrinker registered on shrinker_list and shrink its slab cache.
Line 219 asks each shrinker for its current object count (max_pass) by calling its shrink callback with nr_to_scan == 0. See the struct shrinker definition in include/linux/mm.h; a minimal registration example follows the struct below.

862 /*
863 * A callback you can register to apply pressure to ageable caches.
864 *
865 * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should
866 * look through the least-recently-used 'nr_to_scan' entries and
867 * attempt to free them up. It should return the number of objects
868 * which remain in the cache. If it returns -1, it means it cannot do
869 * any scanning at this time (eg. there is a risk of deadlock).
870 *
871 * The 'gfpmask' refers to the allocation we are currently trying to
872 * fulfil.
873 *
874 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
875 * querying the cache size, so a fastpath for that case is appropriate.
876 */
877 struct shrinker {
878 int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
879 int seeks; /* seeks to recreate an obj */
880
881 /* These are for internal use */
882 struct list_head list;
883 long nr; /* objs pending delete */
884 };
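To make the callback contract concrete, here is a minimal sketch of how a subsystem would register a shrinker against this 2.6.31-era API. It is not from the kernel tree; all demo_* names are made up, and demo_free_objects() is a hypothetical helper standing in for whatever reclaim the cache actually does:

#include <linux/module.h>
#include <linux/mm.h>

static atomic_t demo_nr_objects = ATOMIC_INIT(0);	/* current cache size */

/* Hypothetical helper: free up to nr cached objects, return how many. */
static int demo_free_objects(int nr)
{
	return 0;
}

static int demo_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	if (nr_to_scan) {
		/* Refuse to scan if the allocation context can't tolerate
		 * fs recursion; -1 tells shrink_slab to stop scanning us. */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		atomic_sub(demo_free_objects(nr_to_scan), &demo_nr_objects);
	}
	/* nr_to_scan == 0 is just a size query: this is the fastpath
	 * shrink_slab uses to compute max_pass on line 219. */
	return atomic_read(&demo_nr_objects);
}

static struct shrinker demo_shrinker = {
	.shrink = demo_shrink,
	.seeks	= DEFAULT_SEEKS,	/* 2, per include/linux/mm.h */
};

static int __init demo_init(void)
{
	register_shrinker(&demo_shrinker);
	return 0;
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");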


Lines 221 - 224 compute delta, the number of objects this shrinker should scan in proportion to the fraction of the LRU just scanned, and accumulate it into shrinker->nr (the pending object count). This matches the code comment above about aging "equal percentages of the lru and ageable caches"; a worked example is below.

Lines 243 - 258 scan in batches of SHRINK_BATCH, accumulating the number of objects actually freed into the ret variable; whatever is left below one batch is not dropped but carried over in shrinker->nr (line 260), so pressure builds up across calls, as sketched below.
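A userspace sketch of just that carry-over bookkeeping, with a made-up delta of 300 pending objects:

#include <stdio.h>

#define SHRINK_BATCH 128

int main(void)
{
	long nr = 0;		/* shrinker->nr: objs pending delete */
	long delta = 300;	/* pressure computed for this call   */
	long total_scan;

	nr += delta;
	total_scan = nr;
	nr = 0;

	/* Drain in whole batches; a real shrinker would be called here. */
	while (total_scan >= SHRINK_BATCH) {
		printf("scan batch of %d\n", SHRINK_BATCH);
		total_scan -= SHRINK_BATCH;
	}

	nr += total_scan;	/* 300 - 2*128 = 44 kept for next call */
	printf("carry over %ld\n", nr);
	return 0;
}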

Line 263 returns the total number of slab cache objects shrunk.
