今天看到一個有趣的地方就是在計算memory zone的watermark值的變化:
在kernel-4.4中:
/*
 * kernel-4.4 variant: recompute the MIN, LOW and HIGH watermarks of every
 * zone from the min_free_kbytes and extra_free_kbytes tunables, then call
 * calculate_totalreserve_pages() to refresh the total reserve.
 */
static void __setup_per_zone_wmarks(void)
{
/* Both tunables are in KB; a right shift by (PAGE_SHIFT - 10) converts KB to pages. */
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);------------(1)
unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);----------(2)
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
/* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) {
/* Zones matched by IS_ZONE_MOVABLE_CMA_ZONE() are left out of the lowmem total. */
if (IS_ZONE_MOVABLE_CMA_ZONE(zone))
continue;
if (!is_highmem(zone))
lowmem_pages += zone->managed_pages;----------------(3)
}
/* Second pass: distribute the global budgets across zones by managed_pages. */
for_each_zone(zone) {
u64 min, low;
spin_lock_irqsave(&zone->lock, flags);
/*
 * This zone's share of pages_min: pages_min * managed_pages / lowmem_pages.
 * The product is widened to u64 before the division.
 * NOTE(review): lowmem_pages is used as a divisor without a zero check here.
 */
min = (u64)pages_min * zone->managed_pages;
do_div(min, lowmem_pages);------------------------(4)
/*
 * This zone's share of pages_low. The divisor is lowmem_pages when
 * CONFIG_ZONE_MOVABLE_CMA is enabled, vm_total_pages otherwise.
 */
low = (u64)pages_low * zone->managed_pages;
if (IS_ENABLED(CONFIG_ZONE_MOVABLE_CMA))
do_div(low, lowmem_pages);-----------------(5)
else
do_div(low, vm_total_pages);
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
* need highmem pages, so cap pages_min to a small
* value here.
*
* The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
* deltas control asynch page reclaim, and so should
* not be capped for highmem.
*/
unsigned long min_pages;
/* managed_pages / 1024, clamped to the range [SWAP_CLUSTER_MAX, 128]. */
min_pages = zone->managed_pages / 1024;
min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
zone->watermark[WMARK_MIN] = min_pages;-------------(6)
} else {
/*
* If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
zone->watermark[WMARK_MIN] = min;-------------------(7)
}
/*
 * LOW  = WMARK_MIN + low + min / 4
 * HIGH = WMARK_MIN + low + min / 2
 * The deltas use the uncapped proportional 'min', even for highmem zones
 * whose WMARK_MIN was capped above.
 */
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) +
low + (min >> 2);------------------(8)
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
low + (min >> 1);------------------(9)
/* Adjust NR_ALLOC_BATCH by (HIGH - LOW) minus the counter's current value. */
__mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
spin_unlock_irqrestore(&zone->lock, flags);
}
/* update totalreserve_pages */
calculate_totalreserve_pages();
}
(1)將節點/proc/sys/vm/min_free_kbytes的值右移(PAGE_SHIFT - 10)位(頁大小爲4KB、即PAGE_SHIFT爲12時是右移2位,相當於除以4),把KB換算成頁數,得到pages_min
(2)將節點/proc/sys/vm/extra_free_kbytes的值右移(PAGE_SHIFT - 10)位(頁大小爲4KB、即PAGE_SHIFT爲12時是右移2位,相當於除以4),把KB換算成頁數,得到pages_low
(3)將所有非highmem zone的managed pages加起來
(4)一個簡單計算,min爲pages_min*(當前zone的managed pages)/ 系統除去highmem zone後的總managed pages(即第三步得到的lowmem_pages)
(5)一個簡單計算,low爲pages_low*(當前zone的managed pages)/ 除數;開啓CONFIG_ZONE_MOVABLE_CMA時,除數爲系統除去highmem zone後的總managed pages(即第三步得到的lowmem_pages),否則除數爲vm_total_pages
(6)如果是high mem,則取一個從SWAP_CLUSTER_MAX到128之間的值作爲watermark[min]的值。
(7)如果不是highmem,則採用當前zone的watermark[min]等於第四步計算出來的min。
(8)當前zone的watermark[low]=watermark[min]+(第五步得出的low)+(第四步得出的min/4)
(9)當前zone的watermark[high]=watermark[min]+(第五步得出的low)+(第四步得出的min/2)
而在kernel-4.9中:
/*
 * kernel-4.9 variant: recompute the MIN, LOW and HIGH watermarks of every
 * zone from min_free_kbytes and watermark_scale_factor, then call
 * calculate_totalreserve_pages() to refresh the total reserve.
 */
static void __setup_per_zone_wmarks(void)
{
/* min_free_kbytes is in KB; a right shift by (PAGE_SHIFT - 10) converts KB to pages. */
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
/* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) {
/* Don't consider ZMC zone to avoid small watermark */
if (IS_ZONE_MOVABLE_CMA_ZONE(zone))
continue;
if (!is_highmem(zone))
lowmem_pages += zone->managed_pages;
}
/* Second pass: derive each zone's watermarks from its managed_pages. */
for_each_zone(zone) {
u64 tmp;
spin_lock_irqsave(&zone->lock, flags);
/*
 * This zone's share of pages_min: pages_min * managed_pages / lowmem_pages.
 * The product is widened to u64 before the division.
 * NOTE(review): lowmem_pages is used as a divisor without a zero check here.
 */
tmp = (u64)pages_min * zone->managed_pages;
do_div(tmp, lowmem_pages);---------------------(1)
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
* need highmem pages, so cap pages_min to a small
* value here.
*
* The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
* deltas control asynch page reclaim, and so should
* not be capped for highmem.
*/
unsigned long min_pages;
/* managed_pages / 1024, clamped to the range [SWAP_CLUSTER_MAX, 128]. */
min_pages = zone->managed_pages / 1024;
min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
zone->watermark[WMARK_MIN] = min_pages;
} else {
/*
* If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
zone->watermark[WMARK_MIN] = tmp;
}
/*
* Set the kswapd watermarks distance according to the
* scale factor in proportion to available memory, but
* ensure a minimum size on small systems.
*/
/*
 * tmp becomes the larger of (proportional min / 4) and
 * (managed_pages * watermark_scale_factor / 10000); it is the step
 * between consecutive watermarks below.
 */
tmp = max_t(u64, tmp >> 2,
mult_frac(zone->managed_pages,
watermark_scale_factor, 10000));-----------(2)
/* LOW = WMARK_MIN + tmp, HIGH = WMARK_MIN + 2 * tmp. */
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;------(3)
spin_unlock_irqrestore(&zone->lock, flags);
}
/* update totalreserve_pages */
calculate_totalreserve_pages();
}
(1)非highmem zone的watermark min的值,highmem zone watermark min計算和4.4一致
(2)取「第一步計算結果的四分之一」與「zone managed pages * watermark_scale_factor / 10000」兩者中較大的值,作爲新的tmp
(3)zone watermark low爲watermark min+第二步計算結果,zone watermark high爲watermark min+第二步計算結果*2
和4.4比較差異如下:
zone watermark[low]/[high]的值不再和extra_free_kbytes相關,而採用watermark[min]加上zone managed pages的一定比例的方式來調節。這個比例主要和watermark_scale_factor相關:它是比例的分子,分母爲10000,默認爲10,即比例爲1/1000(0.1%),此時按比例算出的值爲zone managed pages/1000;tmp最終取該值與第一步結果的四分之一兩者中較大者