android裏面kernel-4.4和kernel-4.9中關於memory zone watermark計算方法的變動

今天看到一個有趣的地方就是在計算memory zone的watermark值的變化:

在kernel-4.4中:

/*
 * kernel-4.4 variant: recompute WMARK_MIN/WMARK_LOW/WMARK_HIGH for every zone
 * from the min_free_kbytes and extra_free_kbytes tunables, then refresh the
 * total reserve via calculate_totalreserve_pages().
 *
 * The trailing "----(N)" markers are the article's annotation anchors; they
 * are not part of the kernel source.
 */
static void __setup_per_zone_wmarks(void)
{
	/* Convert the KB tunables to page counts: KB >> (PAGE_SHIFT - 10). */
	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);------------(1)
	unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);----------(2)
	unsigned long lowmem_pages = 0;
	struct zone *zone;
	unsigned long flags;

	/* Calculate total number of !ZONE_HIGHMEM pages */
	for_each_zone(zone) {
		/* Zones matching IS_ZONE_MOVABLE_CMA_ZONE() are left out of the sum. */
		if (IS_ZONE_MOVABLE_CMA_ZONE(zone))
			continue;
		if (!is_highmem(zone))
			lowmem_pages += zone->managed_pages;----------------(3)
	}

	/*
	 * NOTE(review): lowmem_pages is used as a divisor below and nothing in
	 * this function guards against it being 0 - presumably guaranteed
	 * elsewhere, confirm.
	 */
	for_each_zone(zone) {
		u64 min, low;

		/* All watermark updates for this zone happen under zone->lock. */
		spin_lock_irqsave(&zone->lock, flags);
		/*
		 * min = pages_min * managed_pages / lowmem_pages, i.e. this zone's
		 * proportional share of pages_min. The product is widened to u64
		 * before the division; do_div() leaves the quotient in 'min'.
		 */
		min = (u64)pages_min * zone->managed_pages;
		do_div(min, lowmem_pages);------------------------(4)
		/*
		 * low = this zone's share of pages_low. The divisor is lowmem_pages
		 * when CONFIG_ZONE_MOVABLE_CMA is enabled, vm_total_pages otherwise.
		 */
		low = (u64)pages_low * zone->managed_pages;
		if (IS_ENABLED(CONFIG_ZONE_MOVABLE_CMA))
			do_div(low, lowmem_pages);-----------------(5)
		else
			do_div(low, vm_total_pages);

		if (is_highmem(zone)) {
			/*
			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
			 * need highmem pages, so cap pages_min to a small
			 * value here.
			 *
			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
			 * deltas control asynch page reclaim, and so should
			 * not be capped for highmem.
			 */
			unsigned long min_pages;

			/* managed_pages / 1024, clamped to [SWAP_CLUSTER_MAX, 128]. */
			min_pages = zone->managed_pages / 1024;
			min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
			zone->watermark[WMARK_MIN] = min_pages;-------------(6)
		} else {
			/*
			 * If it's a lowmem zone, reserve a number of pages
			 * proportionate to the zone's size.
			 */
			zone->watermark[WMARK_MIN] = min;-------------------(7)
		}

		/*
		 * LOW  = WMARK_MIN + low + min / 4
		 * HIGH = WMARK_MIN + low + min / 2
		 * The deltas use the uncapped proportional 'min', even for highmem
		 * zones whose WMARK_MIN was capped above.
		 */
		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
					low + (min >> 2);------------------(8)
		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
					low + (min >> 1);------------------(9)

		/*
		 * The delta passed is (high - low) minus the current NR_ALLOC_BATCH
		 * value; assuming __mod_zone_page_state() adds the delta, the
		 * counter ends up at high - low.
		 */
		__mod_zone_page_state(zone, NR_ALLOC_BATCH,
			high_wmark_pages(zone) - low_wmark_pages(zone) -
			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));

		spin_unlock_irqrestore(&zone->lock, flags);
	}

	/* update totalreserve_pages */
	calculate_totalreserve_pages();
}

(1)將節點/proc/sys/vm/min_free_kbytes的值右移(PAGE_SHIFT - 10)位(頁大小爲4KB時即右移2位,相當於除以4),把KB換算成頁數,得到pages_min

(2)將節點/proc/sys/vm/extra_free_kbytes的值右移(PAGE_SHIFT - 10)位(頁大小爲4KB時即右移2位,相當於除以4),把KB換算成頁數,得到pages_low

(3)將所有非highmem zone的managed pages加起來(滿足IS_ZONE_MOVABLE_CMA_ZONE的zone會被跳過,不計入),得到lowmem_pages

(4)一個簡單計算,min爲pages_min*(當前zone的managed pages)/ 系統除去highmem zone的總的managed pages

(5)一個簡單計算,low爲pages_low*(當前zone的managed pages)/ 除數:開啓CONFIG_ZONE_MOVABLE_CMA時,除數爲系統除去highmem zone的總的managed pages;否則除數爲vm_total_pages

(6)如果是high mem,則取(當前zone的managed pages / 1024),並將其限制在SWAP_CLUSTER_MAX到128之間,作爲watermark[min]的值。

(7)如果不是highmem,則當前zone的watermark[min]等於第四步計算出來的min。

(8)當前zone的watermark[low]=watermark[min]+(第五步得出的low)+(第四步得出的min/4)

(9)當前zone的watermark[high]=watermark[min]+(第五步得出的low)+(第四步得出的min/2)

而在kernel-4.9中:

/*
 * kernel-4.9 variant: recompute WMARK_MIN/WMARK_LOW/WMARK_HIGH for every zone
 * from min_free_kbytes and watermark_scale_factor, then refresh the total
 * reserve via calculate_totalreserve_pages().
 *
 * The trailing "----(N)" markers are the article's annotation anchors; they
 * are not part of the kernel source.
 */
static void __setup_per_zone_wmarks(void)
{
	/* Convert the KB tunable to a page count: KB >> (PAGE_SHIFT - 10). */
	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
	unsigned long lowmem_pages = 0;
	struct zone *zone;
	unsigned long flags;

	/* Calculate total number of !ZONE_HIGHMEM pages */
	for_each_zone(zone) {
		/* Don't consider ZMC zone to avoid small watermark */
		if (IS_ZONE_MOVABLE_CMA_ZONE(zone))
			continue;
		if (!is_highmem(zone))
			lowmem_pages += zone->managed_pages;
	}

	/*
	 * NOTE(review): lowmem_pages is used as a divisor below and nothing in
	 * this function guards against it being 0 - presumably guaranteed
	 * elsewhere, confirm.
	 */
	for_each_zone(zone) {
		u64 tmp;

		/* All watermark updates for this zone happen under zone->lock. */
		spin_lock_irqsave(&zone->lock, flags);
		/*
		 * tmp = pages_min * managed_pages / lowmem_pages, i.e. this zone's
		 * proportional share of pages_min. The product is widened to u64
		 * before the division; do_div() leaves the quotient in 'tmp'.
		 */
		tmp = (u64)pages_min * zone->managed_pages;
		do_div(tmp, lowmem_pages);---------------------(1)
		if (is_highmem(zone)) {
			/*
			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
			 * need highmem pages, so cap pages_min to a small
			 * value here.
			 *
			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
			 * deltas control asynch page reclaim, and so should
			 * not be capped for highmem.
			 */
			unsigned long min_pages;

			/* managed_pages / 1024, clamped to [SWAP_CLUSTER_MAX, 128]. */
			min_pages = zone->managed_pages / 1024;
			min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
			zone->watermark[WMARK_MIN] = min_pages;
		} else {
			/*
			 * If it's a lowmem zone, reserve a number of pages
			 * proportionate to the zone's size.
			 */
			zone->watermark[WMARK_MIN] = tmp;
		}

		/*
		 * Set the kswapd watermarks distance according to the
		 * scale factor in proportion to available memory, but
		 * ensure a minimum size on small systems.
		 *
		 * I.e. tmp becomes the larger of (tmp / 4) and
		 * (managed_pages * watermark_scale_factor / 10000); from here on
		 * it is the spacing between consecutive watermarks.
		 */
		tmp = max_t(u64, tmp >> 2,
			    mult_frac(zone->managed_pages,
				      watermark_scale_factor, 10000));-----------(2)

		/* LOW = WMARK_MIN + tmp, HIGH = WMARK_MIN + 2 * tmp. */
		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;------(3)

		spin_unlock_irqrestore(&zone->lock, flags);
	}

	/* update totalreserve_pages */
	calculate_totalreserve_pages();
}

(1)tmp爲pages_min*(當前zone的managed pages)/ 系統除去highmem zone的總的managed pages,它就是非highmem zone的watermark min的值;highmem zone watermark min的計算和4.4一致

(2)取(第一步計算結果的四分之一)和(zone managed pages * watermark_scale_factor / 10000)兩者中較大的值作爲結果,即前者是結果的下限

(3)zone watermark low爲watermark min+第二步計算結果,zone watermark high爲watermark min+第二步計算結果*2

和4.4比較差異如下:

zone watermark[low]/[high]的值不再和extra_free_kbytes相關,而採用按照zone managed pages的比例+ watermark[min]的方式來調節,比例主要和watermark_scale_factor相關,這個值是比例的分子,分母爲10000,默認爲10,即比例爲1/1000(0.1%),此時tmp的結果爲(zone managed pages/1000)和(第一步計算結果的四分之一)兩者中較大的值

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章