HBase LruBlockCache源碼分析

HBase LruBlockCache源碼分析

本章對LruBlockCache源碼進行簡單分析和講解,其中包括如下部分:

  • 構造函數
  • getBlock
  • cacheBlock
  • evict

構造函數說明

LruBlockCache 構造函數

LruBlockCache 構造函數

從構造函數可以看出,整體由以下幾個部分組成

  1. LruBlockCache 內存比例

LruBlockCache 內存比例

  1. 統計參數
// Statistics holder, constructed with this class's simple name.
this.stats = new CacheStats(this.getClass().getSimpleName());

// Counters kept alongside stats for bookkeeping.
// count is the source of the sequence numbers handed to cached blocks
// (see count.incrementAndGet() in getBlock/cacheBlock).
this.count = new AtomicLong(0);
this.elements = new AtomicLong(0);
this.dataBlockElements = new AtomicLong(0);
this.dataBlockSize = new AtomicLong(0);
// size does not start at zero: it is seeded with this.overhead.
this.size = new AtomicLong(this.overhead);

存儲容器

// Backing store of the cache: a concurrent map from BlockCacheKey to LruCachedBlock,
// sized and tuned by the three map* settings.
new ConcurrentHashMap<BlockCacheKey, LruCachedBlock>(mapInitialSize, 
		mapLoadFactor, mapConcurrencyLevel);

LruBlockCache EvictionThread (clean/10s | clean/notify)

// Run a dedicated eviction thread only when the caller asked for one;
// otherwise the field is left null.
if (!evictionThread) {
	this.evictionThread = null;
} else {
	this.evictionThread = new EvictionThread(this);
	this.evictionThread.start();
}

/**
 * Eviction loop: while {@code go} is set, waits on this thread's monitor
 * (woken by a notify or after at most 10 seconds) and then asks the cache
 * to evict. The loop ends when {@code go} is cleared or the cache reference
 * yields {@code null}.
 */
@Override
public void run() {
	enteringRun = true;
	while (this.go) {
		synchronized (this) {
			try {
				// Bounded wait so an eviction pass still happens without a notify.
				this.wait(1000 * 10/*Don't wait for ever*/);
			} catch (InterruptedException interrupted) {
				LOG.warn("Interrupted eviction thread ", interrupted);
				// Restore the interrupt flag for whoever inspects it later.
				Thread.currentThread().interrupt();
			}
		}

		// NOTE(review): this.cache looks like a reference wrapper (it exposes get());
		// a null result ends the loop.
		LruBlockCache target = this.cache.get();
		if (target == null) {
			break;
		}
		target.evict();
	}
}

定時日誌 – print log/5m (default)

// Schedule the statistics task at a fixed rate; statThreadPeriod (in seconds)
// is used both as the initial delay and as the repeat interval.
this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
        statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);

/**
 * Writes a one-line INFO summary of the cache: sizes, block count,
 * hit statistics and eviction statistics.
 *
 * <p>Every field is emitted as {@code name=value} and fields are separated by
 * exactly one {@code ", "}. The two ratio fields print {@code 0} when the
 * corresponding hit counter is zero.
 */
public void logStats() {
    // Log size
    long totalSize = heapSize();
    long freeSize = maxSize - totalSize;

    // The separator is appended outside the ternaries so both branches end the same way.
    String hitRatio = stats.getHitCount() == 0
        ? "0" : StringUtils.formatPercent(stats.getHitRatio(), 2);
    String cachingHitsRatio = stats.getHitCachingCount() == 0
        ? "0" : StringUtils.formatPercent(stats.getHitCachingRatio(), 2);

    LruBlockCache.LOG.info("totalSize=" + StringUtils.byteDesc(totalSize) + ", " +
        "freeSize=" + StringUtils.byteDesc(freeSize) + ", " +
        "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
        "blockCount=" + getBlockCount() + ", " +
        "accesses=" + stats.getRequestCount() + ", " +
        "hits=" + stats.getHitCount() + ", " +
        "hitRatio=" + hitRatio + ", " +
        "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
        "cachingHits=" + stats.getHitCachingCount() + ", " +
        "cachingHitsRatio=" + cachingHitsRatio + ", " +
        "evictions=" + stats.getEvictionCount() + ", " +
        "evicted=" + stats.getEvictedCount() + ", " +
        "evictedPerRun=" + stats.evictedPerEviction());
}

getBlock分析

/**
 * Looks up a block by key.
 *
 * <p>On a hit the block's access marker is refreshed and its buffer returned.
 * On a miss, and only when this is not a repeat lookup, the request falls
 * through to {@code victimHandler} (if one is set); a block found there is
 * re-inserted into this cache when {@code caching} is true.
 *
 * @param cacheKey key of the block to fetch
 * @param caching whether the caller wants the block cached; also passed to the stats calls
 * @param repeat whether this is a repeated lookup; repeats record no miss and skip the victim handler
 * @param updateCacheMetrics whether hit/miss statistics should be updated
 * @return the cached block, or {@code null} when it is found nowhere
 */
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
      boolean updateCacheMetrics) {
	LruCachedBlock cached = map.get(cacheKey);

	// Hit: record it, refresh the access marker and hand back the buffer.
	if (cached != null) {
		if (updateCacheMetrics) {
			stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
		}
		cached.access(count.incrementAndGet());
		return cached.getBuffer();
	}

	// Miss: only first-time lookups are counted.
	if (!repeat && updateCacheMetrics) {
		stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
	}

	// Without a victim handler, or on a repeat lookup, there is nowhere else to look.
	if (victimHandler == null || repeat) {
		return null;
	}

	// Try the victim handler, and pull the block back into this cache if asked to cache.
	Cacheable fromVictim = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
	if (fromVictim != null && caching) {
		cacheBlock(cacheKey, fromVictim, false, true);
	}
	return fromVictim;
}

cacheBlock分析

/**
 * Inserts a block into the cache.
 *
 * <p>The block is rejected when it is larger than {@code maxBlockSize}, when
 * the key is already present, or when the cache has reached its hard size
 * limit. After a successful insert an eviction is triggered if the new size
 * exceeds the acceptable size and no eviction is already running.
 *
 * @param cacheKey key under which the block is stored
 * @param buf the block to cache
 * @param inMemory passed through to the new {@code LruCachedBlock}
 * @param cacheDataInL1 not used in this excerpt
 * @throws RuntimeException if the key is already cached with different contents
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
      final boolean cacheDataInL1) {

	if (buf.heapSize() > maxBlockSize) {
		// (elided in this excerpt) update stats and log a warning
		return;
	}

	LruCachedBlock cb = map.get(cacheKey);
	if (cb != null) { // the key is already cached
		// The cached value must match the buffer being inserted.
		if (BlockCacheUtil.compareCacheBlock(buf, cb.getBuffer()) != 0) {
			// Contents differ: fail loudly.
			throw new RuntimeException("Cached block contents differ, which should not have happened."
				  + "cacheKey:" + cacheKey);
		}
		// (elided in this excerpt) log a warning
		return;
	}

	long currentSize = size.get();
	long currentAcceptableSize = acceptableSize();
	long hardLimitSize = (long) (hardCapacityLimitFactor * currentAcceptableSize);
	if (currentSize >= hardLimitSize) {
		stats.failInsert();

		// (elided in this excerpt) trace logging
		if (!evictionInProgress) {
			// Too much space in use: start an eviction and drop this insert.
			runEviction();
		}
		return;
	}

	cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
	map.put(cacheKey, cb);

	// Update the size and element counters.
	long newSize = updateSizeMetrics(cb, false);
	// Only the increment matters here; the returned value is not needed.
	elements.incrementAndGet();
	if (buf.getBlockType().isData()) {
		dataBlockElements.incrementAndGet();
	}

	if (newSize > currentAcceptableSize && !evictionInProgress) {
		runEviction(); // reclaim space
	}
}

evict分析

該邏輯較爲複雜,換種方式來說明一下

  • 閾值: bytesToFree = currentSize - minSize(),只有當 bytesToFree > 0 時才會進行清理;
  • 過程(只說明正常情況,異常情況不予說明)(僞代碼):
    1. [single, multi, memory].map(new BlockBucket(_))
    2. cache.entrys.foreach(entry => findBucket(entry.blockType).add(entry))
    3. queue = new PriorityQueue; queue.add(buckets)
    4. queue.foreach(_.free)
    5. print log
注意點說明
  1. PriorityQueue: 可以翻譯成【優先隊列】,即添加至該隊列中的對象 將會被排序
/**
 * Orders buckets by their overflow, smallest overflow first.
 *
 * @param that the bucket to compare against
 * @return negative, zero or positive as this bucket's overflow is less than,
 *         equal to or greater than the other's
 */
public int compareTo(BlockBucket that) {
	long ownOverflow = this.overflow();
	long otherOverflow = that.overflow();
	return Long.compare(ownOverflow, otherOverflow);
}
  2. BlockBucket構造函數
/**
 * Creates an empty bucket.
 *
 * @param name bucket label (the accompanying notes list single | multi | memory)
 * @param bytesToFree passed to the candidate queue as its first constructor argument
 * @param blockSize passed to the candidate queue as its second constructor argument
 * @param bucketSize the size this bucket is allowed to hold; overflow() is measured against it
 */
public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
	this.name = name;
	// NOTE(review): the original notes say callers compute this as
	// (long) Math.floor(maxSize * [single|multi|memory]Factor * minFactor) - confirm against the caller.
	this.bucketSize = bucketSize;
	// Starts at zero and grows by the heap size of every block passed to add().
	this.totalSize = 0;
	// Holds the blocks selected as eviction candidates.
	this.queue = new LruCachedBlockQueue(bytesToFree, blockSize);
}

/**
 * Accounts for a block in this bucket and offers it to the candidate queue.
 *
 * @param block the cached block to add
 */
public void add(LruCachedBlock block) {
	// totalSize only ever grows here, regardless of whether the queue
	// ends up retaining the block.
	long blockBytes = block.heapSize();
	totalSize += blockBytes;
	// The queue applies its own selection when accepting the block.
	queue.add(block);
}

/**
 * @return how far the accumulated size of this bucket exceeds its allotted
 *         bucketSize; negative when the bucket is under its allotment
 */
public long overflow() {
	long excess = totalSize - bucketSize;
	return excess;
}
  3. LruCachedBlockQueue
/** LruCachedBlockQueue */
/** MinMaxPriorityQueue<LruCachedBlock> queue */
// The add method of LruCachedBlockQueue.
// Offers a block to the queue: while the queue holds less than maxSize bytes every
// block is accepted; afterwards a block is accepted only if it compares greater
// than the current head.
public void add(LruCachedBlock cb) {
	if(heapSize < maxSize) {
		// The bytes held so far (heapSize) are below the target (maxSize).
		// NOTE(review): BlockBucket constructs this queue with bytesToFree as the first
		// argument, so maxSize is presumably the number of bytes to reclaim - confirm.
		queue.add(cb);
		heapSize += cb.heapSize();
		
	} else {
		// The bytes held so far have already reached the target (maxSize).
		LruCachedBlock head = queue.peek(); // per the original note: the entry with the largest accessTime in the queue
		if(cb.compareTo(head) > 0) { 
			// LruCachedBlock.compareTo is reversed, so this means
			// cb.accessTime < head.accessTime: cb goes in, head is the one to swap out.
			heapSize += cb.heapSize();
			heapSize -= head.heapSize();
			if(heapSize > maxSize) {
				// Still above the target without head: really drop head.
				queue.poll();
			} else {
				// Dropping head would fall to or below the target: keep head and count it again.
				heapSize += head.heapSize();
			}
			queue.add(cb);
		}
	}
	// Intended net effect, per the original notes:
	// 1. the queue retains only the blocks with the smallest accessTime
	// 2. the total size of the retained blocks ends up just at or above the amount to reclaim
}

/** LruCachedBlock */
/**
 * Compares blocks by {@code accessTime} in reverse: the block with the
 * smaller {@code accessTime} is considered the greater one.
 *
 * <p>NOTE(review): accessTime appears to be the sequence number taken from
 * the cache's counter (count.incrementAndGet()) rather than an access count
 * or a clock time - confirm against LruCachedBlock.
 *
 * @param that the block to compare against
 * @return 1 if this block's accessTime is smaller, -1 if it is larger, 0 if equal
 */
@Override
public int compareTo(LruCachedBlock that) {
	// Arguments are swapped on purpose to get the reversed ordering.
	return Long.compare(that.accessTime, this.accessTime);
}
注意點總結
  1. 循環將cache.entrys添加到BlockBucket中,最後BlockBucket中只會留下【被選擇清理的對象】
  2. 添加到PriorityQueue中的BlockBucket,會按照overflow的大小(實際存放量 與 min存放量之間的差值)進行排序,即將超出最多的bucket優先進行清理。同時會儘可能的保留更多的block:
	// Free from this bucket no more than its overflow, and no more than an even
	// share of what is still left to free across the remaining buckets.
	long bucketBytesToFree = Math.min(overflow,
		(bytesToFree - bytesFreed) / remainingBuckets);
	// Add what the bucket actually released to the running total.
	bytesFreed += bucket.free(bucketBytesToFree);

LruBlockCache簡易導圖

LruBlockCache整理

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章