HashSet<T> 源碼解析

支持泛型，基礎元素結構：

// One entry of the set's slot array (m_slots): the stored value, its cached hash code,
// and the index of the next slot in the same chain.
internal struct Slot {

internal int hashCode; // Lower 31 bits of hash code, -1 if unused

// The element stored in this slot (reset to default(T) when the slot is released by Remove).
internal T value;

internal int next; // Index of next entry, -1 if last

}

其 Add 方法如下：

/// <summary>
/// Adds item to the set by delegating to AddIfNotPresent.
/// </summary>
/// <param name="item">element to add</param>
/// <returns>whatever AddIfNotPresent returns for item</returns>
public bool Add(T item) {
    bool wasAdded = AddIfNotPresent(item);
    return wasAdded;
}

/// <summary>

/// Adds value to HashSet if not contained already

/// Returns true if added and false if already present

/// </summary>

/// <param name="value">value to add</param>

/// <returns>true if the value was added; false if an equal value was already present</returns>

private bool AddIfNotPresent(T value) {

// Storage is created on first use; Initialize is defined outside this excerpt.
if (m_buckets == null) {

Initialize(0);

}

// InternalGetHashCode is defined elsewhere; presumably it returns a non-negative value
// (Slot.hashCode is documented as "lower 31 bits of hash code") — TODO confirm.
int hashCode = InternalGetHashCode(value);

// Bucket that this hash code maps to.
int bucket = hashCode % m_buckets.Length;

#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE

// Counts the chain entries walked below without finding a match.
int collisionCount = 0;

#endif

// m_buckets stores (slot index + 1), so subtracting 1 yields the chain head;
// a negative index (empty bucket, or next == -1) ends the walk.
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) {

if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value)) {

// An equal element is already stored; the set is left unchanged.
return false;

}

#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE

collisionCount++;

#endif

}

// Pick the slot that will hold the new element.
int index;

// Prefer the slot at the head of the free list, then advance the head to the next free slot.
if (m_freeList >= 0) {

index = m_freeList;

m_freeList = m_slots[index].next;

}

// No free slot to reuse: append at m_lastIndex, growing the arrays first when they are full.
else {

if (m_lastIndex == m_slots.Length) {

IncreaseCapacity();

// this will change during resize

bucket = hashCode % m_buckets.Length;

}

index = m_lastIndex;

m_lastIndex++;

}

// Fill the slot and link it in as the new head of its bucket's chain.
m_slots[index].hashCode = hashCode;

m_slots[index].value = value;

m_slots[index].next = m_buckets[bucket] - 1;

m_buckets[bucket] = index + 1;

m_count++;

// NOTE(review): m_version is bumped on every mutation; presumably enumerators use it to detect changes — confirm.
m_version++;

#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE

// Too many collisions with a well-known comparer: switch to a randomized comparer and
// rebuild at the same size, asking SetCapacity to recompute the stored hash codes.
if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(m_comparer)) {

m_comparer = (IEqualityComparer<T>) HashHelpers.GetRandomizedEqualityComparer(m_comparer);

SetCapacity(m_buckets.Length, true);

}

#endif // FEATURE_RANDOMIZED_STRING_HASHING

return true;

}

通過以上方法可得知，在 HashSet 中添加元素時，首先要判斷該元素是否已存在，如已存在則返回 false 並終止添加。HashSet 允許將 null 添加進集合。經源碼分析可發現，HashSet 默認使用 EqualityComparer<T>.Default 來生成元素的 HashCode；這和 Hashtable、Dictionary 默認情況下的 HashCode 生成方式相同，其中 Dictionary 可以在構造方法中自定義比較函數。源碼中 hashCode 是 int 類型變量，如遇到超大數據量需注意溢出問題。

HashSet 基礎數據存儲結構如下：

// m_buckets[b] holds (index of the first slot in bucket b's chain) + 1; 0 means the bucket is empty.
private int[] m_buckets;

// Entry storage; entries of one bucket are chained through Slot.next.
private Slot[] m_slots;

// Number of elements currently in the set (incremented on add, decremented on remove).
private int m_count;

// Index of the next never-used slot in m_slots; reset to 0 when the set becomes empty.
private int m_lastIndex;

// Head of the chain of slots released by Remove, or -1 when there is none.
private int m_freeList;

// Comparer used for the equality checks (presumably also for hashing via InternalGetHashCode — TODO confirm).
private IEqualityComparer<T> m_comparer;

// Incremented on every successful add/remove. NOTE(review): presumably lets enumerators detect modification — confirm.
private int m_version;

其添加元素的處理方式如下：

首先以 hashCode % m_buckets.Length 爲下標，從 m_buckets 中取得對應鏈表的首個元素在 m_slots 中的存儲位置（m_buckets 中保存的是下標加 1，0 表示空桶），再沿 m_slots 元素的 next 指針形成的鏈表逐個遍歷；如果發現鏈表中某個元素的 hashCode 與待添加元素計算出的 hashCode 相同，並且調用 Equals 方法比較也相同，則返回 false，否則執行添加操作。核心代碼如下：

// Walk the chain of the bucket that hashCode maps to. m_buckets stores (slot index + 1),
// so subtracting 1 yields the chain head, and a negative index marks the end of the chain.
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) {
    if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value)) {
        // An equal element is already present, so nothing is added.
        return false;
    }
}

執行添加操作處理方式如下：

分兩種情況進行處理，核心代碼如下：

// Index of the slot that will receive the new element.
int index;

// Case 1: a previously released slot exists; reuse the head of the free list
// and advance the head to the next free slot.
if (m_freeList >= 0) {

index = m_freeList;

m_freeList = m_slots[index].next;

}

// Case 2: no released slot; append at m_lastIndex, growing the arrays first when they are full.
else {

if (m_lastIndex == m_slots.Length) {

IncreaseCapacity();

// this will change during resize

bucket = hashCode % m_buckets.Length;

}

index = m_lastIndex;

m_lastIndex++;

}

如上代碼，m_freeList 指向數組中下一個可複用的空閒元素位置，當其爲 -1 時代表已沒有空閒元素可供賦值使用。結合 Remove 方法可以得出結論：當有元素被刪除時，m_freeList 指向最後一個被刪除的元素；被刪除元素實際上是 hashCode 被置爲 -1、value 屬性被置爲該類型的缺省值，其 next 指向鏈表中的下一個被刪除元素（即前一次被刪除的元素）。也可以這樣理解：所有被刪除的元素實際上形成一個以 m_freeList 爲頭指針的鏈表，移除一個元素即在該鏈表頭處添加一個節點。同理，以上的插入邏輯就很好理解了。第一種情況 m_freeList >= 0，證明數組中 m_lastIndex 之前的元素中存在已被刪除的空元素，優先給這些元素賦值。第二種情況 m_freeList = -1，即之前被刪除的元素已全部被重新賦值；m_lastIndex 指向最後一個已用元素的下一個位置，當其值等於數組長度時，代表數組空間已被使用完畢，需要調用 IncreaseCapacity() 函數重新申請空間。重新申請的空間個數由下面的 ExpandPrime 方法確定，它會根據現有元素個數來決定重新申請的空間大小（約爲原來的兩倍），從而避免多次重複申請空間造成的性能問題。重新分配空間採用數組複製的方式進行，具體分配操作由 SetCapacity 方法執行，有興趣的可參看下面的方法。第二種情況下，如果 m_lastIndex 仍指向可用的元素，即已分配的空間尚未用完，則直接對該位置賦值。賦值操作比較簡單，不再贅述。

相關方法如下：

/// <summary>
/// Removes item from the set if it is present.
/// </summary>
/// <param name="item">element to remove</param>
/// <returns>true if the element was found and removed; false if it was not found or the set has no storage yet</returns>
public bool Remove(T item) {
    if (m_buckets != null) {
        int hashCode = InternalGetHashCode(item);
        int bucket = hashCode % m_buckets.Length;

        // Index of the previously visited slot in the chain; -1 while at the chain head.
        int last = -1;

        // m_buckets stores (slot index + 1), so subtracting 1 yields the chain head.
        for (int i = m_buckets[bucket] - 1; i >= 0; last = i, i = m_slots[i].next) {
            if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, item)) {
                if (last < 0) {
                    // first iteration; update buckets
                    m_buckets[bucket] = m_slots[i].next + 1;
                }
                else {
                    // subsequent iterations; update 'next' pointers
                    m_slots[last].next = m_slots[i].next;
                }

                // Mark the slot unused, drop the stored value, and chain it in front of the free list.
                m_slots[i].hashCode = -1;
                m_slots[i].value = default(T);
                m_slots[i].next = m_freeList;

                m_count--;
                m_version++;

                if (m_count == 0) {
                    // The set is empty again: restart from slot 0 and discard the free list.
                    m_lastIndex = 0;
                    m_freeList = -1;
                }
                else {
                    // The released slot becomes the new head of the free list.
                    m_freeList = i;
                }
                return true;
            }
        }
    }

    // either m_buckets is null or wasn't found
    return false;
}

// Computes the size a hashtable of oldSize should grow to.
public static int ExpandPrime(int oldSize)
{
    int doubled = 2 * oldSize;

    // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
    // The (uint) cast keeps this check valid even when doubling overflowed into a negative int.
    bool exceedsMax = (uint)doubled > MaxPrimeArrayLength;
    if (exceedsMax && oldSize < MaxPrimeArrayLength)
    {
        Contract.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
        return MaxPrimeArrayLength;
    }

    // Otherwise let GetPrime choose the size based on the doubled value.
    return GetPrime(doubled);
}

/// <summary>
/// Replaces the slot and bucket arrays with new arrays of length newSize and
/// rebuilds the bucket chains for the first m_lastIndex slots.
/// </summary>
/// <param name="newSize">length of the new arrays; asserted to be prime</param>
/// <param name="forceNewHashCodes">when true, the stored hash code of every used slot is recomputed via InternalGetHashCode before the chains are rebuilt</param>
private void SetCapacity(int newSize, bool forceNewHashCodes) {
    Contract.Assert(HashHelpers.IsPrime(newSize), "New size is not prime!");
    Contract.Assert(m_buckets != null, "SetCapacity called on a set with no elements");

    // Carry over every slot that has ever been handed out (indices below m_lastIndex).
    Slot[] newSlots = new Slot[newSize];
    if (m_slots != null) {
        Array.Copy(m_slots, 0, newSlots, 0, m_lastIndex);
    }

    if(forceNewHashCodes) {
        for(int i = 0; i < m_lastIndex; i++) {
            // Slots marked unused (hashCode == -1) keep their marker.
            if(newSlots[i].hashCode != -1) {
                newSlots[i].hashCode = InternalGetHashCode(newSlots[i].value);
            }
        }
    }

    // Rebuild the chains: each slot becomes the new head of its bucket, and
    // buckets store (slot index + 1) so that 0 means "empty".
    // NOTE(review): a slot released by Remove has hashCode -1, which would give a negative
    // bucket index here; this loop appears to assume no released slots remain below
    // m_lastIndex when it runs — confirm against the callers.
    int[] newBuckets = new int[newSize];
    for (int i = 0; i < m_lastIndex; i++) {
        int bucket = newSlots[i].hashCode % newSize;
        newSlots[i].next = newBuckets[bucket] - 1;
        newBuckets[bucket] = i + 1;
    }

    m_slots = newSlots;
    m_buckets = newBuckets;
}

HashSet<T> 源碼解析

C++ 中char wchar wchar_t char16_t char32_t 以及中文編碼的問題

OpenCV Windows下的編譯安裝

正則表達式

Angularjs 1.x 中View模板文件瀏覽器緩存問題

nginx , auto/options 中代碼 CC=${CC:-cc} 的理解

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結