DotNet源码学习-HASHSET(初探)

命名空间:System.Collections.Generic

先看一下官方说明:类提供了高级的设置操作。集是不包含重复元素的集合,其元素无特定顺序 

HashSet <T>对象的容量是对象可以容纳的元素数。当向对象添加元素时,HashSet <T>对象的容量会自动增加。

HashSet<String> hashSet = new HashSet<string>();
hashSet.Add("test");
hashSet.Add("test");
Console.WriteLine(hashSet.Count);

打印结果:1

HashSet的默认构造方法:

public HashSet()
            : this((IEqualityComparer<T>?)null)
        { }

注:: this语法为调用自身对象的其他构造方法。

public HashSet(IEqualityComparer<T>? comparer)
{
     if (comparer == EqualityComparer<T>.Default)
     {
          comparer = null;
     }
     _comparer = comparer;
     _lastIndex = 0;
     _count = 0;
     _freeList = -1;
     _version = 0;
}

第二中创建方式,将集合作为参数。

List<string> list = new List<string>();
list.Add("test");
list.Add("test");
HashSet<string> hSet = new HashSet<string>(list);
Console.WriteLine(hSet.Count);

此时控台输出:1

此时调用的构造方法为:

public HashSet(IEnumerable<T> collection, IEqualityComparer<T>? comparer)
            : this(comparer)
{
    if (collection == null)
    {
        throw new ArgumentNullException(nameof(collection));
    }
    var otherAsHashSet = collection as HashSet<T>;
    if (otherAsHashSet != null && AreEqualityComparersEqual(this, otherAsHashSet))
    {
        CopyFrom(otherAsHashSet);
    }
    else
    {
        // to avoid excess resizes, first set size based on collection‘s count. Collection
        // may contain duplicates, so call TrimExcess if resulting hashset is larger than
        // threshold
        ICollection<T>? coll = collection as ICollection<T>;
        int suggestedCapacity = coll == null ? 0 : coll.Count;
        Initialize(suggestedCapacity);
        UnionWith(collection);
        if (_count > 0 && _slots.Length / _count > ShrinkThreshold)
        {
            TrimExcess();
        }
    }
}

在该构造方法中若存在重复值则通过查找大于或等于容量的下一个质数来使用建议的容量。

private int Initialize(int capacity)
{
    Debug.Assert(_buckets == null, "Initialize was called but _buckets was non-null");
    int size = HashHelpers.GetPrime(capacity);
    _buckets = new int[size];
    _slots = new Slot[size];
    return size;
}

下面为生成质数方法:

public static int GetPrime(int min)
{
    if (min < 0)
        throw new ArgumentException(SR.Arg_HTCapacityOverflow);
    foreach (int prime in s_primes)
    {
        if (prime >= min)
            return prime;
    }
    // Outside of our predefined table. Compute the hard way.
    for (int i = (min | 1); i < int.MaxValue; i += 2)
    {
        if (IsPrime(i) && ((i - 1) % HashPrime != 0))
            return i;
    }
    return min;
}

再次扩展-》

public static bool IsPrime(int candidate)
{
  if ((candidate & 1) != 0)
  {
    int limit = (int)Math.Sqrt(candidate);//取平方
    for (int divisor = 3; divisor <= limit; divisor += 2)
    {
      if ((candidate % divisor) == 0)
        return false;
    }
    return true;
  }
  return candidate == 2;
}
internal struct Slot
{
  internal int hashCode;      // Lower 31 bits of hash code, -1 if unused
  internal int next;          // Index of next entry, -1 if last
  internal T value;
}

存储LIst集合:

public void UnionWith(IEnumerable<T> other)
{
  if (other == null)
  {
    throw new ArgumentNullException(nameof(other));
  }
  foreach (T item in other)
  {
    AddIfNotPresent(item);
  }
}

继续往下追踪:

private bool AddIfNotPresent(T value)
{
  if (_buckets == null)
  {
    Initialize(0);
  }

  int hashCode;
  int bucket;
  int collisionCount = 0;
  Slot[] slots = _slots;

  IEqualityComparer<T>? comparer = _comparer;

  if (comparer == null)
  {
  //取HASHCODE
    hashCode = value == null ? 0 : InternalGetHashCode(value.GetHashCode());
    bucket = hashCode % _buckets!.Length;

    if (default(T)! != null) // TODO-NULLABLE: default(T) == null warning (https://github.com/dotnet/roslyn/issues/34757)
    {
      for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
      {
        if (slots[i].hashCode == hashCode && EqualityComparer<T>.Default.Equals(slots[i].value, value))
        {
          return false;
        }

        if (collisionCount >= slots.Length)
        {
          // The chain of entries forms a loop, which means a concurrent update has happened.
          throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);
        }
        collisionCount++;
      }
    }
    else
    {
      // Object type: Shared Generic, EqualityComparer<TValue>.Default won‘t devirtualize
      // https://github.com/dotnet/coreclr/issues/17273
      // So cache in a local rather than get EqualityComparer per loop iteration
      EqualityComparer<T> defaultComparer = EqualityComparer<T>.Default;

      for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
      {
        if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, value))
        {
          return false;
        }

        if (collisionCount >= slots.Length)
        {
          // The chain of entries forms a loop, which means a concurrent update has happened.
          throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);
        }
        collisionCount++;
      }
    }
  }
  else
  {
    hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value));
    bucket = hashCode % _buckets!.Length;

    for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
    {
      if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, value))
      {
        return false;
      }

      if (collisionCount >= slots.Length)
      {
        // The chain of entries forms a loop, which means a concurrent update has happened.
        throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);
      }
      collisionCount++;
    }
  }

  int index;
  if (_freeList >= 0)
  {
    index = _freeList;
    _freeList = slots[index].next;
  }
  else
  {
    if (_lastIndex == slots.Length)
    {
      IncreaseCapacity();
      // this will change during resize
      slots = _slots;
      bucket = hashCode % _buckets.Length;
    }
    index = _lastIndex;
    _lastIndex++;
  }
  slots[index].hashCode = hashCode;
  slots[index].value = value;
  slots[index].next = _buckets[bucket] - 1;
  _buckets[bucket] = index + 1;
  _count++;
  _version++;

  return true;
}
private const int Lower31BitMask = 0x7FFFFFFF;

获取内部的HASHCODE

private static int InternalGetHashCode(T item, IEqualityComparer<T>? comparer)
{
  if (item == null)
  {
    return 0;
  }
  int hashCode = comparer?.GetHashCode(item) ?? item.GetHashCode();
  return hashCode & Lower31BitMask;
}

划重点-》

slots[index].hashCode = hashCode;
slots[index].value = value;
slots[index].next = _buckets[bucket] - 1;

最终列表中的值存储到结构中。

使用对象初始化HASHSET时,如果相同

HashSet<string> hashSet = new HashSet<string>();
 hashSet.Add("test");
 hashSet.Add("test");
 HashSet<string> hSet = new HashSet<string>(hashSet);
private void CopyFrom(HashSet<T> source)
{
  int count = source._count;
  if (count == 0)
  {
    // As well as short-circuiting on the rest of the work done,
    // this avoids errors from trying to access otherAsHashSet._buckets
    // or otherAsHashSet._slots when they aren‘t initialized.
    return;
  }

  int capacity = source._buckets!.Length;
  int threshold = HashHelpers.ExpandPrime(count + 1);

  if (threshold >= capacity)
  {
    _buckets = (int[])source._buckets.Clone();
    _slots = (Slot[])source._slots.Clone();

    _lastIndex = source._lastIndex;
    _freeList = source._freeList;
  }
  else
  {
    int lastIndex = source._lastIndex;
    Slot[] slots = source._slots;
    Initialize(count);
    int index = 0;
    for (int i = 0; i < lastIndex; ++i)
    {
      int hashCode = slots[i].hashCode;
      if (hashCode >= 0)
      {
        AddValue(index, hashCode, slots[i].value);
        ++index;
      }
    }
    Debug.Assert(index == count);
    _lastIndex = index;
  }
  _count = count;
}
public static int ExpandPrime(int oldSize)//返回要增长到的哈希表大小
{
  int newSize = 2 * oldSize;

  // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
  // Note that this check works even when _items.Length overflowed thanks to the (uint) cast
  if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
  {
    Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
    return MaxPrimeArrayLength;
  }

  return GetPrime(newSize);
}

这里只贴出HashSet声明创建对象的两种方式。

下篇再研究具体实现?

DotNet源码学习-HASHSET(初探)

相关推荐