我试图了解C#的通用Dictionary的内部工作方式。我反编译并获得了字典代码。我重新编写了它,将键/值/哈希码略微分开到单独的数组(我省略了枚举器和接口实现,因为它们是不相关的)

using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;

public class Dictionary<TKey, TValue> {
    int[] _Buckets;
    int[] _HashCodes;
    int[] _Next;
    int _Count;
    int _FreeList;
    int _FreeCount;
    TKey[] _Keys;
    TValue[] _Values;

    readonly IEqualityComparer<TKey> _Comparer;

    public int Count {
        get { return _Count - _FreeCount; }
    }

    public TValue this[TKey key] {
        get {
            int index = FindIndex(key);
            if (index >= 0)
                return _Values[index];
            throw new KeyNotFoundException(key.ToString());
        }
        set { Insert(key, value, false); }
    }

    public Dictionary() : this(0, null) {
    }

    public Dictionary(int capacity) : this(capacity, null) {
    }

    public Dictionary(IEqualityComparer<TKey> comparer) : this(0, comparer) {
    }

    public Dictionary(int capacity, IEqualityComparer<TKey> comparer) {
        if (capacity < 0)
            throw new ArgumentOutOfRangeException("capacity");
        Initialize(capacity);
        _Comparer = (comparer ?? EqualityComparer<TKey>.Default);
    }

    public Dictionary(IDictionary<TKey, TValue> dictionary) : this(dictionary, null)
    {
    }

    public Dictionary(IDictionary<TKey, TValue> dictionary, IEqualityComparer<TKey> comparer)
        : this((dictionary != null) ? dictionary.Count : 0, comparer)
    {
        if (dictionary == null)
            throw new ArgumentNullException("dictionary");

        foreach (KeyValuePair<TKey, TValue> current in dictionary)
            Add(current.Key, current.Value);
    }

    public bool ContainsValue(TValue value) {
        if (value == null) {
            for (int i = 0; i < _Count; i++) {
                if (_HashCodes[i] >= 0 && _Values[i] == null)
                    return true;
            }
        } else {
            var defaultComparer = EqualityComparer<TValue>.Default;
            for (int i = 0; i < _Count; i++) {
                if (_HashCodes[i] >= 0 && defaultComparer.Equals(_Values[i], value))
                    return true;
            }
        }
        return false;
    }

    public bool ContainsKey(TKey key) {
        return FindIndex(key) >= 0;
    }

    public void Clear() {
        if (_Count <= 0)
            return;

        for (int i = 0; i < _Buckets.Length; i++)
            _Buckets[i] = -1;

        Array.Clear(_Keys, 0, _Count);
        Array.Clear(_Values, 0, _Count);
        Array.Clear(_HashCodes, 0, _Count);
        Array.Clear(_Next, 0, _Count);

        _FreeList = -1;
        _Count = 0;
        _FreeCount = 0;
    }

    public void Add(TKey key, TValue value) {
        Insert(key, value, true);
    }

    private void Resize(int newSize, bool forceNewHashCodes) {
        int[] bucketsCopy = new int[newSize];
        for (int i = 0; i < bucketsCopy.Length; i++)
            bucketsCopy[i] = -1;

        var keysCopy = new TKey[newSize];
        var valuesCopy = new TValue[newSize];
        var hashCodesCopy = new int[newSize];
        var nextCopy = new int[newSize];

        Array.Copy(_Values, 0, valuesCopy, 0, _Count);
        Array.Copy(_Keys, 0, keysCopy, 0, _Count);
        Array.Copy(_HashCodes, 0, hashCodesCopy, 0, _Count);
        Array.Copy(_Next, 0, nextCopy, 0, _Count);

        if (forceNewHashCodes) {
            for (int i = 0; i < _Count; i++) {
                if (hashCodesCopy[i] != -1)
                    hashCodesCopy[i] = (_Comparer.GetHashCode(keysCopy[i]) & 2147483647);
            }
        }
        for (int i = 0; i < _Count; i++) {
            int index = hashCodesCopy[i] % newSize;
            nextCopy[i] = bucketsCopy[index];
            bucketsCopy[index] = i;
        }
        _Buckets = bucketsCopy;
        _Keys = keysCopy;
        _Values = valuesCopy;
        _HashCodes = hashCodesCopy;
        _Next = nextCopy;
    }

    private void Resize()
    {
        Resize(PrimeHelper.ExpandPrime(_Count), false);
    }

    public bool Remove(TKey key)
    {
        if (key == null)
            throw new ArgumentNullException("key");

        int hash = _Comparer.GetHashCode(key) & 2147483647;
        int index = hash % _Buckets.Length;
        int num = -1;
        for (int i = _Buckets[index]; i >= 0; i = _Next[i]) {
            if (_HashCodes[i] == hash && _Comparer.Equals(_Keys[i], key)) {
                if (num < 0)
                    _Buckets[index] = _Next[i];
                else
                    _Next[num] = _Next[i];

                _HashCodes[i] = -1;
                _Next[i] = _FreeList;
                _Keys[i] = default(TKey);
                _Values[i] = default(TValue);
                _FreeList = i;
                _FreeCount++;
                return true;
            }
            num = i;
        }
        return false;
    }

    private void Insert(TKey key, TValue value, bool add) {
        if (key == null)
            throw new ArgumentNullException("key");
        if (_Buckets == null)
            Initialize(0);
        int hash = _Comparer.GetHashCode(key) & 2147483647;
        int index = hash % _Buckets.Length;
        for (int i = _Buckets[index]; i >= 0; i = _Next[i]) {
            if (_HashCodes[i] == hash && _Comparer.Equals(_Keys[i], key)) {
                if (add)
                    throw new ArgumentException("Key already exists: " + key);
                _Values[i] = value;
                return;
            }
        }
        int num;
        if (_FreeCount > 0) {
            num = _FreeList;
            _FreeList = _Next[num];
            _FreeCount--;
        } else {
            if (_Count == _Keys.Length) {
                Resize();
                index = hash % _Buckets.Length;
            }
            num = _Count;
            _Count++;
        }
        _HashCodes[num] = hash;
        _Next[num] = _Buckets[index];
        _Keys[num] = key;
        _Values[num] = value;
        _Buckets[index] = num;
    }

    private void Initialize(int capacity) {
        int prime = PrimeHelper.GetPrime(capacity);
        _Buckets = new int[prime];
        for (int i = 0; i < _Buckets.Length; i++)
            _Buckets[i] = -1;

        _Keys = new TKey[prime];
        _Values = new TValue[prime];
        _HashCodes = new int[prime];
        _Next = new int[prime];

        _FreeList = -1;
    }

    private int FindIndex(TKey key) {
        if (key == null)
            throw new ArgumentNullException("key");

        if (_Buckets != null) {
            int hash = _Comparer.GetHashCode(key) & 2147483647;
            for (int i = _Buckets[hash % _Buckets.Length]; i >= 0; i = _Next[i]) {
                if (_HashCodes[i] == hash && _Comparer.Equals(_Keys[i], key))
                    return i;
            }
        }
        return -1;
    }

    public bool TryGetValue(TKey key, out TValue value) {
        int index = FindIndex(key);
        if (index >= 0) {
            value = _Values[index];
            return true;
        }
        value = default(TValue);
        return false;
    }

    public TValue ValueOrDefault(TKey key) {
        return ValueOrDefault(key, default(TValue));
    }

    public TValue ValueOrDefault(TKey key, TValue defaultValue) {
        //return this[key, defaultValue];
        int index = FindIndex(key);
        if (index >= 0)
            return _Values[index];
        return defaultValue;
    }

    private static class PrimeHelper {

        public static readonly int[] Primes = new int[] {
            3, 7, 11, 17,
            23, 29, 37, 47,
            59, 71, 89, 107,
            131, 163, 197, 239,
            293, 353, 431, 521,
            631, 761, 919, 1103,
            1327, 1597, 1931, 2333,
            2801, 3371, 4049, 4861,
            5839, 7013, 8419, 10103,
            12143, 14591, 17519, 21023,
            25229, 30293, 36353, 43627,
            52361, 62851, 75431, 90523,
            108631, 130363, 156437, 187751,
            225307, 270371, 324449, 389357,
            467237, 560689, 672827, 807403,
            968897, 1162687, 1395263, 1674319,
            2009191, 2411033, 2893249, 3471899,
            4166287, 4999559, 5999471, 7199369
        };

        public static bool IsPrime(int candidate) {
            if ((candidate & 1) != 0) {
                int num = (int)Math.Sqrt((double)candidate);
                for (int i = 3; i <= num; i += 2) {
                    if (candidate % i == 0) {
                        return false;
                    }
                }
                return true;
            }
            return candidate == 2;
        }

        public static int GetPrime(int min) {
            if (min < 0)
                throw new ArgumentException("min < 0");

            for (int i = 0; i < PrimeHelper.Primes.Length; i++) {
                int prime = PrimeHelper.Primes[i];
                if (prime >= min)
                    return prime;
            }
            for (int i = min | 1; i < 2147483647; i += 2) {
                if (PrimeHelper.IsPrime(i) && (i - 1) % 101 != 0)
                    return i;
            }
            return min;
        }

        public static int ExpandPrime(int oldSize) {
            int num = 2 * oldSize;
            if (num > 2146435069 && 2146435069 > oldSize) {
                return 2146435069;
            }
            return PrimeHelper.GetPrime(num);
        }
    }
}


让我感到困惑的是:


_FreeList_FreeCount变量:它们代表什么?
Count_Count有什么区别?
所以_Next似乎与哈希冲突有关,它是否存储具有相同哈希的键/值的下一个索引?
numRemove中做什么?
因此,在给定Key的情况下,我们计算出Index = GetHashCode(Key) % Buckets.Length;,这为我们提供了可以在_Keys/_Values/_HashCodes数组中使用的索引。为什么我们需要比较_HashCodes[i] == hash?我的意思是我们通过计算哈希值得到了索引,所以_Keys[i]哈希当然是相同的哈希值吧?


请注意,有一个实际的用例来反编译代码并使用它。 Unity3D不知道如何序列化字典,而且通常情况下也不是那么好,所以我采用了Dictionary的实际代码,并用[SerializeField]注释了必要的字段,现在Unity可以在没有任何包装的情况下序列化该类。

最佳答案

您显示的实现使用数组_Keys存储键,并使用_Values存储它们的对应值。这些数组中只有从索引零(含零)到索引_Count(互斥)的部分被占用。不使用_Count及以上的元素。

添加后,将依次填充_Keys_Values数组。发生删除时,相应的元素将被置于空闲列表中。当下一次添加时,将使用返回到空闲列表的最后一个元素,然后再考虑扩展到_Count处的元素。


  _Next似乎与哈希冲突有关,它是否存储具有相同哈希的键/值的下一个索引?


_Next有两个用途,具体取决于是否使用了相应的项目或该项目在空闲列表中。


使用项目i时,_Next[i]表示具有冲突哈希码的下一个元素的索引(不一定是相同的哈希码!)
当项目i在空闲列表中时,_Next[i]表示空闲列表中的下一个项目(即,放在i之前的空闲列表中的项目,如果有的话)。



  Count_Count有什么区别?


_Count_Keys_Values数组的高水位标记; Count是实际计数,计算方式是高水印减去空闲列表上放置的项目数(即,最大添加量减去当前删除量)。


  因此,在给定键的情况下,我们计算出Index = GetHashCode(Key) % Buckets.Length;,这为我们提供了可以在_Keys / _Values / _HashCodes数组中使用的索引。为什么我们需要比较_HashCodes[i] == hash


因为GetHashCode(Key) % Buckets.Length余数对于哈希码的不同值可能是相同的。您可以通过在调用Equals之前比较哈希码来优化代码,尽管此步骤不是必需的。


  numRemove中做什么?


我们要删除的项目可能是哈希码冲突的项目列表的一部分。该项目可能在列表的顶部,也可能在列表的其他位置。代码使用num来区别对待这些情况:


当该项目位于其列表的顶部时,需要通过调整_Buckets[]将下一个项目“提升”到顶部
如果该项目不在列表的最前面,则需要调整其_Next[]索引而不提升到最前面。


next从负数开始,并在初始迭代后切换为非负数。校验if (num < 0)表示“我们在第一次迭代中吗?”

09-26 23:58