快速关系搜索

时间:2017-01-10 11:17:33

标签: c# search relation

有一个多对多关系列表(N = 1000000)。我需要尽可能快地确定某个关系在列表中的索引,并枚举特定项目的所有关系。

我知道可以为from / to创建查找表(时间为O(1)),但它的大小太大(N * N)。我知道可以使用二分查找(时间为O(log(N))),但它仍然很慢。还有其他解决方案吗?

C#代码:

/// <summary>
/// A single (From, To) pair; instances are created by <c>Table.Add</c> and
/// stored in <c>Table.Relations</c>.
/// </summary>
public class Relation
{
    /// <summary>The "from" side of the pair, as passed to <c>Table.Add</c>.</summary>
    public int From;
    /// <summary>The "to" side of the pair, as passed to <c>Table.Add</c>.</summary>
    public int To;
}

/// <summary>
/// List-backed relation table. Every query walks <see cref="Relations"/> from the
/// start, so each lookup costs O(N) comparisons in the number of stored pairs.
/// </summary>
public class Table
{
    /// <summary>Stored pairs, in the order they were added.</summary>
    public List<Relation> Relations { get; } = new List<Relation>();

    /// <summary>
    /// Appends the (<paramref name="from"/>, <paramref name="to"/>) pair unless
    /// <see cref="IndexOf"/> reports that it is already stored.
    /// </summary>
    /// <param name="from">The "from" side of the pair.</param>
    /// <param name="to">The "to" side of the pair.</param>
    public void Add(int from, int to)
    {
        bool alreadyStored = IndexOf(from, to) != -1;
        if (alreadyStored)
        {
            return;
        }

        Relation pair = new Relation();
        pair.From = from;
        pair.To = to;
        Relations.Add(pair);
    }

    /// <summary>
    /// Finds the position of the first stored pair matching both values.
    /// </summary>
    /// <param name="from">The "from" side to match.</param>
    /// <param name="to">The "to" side to match.</param>
    /// <returns>The zero-based index in <see cref="Relations"/>, or -1 when no pair matches.</returns>
    public int IndexOf(int from, int to)
    {
        // Linear scan: FindIndex tests the elements front to back.
        return Relations.FindIndex(pair => pair.From == from && pair.To == to);
    }

    /// <summary>
    /// Lazily yields the "from" side of every stored pair whose "to" side equals
    /// <paramref name="to"/>, in list order.
    /// </summary>
    /// <param name="to">The "to" side to match.</param>
    /// <returns>A deferred sequence; the list is scanned as the caller enumerates it.</returns>
    public IEnumerable<int> FromsOf(int to)
    {
        int position = 0;
        while (position < Relations.Count)
        {
            Relation candidate = Relations[position];
            if (candidate.To == to)
            {
                yield return candidate.From;
            }

            position++;
        }
    }
}

/// <summary>
/// Micro-benchmark for <c>Table</c>: fills it with N random pairs, then reports
/// the average time per <c>IndexOf</c> call and per <c>FromsOf</c> enumeration.
/// </summary>
class Program
{
    /// <summary>Runs the benchmark and prints the per-operation averages in milliseconds.</summary>
    /// <param name="args">Unused command-line arguments.</param>
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 1000000;

        for (int i = 0; i < N; i++) t.Add(r.Next(N), r.Next(N));

        // Stopwatch measures elapsed time independently of the wall clock, so the
        // result is not distorted by DateTime.Now's coarse resolution or by
        // system clock adjustments made while the benchmark runs.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < N; i++)
        {
            if (t.IndexOf(r.Next(N), r.Next(N)) != -1)
            {
                // do something
            }
        }

        timer.Stop();
        double indexOfTotalMs = timer.Elapsed.TotalMilliseconds;

        timer.Restart();
        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.FromsOf(r.Next(N)))
            {
                // do something
            }
        }

        timer.Stop();
        double fromsOfTotalMs = timer.Elapsed.TotalMilliseconds;

        // Total elapsed time divided by the number of calls = average per call.
        Console.WriteLine($"IndexOf speed = {indexOfTotalMs / N}ms");
        Console.WriteLine($"FromsOf speed = {fromsOfTotalMs / N}ms");
    }
}

2 个答案:

答案 0 :(得分:0)

以下是使用2个字典的版本:

/// <summary>
/// Relation table indexed in both directions: <see cref="froms"/> maps each
/// "from" value to the set of its "to" values, and <see cref="tos"/> maps each
/// "to" value to the set of its "from" values. Membership tests and
/// per-item enumeration are hash lookups instead of list scans.
/// </summary>
public class Table
{
    // Shared empty result so that a miss in FromsOf does not allocate.
    private static readonly int[] NoItems = new int[0];

    /// <summary>Forward index: "from" value -> set of related "to" values.</summary>
    public Dictionary<int, HashSet<int>> froms { get; } = new Dictionary<int, HashSet<int>>();

    /// <summary>Reverse index: "to" value -> set of related "from" values.</summary>
    public Dictionary<int, HashSet<int>> tos { get; } = new Dictionary<int, HashSet<int>>();

    /// <summary>
    /// Records the (<paramref name="from"/>, <paramref name="to"/>) pair in both
    /// indexes. Adding a pair that is already present changes nothing.
    /// </summary>
    /// <param name="from">The "from" side of the pair.</param>
    /// <param name="to">The "to" side of the pair.</param>
    public void Add(int from, int to)
    {
        // HashSet<int>.Add ignores duplicates, so no separate Contains check is needed.
        AddToIndex(froms, from, to);
        AddToIndex(tos, to, from);
    }

    /// <summary>Tests whether the pair has been added.</summary>
    /// <param name="from">The "from" side of the pair.</param>
    /// <param name="to">The "to" side of the pair.</param>
    /// <returns><c>true</c> when the forward index holds <paramref name="to"/> under <paramref name="from"/>.</returns>
    public bool Contains(int from, int to)
    {
        HashSet<int> targets;
        // TryGetValue fetches the set with a single hash lookup
        // (ContainsKey followed by the indexer would hash the key twice).
        return froms.TryGetValue(from, out targets) && targets.Contains(to);
    }

    /// <summary>
    /// Returns every "from" value related to <paramref name="to"/>.
    /// </summary>
    /// <param name="to">The "to" side to look up.</param>
    /// <returns>
    /// The set stored in the reverse index (the live internal instance, not a copy),
    /// or an empty sequence when <paramref name="to"/> has no relations.
    /// </returns>
    public IEnumerable<int> FromsOf(int to)
    {
        HashSet<int> sources;
        if (tos.TryGetValue(to, out sources))
        {
            return sources;
        }

        return NoItems;
    }

    // Inserts value into the set stored under key, creating that set on first use.
    private static void AddToIndex(Dictionary<int, HashSet<int>> index, int key, int value)
    {
        HashSet<int> bucket;
        if (!index.TryGetValue(key, out bucket))
        {
            bucket = new HashSet<int>();
            index.Add(key, bucket);
        }

        bucket.Add(value);
    }
}

/// <summary>
/// Micro-benchmark for <c>Table</c>: fills it with N random pairs, then reports
/// the average time per <c>Contains</c> call and per <c>FromsOf</c> enumeration.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the benchmark, prints the per-operation averages in milliseconds and
    /// waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Unused command-line arguments.</param>
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 1000000;

        for (int i = 0; i < N; i++)
            t.Add(r.Next(N), r.Next(N));

        // Stopwatch measures elapsed time independently of the wall clock, so the
        // result is not distorted by DateTime.Now's coarse resolution or by
        // system clock adjustments made while the benchmark runs.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < N; i++)
        {
            if (t.Contains(r.Next(N), r.Next(N)))
            {
                // do something
            }
        }

        timer.Stop();
        double containsTotalMs = timer.Elapsed.TotalMilliseconds;

        timer.Restart();
        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.FromsOf(r.Next(N)))
            {
                // do something
            }
        }

        timer.Stop();
        double fromsOfTotalMs = timer.Elapsed.TotalMilliseconds;

        // The first label still reads "IndexOf" although Contains is what is timed;
        // the output text is kept unchanged so it stays comparable with earlier runs.
        Console.WriteLine($"IndexOf speed = {containsTotalMs / N}ms");
        Console.WriteLine($"FromsOf speed = {fromsOfTotalMs / N}ms");
        Console.ReadKey();
    }
}

输出:

  

IndexOf speed = 0.0003220099ms

     

FromsOf speed = 0.0003799996ms

答案 1 :(得分:0)

我尝试了字典嵌套字典的做法,速度同样非常快,但内存使用量约为33 * N(N = 10000000时,任务管理器显示的内存使用量为132 Mb)。

代码:

    /// <summary>
    /// A single (From, To) pair; instances are created by <c>Table.Add</c> and
    /// stored in <c>Table.Relations</c>.
    /// </summary>
    public class Relation
{
    /// <summary>The "from" side of the pair, as passed to <c>Table.Add</c>.</summary>
    public int From;
    /// <summary>The "to" side of the pair, as passed to <c>Table.Add</c>.</summary>
    public int To;
}

/// <summary>
/// Relation table that keeps the pairs in a list and, alongside it, a nested
/// dictionary ("from" -> "to" -> list index) so that the index of a pair can be
/// found with two hash lookups instead of a list scan.
/// </summary>
public class Table
{
    /// <summary>Stored pairs, in the order they were added.</summary>
    public List<Relation> Relations { get; } = new List<Relation>();

    /// <summary>
    /// Lookup structure: outer key is the "from" value, inner key is the "to"
    /// value, inner value is the pair's position in <see cref="Relations"/>.
    /// </summary>
    public Dictionary<int, Dictionary<int, int>> FromDic = new Dictionary<int, Dictionary<int, int>>();

    /// <summary>
    /// Appends the (<paramref name="from"/>, <paramref name="to"/>) pair and
    /// registers its list position, unless the pair is already stored.
    /// </summary>
    /// <param name="from">The "from" side of the pair.</param>
    /// <param name="to">The "to" side of the pair.</param>
    public void Add(int from, int to)
    {
        if (IndexOf(from, to) != -1)
        {
            return;
        }

        // The new pair lands at the current end of the list.
        int position = Relations.Count;
        Relation pair = new Relation();
        pair.From = from;
        pair.To = to;
        Relations.Add(pair);

        Dictionary<int, int> positionsByTo;
        bool fromIsKnown = FromDic.TryGetValue(from, out positionsByTo);
        if (!fromIsKnown)
        {
            positionsByTo = new Dictionary<int, int>();
            FromDic[from] = positionsByTo;
        }

        positionsByTo[to] = position;
    }

    /// <summary>Looks up the list position of a pair.</summary>
    /// <param name="from">The "from" side of the pair.</param>
    /// <param name="to">The "to" side of the pair.</param>
    /// <returns>The index recorded for the pair, or -1 when the pair is not registered.</returns>
    public int IndexOf(int from, int to)
    {
        Dictionary<int, int> positionsByTo;
        if (!FromDic.TryGetValue(from, out positionsByTo))
        {
            return -1;
        }

        int position;
        return positionsByTo.TryGetValue(to, out position) ? position : -1;
    }

    /// <summary>
    /// Returns every "to" value registered under <paramref name="from"/>.
    /// </summary>
    /// <param name="from">The "from" side to look up.</param>
    /// <returns>
    /// The key collection of the inner dictionary, or a new empty list when
    /// <paramref name="from"/> has no entries.
    /// </returns>
    public IEnumerable<int> TosOf(int from)
    {
        Dictionary<int, int> positionsByTo;
        bool fromIsKnown = FromDic.TryGetValue(from, out positionsByTo);
        if (!fromIsKnown)
        {
            return new List<int>();
        }

        return positionsByTo.Keys;
    }
}

/// <summary>
/// Micro-benchmark for <c>Table</c>: measures the average time per <c>Add</c>,
/// per <c>IndexOf</c> call and per <c>TosOf</c> enumeration over N random inputs.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the three benchmark phases, prints each per-operation average
    /// (total milliseconds * 1000 / N, labelled "mks") and waits for a key press.
    /// </summary>
    /// <param name="args">Unused command-line arguments.</param>
    static void Main(string[] args)
    {
        Random r = new Random();
        Table t = new Table();

        int N = 100000;

        // Stopwatch measures elapsed time independently of the wall clock. It is
        // stopped before every Console.WriteLine and restarted afterwards so that
        // console output is never counted as part of the next phase.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        for (int i = 0; i < N; i++) t.Add(r.Next(N), r.Next(N));

        timer.Stop();
        Console.WriteLine($"Add speed = {timer.Elapsed.TotalMilliseconds * 1000 / N}mks");

        timer.Restart();
        for (int i = 0; i < N; i++)
        {
            if (t.IndexOf(r.Next(N), r.Next(N)) != -1)
            {
                // do something
            }
        }

        timer.Stop();
        Console.WriteLine($"IndexOf speed = {timer.Elapsed.TotalMilliseconds * 1000 / N}mks");

        timer.Restart();
        for (int i = 0; i < N; i++)
        {
            foreach (int j in t.TosOf(r.Next(N)))
            {
                // do something
            }
        }

        timer.Stop();
        Console.WriteLine($"TosOf speed = {timer.Elapsed.TotalMilliseconds * 1000 / N}mks");

        Console.ReadKey();
    }
}