散列unordered_set的自定义指针类型

时间:2013-09-05 11:29:50

标签: c++ c++11 hash unordered-set

我正在尝试散列Edge结构,以便我可以使用具有唯一边的unordered_set。在我的情况下,如果在之前的集合中没有遇到其两个端点的组合,则边缘被认为是唯一的。

虽然我的代码适用于只包含Edge类型的unordered_set,但我无法使其适用于指向Edge类型的指针。请看下面我有点冗长的代码。非常感谢任何帮助。

#include <iostream>
#include <iomanip>
#include <algorithm>
#include <unordered_set>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const
    {
        return  (EndPoints[0] == other.EndPoints[0] || EndPoints[0] == other.EndPoints[1]) && 
                (EndPoints[1] == other.EndPoints[1] || EndPoints[1] == other.EndPoints[0]);
    }

    inline bool operator==(const Edge* other) const
    {
        return  (EndPoints[0] == other->EndPoints[0] || EndPoints[0] == other->EndPoints[1]) && 
                (EndPoints[1] == other->EndPoints[1] || EndPoints[1] == other->EndPoints[0]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <>
    struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const
        {
            Vector3 p0 = edge.EndPoints[0];
            Vector3 p1 = edge.EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            return hash0 ^ (hash1 << 3);
        }
    };

    template <>
    struct hash<Edge*>
    {
        std::size_t operator()(const Edge* edge) const
        {
            Vector3 p0 = edge->EndPoints[0];
            Vector3 p1 = edge->EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            std::size_t key = hash0 ^ (hash1 << 3);
            return key;
        }
    };
}


void add_edge(std::unordered_set<Edge>& table, Edge edge)
{
    std::unordered_set<Edge>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(std::unordered_set<Edge*>& table, Edge* edge)
{
    std::unordered_set<Edge*>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(std::unordered_set<Edge>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(std::unordered_set<Edge*>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    std::unordered_set<Edge> table;
    std::unordered_set<Edge*> ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}

这是输出:

1>  Table already contains edge (-1,  0,  0) -- ( 1,  0,  0)
1>  
1>  Table has 1 elements:
1>  ( 1,  0,  0) -- (-1,  0,  0)
1>  
1>  Table has 2 elements:
1>  (-1,  0,  0) -- ( 1,  0,  0)
1>  ( 1,  0,  0) -- (-1,  0,  0)

所以我的问题是:为什么第二个元素被添加到第二个表中?我已经检查了哈希函数,但是它为两个条目返回了相同的密钥,所以这似乎不是罪魁祸首,但我不确定它可能是什么。

修改

我现在发现inline bool operator==(const Edge* other) const没有被调用,但我不确定原因。

2 个答案:

答案 0 :(得分:3)

Angew指出了真正的问题。

还有其他问题。您似乎希望Edges始终是双向的,因此Edge(a,b)== Edge(b,a)。

  

旁注 实现这一目标的最佳方式(IMO)是在Edge构建期间以确定性顺序排序端点。以后不需要考虑它。这被称为不变,并消除了在所有其余代码中检查边缘的“等效性”的负担。

但是,您的哈希函数无法正确实现

您的hash<>::operator()读取:

    std::size_t operator()(const Edge& edge) const
    {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (p1.x < p0.x) std::swap(p0.x, p1.x);
        if (p1.y < p0.y) std::swap(p0.y, p1.y);
        if (p1.z < p0.z) std::swap(p0.z, p1.z);

        unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
        unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

        return hash0 ^ (hash1 << 3);
    }

此交换逻辑有效地构成虚假端点

Edge(ep[3,1,2], ep[1,2,3])变为Edge(ep[1,1,2], ep[3,2,3])您可能需要Edge(ep[1,2,3], ep[3,1,2])

修复它将交换整个端点,而不是单个向量元素:

if (std::tie(p1.x, p1.y, p1.z) < std::tie(p0.x, p0.y, p0.z)) {
    using std::swap;
    swap(p0, p1);
}

通过删除(所有)不必要的重复代码来修复哈希函数:

template <> struct hash<Edge>
{
    std::size_t operator()(const Edge& edge) const {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (std::tie(p0.x, p0.y, p0.z) < 
            std::tie(p1.x, p1.y, p1.z))  // consider`Vector3::operator<`
        {
            using std::swap;
            swap(p0, p1);
        }

        auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

        return hash_p(p0) ^ (hash_p(p1) << 3);
    }
};

指针哈希变成了一个前锋:

template <> struct hash<Edge*> {
    std::size_t operator()(const Edge* edge) const { 
        return hash<Edge>()(*edge); 
    }
};

考虑将比较移至Vector3::operator<

固定测试计划

实现上述内容,并修复Edge *的缺失Equality比较器:

另见 live on IdeOne

#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <cassert>
#include <tuple>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    inline bool operator<(const Vector3& other) const
    {
        return std::tie(x, y, z) < std::tie(other.x, other.y, other.z);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        // swap order
        if (q < p) { using std::swap; swap(p, q); } // the invariant
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const {
        return std::tie(EndPoints[0], EndPoints[1]) == std::tie(other.EndPoints[0], other.EndPoints[1]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <> struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const {
            assert(edge.EndPoints[0] < edge.EndPoints[1]); // the invariant

            auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

            return hash_p(edge.EndPoints[0]) ^ (hash_p(edge.EndPoints[1]) << 3);
        }
    };

    template <> struct hash<Edge*> {
        std::size_t operator()(const Edge* edge) const { 
            return hash<Edge>()(*edge); 
        }
    };
}

struct EdgePtrEqual {
    bool operator()(Edge const* a, Edge const* b) const {
        return *a == *b;
    }
};

using EdgeSet    = std::unordered_set<Edge,  std::hash<Edge>>;
using EdgePtrSet = std::unordered_set<Edge*, std::hash<Edge*>, EdgePtrEqual>;

void add_edge(EdgeSet& table, Edge edge)
{
    EdgeSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(EdgePtrSet& table, Edge* edge)
{
    EdgePtrSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(EdgeSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(EdgePtrSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    EdgeSet table;
    EdgePtrSet ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}

答案 1 :(得分:2)

添加std::hash<Edge*>的特化使得hasher与等式比较不一致。因为集合中的元素是指针,所以使用普通指针相等来比较它们。只会将operator==(const Edge*)Edge(这绝对不是您想要的)进行比较,而只会调用您的不平衡Edge*

您需要提供与您的哈希符合的比较器。默认比较器为std::equal_to<Key>。我认为你不能(或想要)专门化std::equal_to<Edge*>,所以你应该只提供自己的比较器作为unordered_set的第三个模板参数。

不相关的提示:如果你实现像这样的指针哈希,它将需要更少的维护:

template <>
struct hash<Edge*>
{
    std::size_t operator()(const Edge* edge) const
    {
        std::hash<Edge> h;
        return h(*edge);
    }
};