Question

我正在寻找C ++中图形的简洁精确的邻接列表表示。我的节点只是节点ID。我就是这样做的。只想知道专家对此的看法。还有更好的方法吗？

这是类实现（没什么特别的，现在不关心公共/私有方法）

#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>

using namespace std;

// Adjacency list of a single vertex: the vertex id plus the ids of all
// vertices it has an edge to.
// There is no constructor, so `head` holds an indeterminate value until the
// caller assigns it; set it before calling print().
class adjList {
public:
    int head;                      // id of the vertex this list belongs to
    std::vector<int> listOfNodes;  // ids of the vertices adjacent to `head`
    void print() const;
};

// Writes one "head-->neighbour" line to stdout per stored neighbour.
// Prints nothing when the list is empty. Does not modify the object, so it
// can be called through const references.
void adjList::print() const {
    // Iterators avoid the signed/unsigned comparison of an int index
    // against size().
    for (std::vector<int>::const_iterator it = listOfNodes.begin();
         it != listOfNodes.end(); ++it) {
        std::cout << head << "-->" << *it << std::endl;
    }
}

class graph {
public:
    vector<adjList> list;
    void print();
};

void graph :: print() {
    for (int i=0; i<list.size(); ++i) {
        list.at(i).print();
        cout << endl;
    }
}

我的main函数逐行解析输入文件。每行的解释如下：

<source_node> <node1_connected_to_source_node> <node2_connected_to_source_node> <node3_connected_to_source_node> <...>

这是主要的：

// Reads "graph.txt" line by line, where each line has the form
//   <source_id> <neighbour_id> <neighbour_id> ...
// stores one adjacency list per line and prints all edges to stdout.
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    std::ifstream file("graph.txt");
    if (!file) {
        // Without this check a missing file silently produced an empty graph.
        std::cerr << "cannot open graph.txt" << std::endl;
        return 1;
    }

    graph g;
    std::string line;
    while (std::getline(file, line)) {
        std::stringstream str(line);
        int source;
        if (!(str >> source)) {
            // Blank or non-numeric line: there is no source vertex, so do
            // not record a bogus adjacency list for it.
            continue;
        }
        adjList l;
        l.head = source;
        int node2;
        while (str >> node2) {
            l.listOfNodes.push_back(node2);
        }
        g.list.push_back(l);
    }
    file.close();
    g.print();
    getchar();  // wait for a key press before the program exits
    return 0;
}

我知道我应该在adjList类中添加addEdge（）函数，而不是直接从main（）修改它的变量，但是现在我只是想知道最好的结构。

编辑：我的方法有一个缺点。对于具有大量节点的复杂图，节点实际上是结构体/类，在这种情况下，存储整个对象就意味着复制值，所以我认为应该使用指针。例如，对于无向图，我会在adjList中存储节点对象的副本（节点1和2之间的连接意味着1的邻接列表中有2，反之亦然）。在adjList中存储指向节点对象的指针而不是整个对象，就可以避免这种情况。以dfs实现为例，这种方法会带来好处：在那里我需要确保每个节点只被访问一次，而同一节点存在多个副本会让事情变得更加困难。不是吗？

在这种情况下，我的类定义将改变如下：

#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <map>

using namespace std;

// A graph vertex: its id plus the flag that the depth-first traversal sets
// once the vertex has been visited.
class node {
public:
    // Creates an unvisited vertex with id 0. The members are initialised
    // explicitly so a default-constructed node never holds indeterminate
    // values.
    node() : node_id(0), dirty(false) {}
    // Creates a vertex with the given id and initial visited state.
    node(int id, bool _dirty) : node_id(id), dirty(_dirty) {}
    int node_id;  // identifier of the vertex
    bool dirty;   // true once the vertex has been visited
};

class adjList {
public:
    node *head;
    vector<node*> listOfNodes;
    void print();
    ~adjList() { delete head;}
};

void adjList :: print() {
    for (int i=0; i<listOfNodes.size(); ++i) {
        cout << head->node_id << "-->" << listOfNodes.at(i)->node_id << endl;
    }
}

class graph {
public:
    vector<adjList> list;
    void print();
    void dfs(node *startNode);
};

void graph::dfs(node *startNode) {
    startNode->dirty = true;
    for(int i=0; i<list.size(); ++i) {
        node *stNode = list.at(i).head;
        if (stNode->node_id != startNode->node_id) { continue;}
        for (int j=0; j<list.at(i).listOfNodes.size(); ++j) {
            if (!list.at(i).listOfNodes.at(j)->dirty) {
                dfs(list.at(i).listOfNodes.at(j));
            }
        }
    }
    cout << "Node: "<<startNode->node_id << endl;
}

void graph :: print() {
    for (int i=0; i<list.size(); ++i) {
        list.at(i).print();
        cout << endl;
    }
}

这就是我实现main（）函数的方法。我使用map<>来避免重复对象：仅当节点先前未创建时才创建新对象，并通过其id检查对象是否已存在。

int main()
{
    fstream file("graph.txt", ios::in);
    string line;
    graph g;
    node *startNode;
    map<int, node*> nodeMap;
    while (getline(file, line)) {
        int source;
        stringstream str(line);
        str >> source;
        int node2;
        node *sourceNode;
        // Create new node only if a node does not already exist
        if (nodeMap.find(source) == nodeMap.end()) {
                sourceNode = new node(source, false);
                nodeMap[source] = sourceNode;
        } else {
                sourceNode = nodeMap[source];
        }
        adjList l;
        l.head = sourceNode;
        nodeMap[source] = sourceNode;
        while (str >> node2) {
            // Create new node only if a node does not already exist
            node *secNode;
            if (nodeMap.find(node2) == nodeMap.end()) {
                secNode = new node(node2, false);
                nodeMap[node2] = secNode;
            } else {
                secNode = nodeMap[node2];
            }
            l.listOfNodes.push_back(secNode);
        }
        g.list.push_back(l);
        startNode = sourceNode;
    }
    file.close();
    g.print();
    g.dfs(startNode);
    getchar();
    return 0;
}

第二次编辑 在 Ulrich Eckhardt 建议将邻接列表放入节点类之后，我认为这是一个更好的数据结构来存储图形并执行dfs（），dijkstra（）类操作。请注意，邻接列表在节点类中合并。

#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <map>

using namespace std;

// A graph vertex that carries its own adjacency list: its id, the flag the
// depth-first traversal sets once the vertex has been visited, and pointers
// to the adjacent vertices.
class node {
public:
    // Creates an unvisited vertex with id 0 and no neighbours. The scalar
    // members are initialised explicitly so a default-constructed node never
    // holds indeterminate values.
    node() : node_id(0), dirty(false) {
    }
    // Creates a vertex with the given id and initial visited state and no
    // neighbours.
    node(int id, bool _dirty): node_id(id), dirty(_dirty) {
    }
    int node_id;                     // identifier of the vertex
    bool dirty;                      // true once the vertex has been visited
    std::vector<node*> listOfNodes;  // adjacent vertices
};

class graph {
public:
    vector<node*> myGraph;
    void dfs(node* startNode);
};

void graph::dfs(node* startNode) {
    startNode->dirty = true;
    for (int j=0; j<startNode->listOfNodes.size(); ++j) {
            if (!startNode->listOfNodes.at(j)->dirty) {
                dfs(startNode->listOfNodes.at(j));
            }
        }

    cout << "Node: "<<startNode->node_id << endl;
}

我们能做得比这更好吗？

Answer 1

有一些事情可以改进，但总的来说你的方法是合理的。注意：

您正在使用int作为容器的索引，这将为您提供某些编译器的警告，因为容器的大小可能超过int所代表的大小。相反，请使用size_t。
将您的for (int i=0; i<list.size(); ++i)重写为for(size_t i=0, size=list.size(); i!=size; ++i)。使用!=代替<将与迭代器一起使用。读取和存储大小一次使调试更容易，甚至可能更有效。
在要打印的循环中，您有list.at(i).print();。 list.at(i)将验证索引是否有效，否则将引发异常。在这个非常简单的情况下，我确信索引是有效的，因此使用list[i]代替更快。此外，它隐含地记录索引有效，而不是您希望它无效。
print()函数应该是常量。
我不明白int head是什么。这是节点的某种ID吗？并且ID不仅仅是graph::list内的索引吗？如果它是索引，您可以使用元素的地址减去第一个元素的地址按需计算，因此不需要冗余地存储它。另外，请考虑在读取时验证该索引，因此您没有任何边缘到达不存在的顶点。
如果你不关心节点级的封装（这是合理的！），你也可以把它变成一个结构，这可以节省一些输入。
存储指针而不是索引很棘手，但可以提高速度。问题是，对于读取，您可能需要一个指向尚不存在的顶点的指针。有一个hack允许在不使用额外存储的情况下执行此操作，它需要首先将索引存储在指针值中（使用reinterpret_cast），并在读取之后，对数据进行第二次传递，您可以将这些值调整为实际地址。当然，您也可以使用第二遍验证您没有任何边缘到达根本不存在的顶点（这是at(i)函数变得有用的地方）所以这第二遍无论如何，验证一些保证是件好事。

应明确要求，这里给出一个如何在指针中存储索引的示例：

// Illustrative sketch, not a complete program: the loop header and the
// helpers read_id_from_file() / lookup_node_by_id() are placeholders.
//
// First pass - read the file. Each node ID is stored directly in the bits
// of a node* so that no separate ID storage is needed while reading.
for(...) {
    size_t id = read_id_from_file();
    node* node_ptr = reinterpret_cast<node*>(id);
    adjacency_list.push_back(node_ptr);
}

/* At this point the node* values in adjacency_list do not hold valid
addresses. They only carry the IDs of the nodes they should eventually
point to, so they must not be dereferenced yet. */

// Second pass - after the whole file has been read, replace every stored ID
// with the address of the node it refers to.
for(size_t i=0, size=adjacency_list.size(); i!=size; ++i) {
    // Recover the ID that was stored in the pointer value.
    // NOTE(review): size_t is not guaranteed to round-trip a pointer value;
    // std::uintptr_t is the integer type meant for this - confirm before
    // relying on the conversion.
    node* node_ptr = adjacency_list[i];
    size_t id = reinterpret_cast<size_t>(node_ptr);
    // Translate the ID into the actual node address.
    node_ptr = lookup_node_by_id(id);
    // A null result is treated as an edge to a node that does not exist.
    if(!node_ptr)
        throw std::runtime_error("unknown node ID in adjacency list");
    // Overwrite the encoded ID with the real node address.
    adjacency_list[i] = node_ptr;
}

我很确定这在一般情况下是可行的，但我不能100％确定它是否有保证，这就是为什么我不太愿意在这里发布它。不过，我希望这也能说明我为什么要问“头”究竟是什么。如果它实际上只是容器中的索引，那么几乎没有必要存储它，无论是在文件中还是在内存中。如果它是您从文件中读取的节点的某种名称或标识符，那么您绝对需要它，但不能把它当作索引使用：文件中的ID也可能从1或1000开始，您应该捕获并处理这种情况，而不是让程序崩溃！

C ++中的邻接列表实现

1 个答案: