1 2
3 4
5 9
2 6
3 7
1: 1 2 6
2: 3 4 7
3: 5 9
第一行是因为 1“连接”到 2，而 2 又连接到 6；第二行是因为 3 连接到 4，且 3 连接到 7。
1 27
1 134
1 137
1 161
1 171
1 275
1 309
1 413
1 464
1 627
1 744
2 135
2 398
2 437
2 548
2 594
2 717
2 738
2 783
2 798
2 912
5 74
5 223
7 53
7 65
7 122
7 237
7 314
7 701
7 730
7 755
7 821
7 875
7 884
7 898
7 900
7 930
8 115
9 207
9 305
9 342
9 364
9 493
9 600
9 676
9 830
9 941
10 164
10 283
10 380
10 423
10 468
10 577
11 72
11 132
11 276
11 306
11 401
11 515
11 599
12 95
12 126
12 294
13 64
13 172
13 528
14 396
15 35
15 66
15 210
15 226
15 360
15 588
17 263
17 415
17 474
17 648
17 986
21 543
21 771
22 47
23 70
23 203
23 427
23 590
24 286
24 565
25 175
26 678
27 137
27 161
27 171
27 275
27 309
27 413
27 464
27 627
27 684
27 744
29 787
答案 0 :(得分:4)
我做到了 O(n log n)。
这是一个（略显繁重的）C++ 实现：
#include <boost/pending/disjoint_sets.hpp>
#include <boost/property_map/property_map.hpp>
#include <map>
#include <set>
#include <iostream>
// Backing storage handed to the disjoint-set structure: one map for ranks, one for parents.
typedef std::map<int, int> rank_t;
typedef std::map<int, int> parent_t;
// Property-map adaptors wrapping the two std::map types above.
typedef boost::associative_property_map< rank_t > rank_pmap_t;
typedef boost::associative_property_map< parent_t > parent_pmap_t;
// Disjoint-set (union-find) structure parameterised on those property maps.
typedef boost::disjoint_sets< rank_pmap_t, parent_pmap_t > group_sets_t;
// Ordered set of integer keys.
typedef std::set<int> key_set;
// Ordered map from an int to a set of ints; build_output() uses it to hold the groups.
typedef std::map<int, std::set<int> > output;
有了这些 typedef，下面才是真正的核心。我使用的是 boost::disjoint_sets，它恰好能很好地表示这个问题。第一个函数检查给定的两个键此前是否出现过，并在需要时把它们加入集合。关键的部分是 union_set(a, b)
。如果其中一个或另一个集合已经在void add_data(int a, int b, group_sets_t & groups, key_set & keys)
if (keys.count(a) < 1) groups.make_set(a);
if (keys.count(b) < 1) groups.make_set(b);
groups.union_set(a, b);
/**
 * Prints each group on its own line as "<label>: <member> <member> ...".
 * The label is the representative that find_set() reports for the group.
 *
 * @param groups disjoint-set structure to query
 * @param keys   every id that was registered in `groups`
 */
void build_output(group_sets_t & groups, key_set & keys)
{
  output out;
  // Bucket every id under the representative of the set it belongs to.
  for (key_set::iterator i(keys.begin()); i != keys.end(); ++i)
    out[groups.find_set(*i)].insert(*i);

  for (output::iterator i(out.begin()); i != out.end(); ++i)
  {
    std::cout << i->first << ": ";
    for (output::mapped_type::iterator j(i->second.begin()); j != i->second.end(); ++j)
      std::cout << *j << " ";
    std::cout << '\n';
  }
}
int main()
rank_t rank;
parent_t parent;
rank_pmap_t rank_index(rank);
parent_pmap_t parent_index(parent);
group_sets_t groups( rank_index, parent_index );
key_set keys;
int a, b;
while (std::cin >> a)
std::cin >> b;
add_data(a, b, groups, keys);
build_output(groups, keys);
//std::cout << "number of sets: " <<
// groups.count_sets(keys.begin()), keys.end()) << std::endl;
关于性能：并查集的各项操作（make_set 等）的均摊复杂度为 O(α(n))，非常接近常数。因此，如果只是建立结构，整个算法是 O(nα(n))（实际上近似 O(n)）；但我们还必须把结果打印出来，这意味着要构建一些关联容器，而这些容器的性能不会好于 O(n log n)。换用其他关联容器（例如哈希表类容器）可以获得常数因子的加速，因为初始列表填充完毕后，就可以预留最合适的空间。
答案 1 :(得分:1)
1 2
3 4
5 9
2 6
3 7
Group 1: 1 2 6
Group 2: 3 4 7
Group 3: 5 9
Group 1: 1 27 134 137 161 171 275 309 413 464 627 684 744
Group 2: 2 135 398 437 548 594 717 738 783 798 912
Group 3: 5 74 223
Group 4: 7 53 65 122 237 314 701 730 755 821 875 884 898 900 930
Group 5: 8 115
Group 6: 9 207 305 342 364 493 600 676 830 941
Group 7: 10 164 283 380 423 468 577
Group 8: 11 72 132 276 306 401 515 599
Group 9: 12 95 126 294
Group 10: 13 64 172 528
Group 11: 14 396
Group 12: 15 35 66 210 226 360 588
Group 13: 17 263 415 474 648 986
Group 14: 21 543 771
Group 15: 22 47
Group 16: 23 70 203 427 590
Group 17: 24 286 565
Group 18: 25 175
Group 19: 26 678
Group 20: 29 787
use strict;
use warnings;
# Undirected adjacency table: $cache{$x}{$y} is set when x and y shared an input line.
my %cache = ();
while (<>)
{
    # split ' ' ignores leading whitespace; lines without two fields are skipped
    my($x,$y) = split ' ';
    next unless defined $y;
    #print "$x $y\n";
    $cache{$x}{$y} = 1;
    $cache{$y}{$x} = 1;
}

my $grp = 1;
foreach my $key (sort { $a <=> $b } keys %cache)
{
    #print "key: $key\n";
    # subkey_search() deletes every node it visits, so a key that is still
    # present here starts a group that has not been printed yet.
    if (defined $cache{$key})
    {
        my %result = ();
        subkey_search(\%result, $key);
        print "Group $grp:";
        $grp++;    # number the groups consecutively
        foreach my $val (sort { $a <=> $b } keys %result)
        {
            print " $val";
        }
        print "\n";
    }
}
# Collects into %$resultref every node reachable from $key through %cache,
# deleting each visited node from %cache so it is never visited twice.
# An explicit work list replaces recursion, so large groups do not nest calls.
sub subkey_search
{
    my($resultref, $key) = @_;
    my @pending = ($key);
    while (@pending)
    {
        my $node = pop @pending;
        next unless defined $cache{$node};    # already consumed via another path
        my $neighbours = delete $cache{$node};
        $resultref->{$node} = 1;
        push @pending, grep { defined $cache{$_} } keys %$neighbours;
    }
}
答案 2 :(得分:1)
//$fp = fopen("./resemblance.1.out", "r");
$fp = fopen("./wrong", "r");
$groups = array();
$group["-1"] = 1;
$groups[] = $group;
// $map: node id => number of the group the node currently belongs to
$map = array();
//Maintain a count
$group = 1;
while(($line = fgets($fp, 4096)) !== false) {
    //echo $line."\n";
    $source = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);
    // Skip blank or incomplete lines so they do not create an empty-string node
    if(count($source) < 2) {
        continue;
    }
    $hasFirst = array_key_exists($source[0], $map);
    $hasSecond = array_key_exists($source[1], $map);
    if($hasFirst && !$hasSecond) {
        $map[$source[1]] = $map[$source[0]];
    } else if($hasSecond && !$hasFirst) {
        $map[$source[0]] = $map[$source[1]];
    } else if($hasFirst && $hasSecond) {
        // A pair already in the same group needs no change
        if($map[$source[1]] != $map[$source[0]]) {
            // Adjust the groups - change the groups of one of the elements to the other
            $target = $map[$source[0]];
            $keys = array_keys($map, $map[$source[1]]);
            foreach($keys as $key) {
                $map[$key] = $target;
            }
        }
    } else {
        // Neither node seen before: allocate the next group number
        $group++;
        $map[$source[0]] = $group;
        $map[$source[1]] = $group;
    }
}
fclose($fp);
// Dump the node => group table
print_r($map);
[1] => 2
[2] => 2
[3] => 3
[4] => 3
[5] => 4
[9] => 4
[6] => 2
[7] => 3
[] => 5
答案 3 :(得分:1)
def graph_to_connected_subgraphs(graph):
    """Split `graph` into lists of nodes reachable from each remaining start key.

    `graph` maps a node to the list of nodes it points to. The dict is
    consumed: every visited node's entry is deleted.

    Returns a list of node lists, one per start key that was still present
    when its turn came.
    """
    trees = []
    # Snapshot the keys: append_tree_from() deletes entries during the walk.
    for start in list(graph.keys()):
        if start in graph:
            component = [start]
            append_tree_from(graph, start, component)
            trees.append(component)
    return trees

def append_tree_from(graph, node, list):
    """Append everything reachable from `node` to `list` (depth first) and return it.

    Visited entries are removed from `graph`. A node that is reached after
    its own entry was already deleted is still appended, so it can show up
    more than once.
    """
    if node in graph:
        for endpoint in graph[node]:
            list.append(endpoint)
            append_tree_from(graph, endpoint, list)
        del graph[node]
    return list
def add_edge(graph, f, s):
    """Store the edge between `f` and `s` in `graph`, keyed by the smaller endpoint.

    `graph` maps a node to the list of its larger neighbours; the list is
    created on first use and extended for every further edge.
    """
    if s < f: # ensure f < s to handle cyclic graphs
        f, s = s, f
    graph.setdefault(f, []).append(s)
# Build the sample graph edge by edge, then print its connected groups.
graph = {}
for first, second in ((1, 2), (2, 6), (3, 4), (5, 9), (3, 7)):
    add_edge(graph, first, second)
print(graph_to_connected_subgraphs(graph))
[[1, 2, 6], [3, 4, 7], [5, 9]]
答案 4 :(得分:1)
这是在图上执行 DFS（深度优先搜索）算法的典型应用，可以试试 DFS。该算法的复杂度为 O(|V| + |E|)，其中 V 为顶点数，E 为边数。
答案 5 :(得分:0)
# Merge every input line (a set of tokens) with all existing groups it touches,
# so that `groups` always holds pairwise-disjoint sets.
groups = []
with open("so2.txt") as infile:
    for line in infile:
        newset = set(line.split())
        if not newset:
            continue  # blank line: nothing to group
        excludegroups = []
        for group in groups:
            if newset & group:
                newset |= group  # absorb every group sharing a member
            else:
                excludegroups.append(group)  # untouched groups are kept as they are
        groups = excludegroups
        groups.append(newset)
for i, s in enumerate(groups):
    print("%d: %s" % (i, " ".join(s)))
这里的想法是，其实并不需要真的构造图。输入中的每对数字本身就是一个集合，规则是最终只返回互不相交的集合。因此我读取每一行并将其转换为集合，然后逐一检查它与所有现有集合是否有交集：有交集的就合并进新集合，不相交的集合则原样加入新的集合列表。检查完成后，再把合并得到的新集合加入该列表。这样就能保证列表中只有互不相交的集合。
答案 6 :(得分:0)
// $map: node id => group number; $group is the last group number handed out.
// NOTE(review): $file is not opened anywhere in the visible snippet -- confirm
// it is an open, readable handle before this point.
$group = 0;
$map = array();
while (($line = fgets($file)) !== false) {
    $parts = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);
    if (count($parts) < 2) {
        continue; // blank or incomplete line
    }
    $a = (int) $parts[0];
    $b = (int) $parts[1];

    if (!isset($map[$a]) && !isset($map[$b])) {
        $map[$a] = $map[$b] = ++$group;
    } elseif (!isset($map[$b])) {
        $map[$b] = $map[$a];
    } elseif (!isset($map[$a])) {
        $map[$a] = $map[$b];
    } elseif ($map[$a] != $map[$b]) {
        // move one group to the other
        // Both group numbers are captured first: $map[$b] itself is rewritten
        // inside the loop, so comparing against it live would stop moving the
        // remaining members.
        $from = $map[$b];
        $to = $map[$a];
        foreach ($map as $n => $g) {
            if ($g == $from) {
                $map[$n] = $to;
            }
        }
    }
}
// print results
// Invert the node => group map into group => list of nodes.
$results = array();
foreach ($map as $val => $group) {
    $results[$group][] = $val;
}

echo '<pre>';
// Groups are renumbered consecutively from 1 in output order.
$i = 0;
foreach ($results as $result) {
    echo 'Group ', ++$i, ': ', implode(' ', $result), "\n";
}
答案 7 :(得分:0)
import java.util.Scanner;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.LinkedList;
/**
 * Reads "owner owned" integer pairs from standard input and prints one
 * numbered line per group, listing the group's members in insertion order.
 *
 * <p>NOTE(review): when {@code owned} already belongs to a different group the
 * two groups are not merged; the value is simply re-assigned and appended, so
 * it can appear on two lines. Confirm whether the input can contain such pairs.
 */
public final class Solver {
    public static void main(String[] args) {
        Scanner in = new Scanner(System.in);
        // value -> head (ultimate owner) of the group it was placed in
        final Map<Integer, Integer> ultimateOwners = new HashMap<Integer, Integer>();
        // group head -> the result line collecting that group's members
        final Map<Integer, List<Integer>> ownerToOwned = new HashMap<Integer, List<Integer>>();
        // result lines in order of first appearance
        final List<List<Integer>> results = new LinkedList<List<Integer>>();
        while (in.hasNextInt()) {
            // Get ultimate owner.
            int owner = in.nextInt();
            if (ultimateOwners.containsKey(owner)) owner = ultimateOwners.get(owner);
            // An unpaired trailing value ends the input.
            if (!in.hasNextInt()) break;
            // Get owned and register its ultimate owner.
            final int owned = in.nextInt();
            ultimateOwners.put(owned, owner);
            // Add owned to result.
            if (ownerToOwned.containsKey(owner)) ownerToOwned.get(owner).add(owned);
            else {
                final List<Integer> resultLine = new LinkedList<Integer>();
                resultLine.add(owner);
                resultLine.add(owned);
                ownerToOwned.put(owner, resultLine);
                results.add(resultLine);
            }
        }
        int lineNumber = 1;
        for (final List<Integer> line : results) {
            System.out.printf("%d: ", lineNumber++);
            for (final Integer value : line) {
                System.out.printf("%d ", value);
            }
            System.out.println();
        }
    }
}
答案 8 :(得分:0)
在对我的前两次尝试和一些研究不完全满意之后,我遇到了this recipe用于Python中不相交的集合,以及Raymond Hettinger的祝福和输入。 (Raymond Hettinger是一位长期非常活跃的Python核心开发人员。)
这是that recipe的改编,与我前两次尝试非常接近,但配方本身可能是更好的参考。
对于非常大的数据集，收集阶段应当足够高效，因为 Python 中的大部分集合操作都是用 C 实现的。输入数据不需要预先排序。打印时，我只是为了可读性才对输出排序；如果这会影响性能，也可以不排序，直接打印各个连接。
# ~~~~~
# data, setup
input = '''
1 2
3 4
2 3
''' # etc.

def lgen():
    """Yield one tuple of ints for every non-blank line of the `input` text."""
    for l in input.splitlines():
        l = l.strip()
        if l:
            yield tuple(int(i) for i in l.split())
# ~~~~~
# collect
connections = {} # this is a mapping of values to the connections they are in
# each node will map to a shared object instance of the connection it is in
# e.g. {1: set([1,2]), 2: set([1,2])}, where the 2 sets are the same object
for x, y in lgen():
    cx = connections.setdefault(x, set([x])) # if not found, create new connection with this single value
    cy = connections.get(y) # defaults to None if not found
    if not cy: # if we haven't come across this value yet...
        cx.add(y) # ...add it to the current connection...
        connections[y] = cx # ...and update the reference
    elif cy is not cx: # if the cy connection is not the exact same object as the cx connection...
        if len(cy) > len(cx): # keep the larger set so fewer
            cx, cy = cy, cx   # references need to be rewritten
        cx |= cy # merge the smaller set into the larger one
        # Re-point every member of the absorbed set, not just y: after the
        # swap above the absorbed set may be the one x belonged to.
        for member in cy:
            connections[member] = cx
# ~~~~~
# print
# Print each connection once, in ascending order of its smallest member.
seen = set()
for key in sorted(connections.keys()):
    if key not in seen:
        c = connections[key]
        print(sorted(c))
        seen |= c  # every member of c is now accounted for
答案 9 :(得分:0)
设 M 为边数、N 为节点数：时间复杂度为 O(M·α(M))，对所有实际的 M 而言相当于 O(M)；空间复杂度为 O(N)。
该算法是在线的，不需要事先知道所有的边（这一点不同于其他基于图遍历的解法），因此可以很好地扩展。边也不需要排序，可以按任意顺序给出。
实现使用 C++ 编写，但只用到了向量/映射（vector/map），几乎可以用任何你想用的语言实现。
但是，由于每个元素都有唯一的 id，我们需要把这些 id 映射为（连续的）整数。
#include <cstdio>
#include <algorithm>
#include <vector>
using namespace std;
const int MAX_N = 1000*1000;
// p[i]: parent link of node i (a root points to itself).
// f[i]: group number assigned to root i, or -1 while none is assigned.
int p[MAX_N],f[MAX_N];

/**
 * Returns the root of the set containing `a` and re-points every node on the
 * walked path directly at that root. Iterative, so long parent chains cannot
 * exhaust the call stack.
 */
int parent(int a) {
    int root = a;
    while (p[root] != root)
        root = p[root];
    while (p[a] != root) {
        const int next = p[a];
        p[a] = root;
        a = next;
    }
    return root;
}

/**
 * Merges the sets containing `a` and `b`.
 * @return true when two distinct sets were merged, false when `a` and `b`
 *         were already in the same set.
 */
bool join(int a, int b) {
    const int root_b = parent(b);
    a = parent(a);
    p[a] = root_b;
    return p[a] != a;
}
int main()
// First integer in the file : number of nodes in the graph
int N;
// Union-find in O(M * alpha(M)) ~= O(M)
// M = number of lines in the file
for(int i = 1; i <= N ; i++)
p[i] = i;
f[i] = -1;
int a,b;
while(scanf("%d%d",&a,&b) != EOF)
// Determine the number of groups : O(M)
int nG = 0;
for(int i = 1 ; i <= N ; i++)
p[i] = parent(p[i]);
if(f[p[i]] == -1)
f[p[i]] = nG++;
// Build groups : O(M)
vector< vector<int> > Groups(N+1);
for(int i = 1 ; i <= N ; i++)
Groups[ f[p[i]] ].push_back(i);
// Output result
for(int i = 0 ; i < Groups.size() ; i++)
printf("%d : ",i);
for(int j = 0 ; j < Groups[i].size() ; j++)
printf("%d ",Groups[i][j]);
这个映射可以用 map 在 O(M log(N)) 内构建；如果能在输入文件开头给出所有节点的 id，则可以做到 O(N log(N))；如果使用哈希映射（O(N)），或者你了解图的结构、可以自行构建映射，还可以做得更好。
#include <cstdio>
#include <algorithm>
#include <vector>
#include <map>
using namespace std;
const int MAX_N = 1000*1000;
// p[i]: parent link of node index i (a root points to itself).
// f[i]: group number assigned to root i, or -1 while none is assigned.
int p[MAX_N],f[MAX_N];

/**
 * Returns the root of the set containing `a`, compressing the walked path so
 * each visited node points straight at the root. Written as two loops instead
 * of recursion so deep chains cannot overflow the stack.
 */
int parent(int a) {
    int root = a;
    while (p[root] != root)
        root = p[root];
    while (p[a] != root) {
        const int next = p[a];
        p[a] = root;
        a = next;
    }
    return root;
}

/**
 * Merges the sets containing `a` and `b`.
 * @return true when two distinct sets were merged, false when both were
 *         already in the same set.
 */
bool join(int a, int b) {
    const int root_b = parent(b);
    a = parent(a);
    p[a] = root_b;
    return p[a] != a;
}
// Mapping
int N = 0;
map<int,int> indMap,invMap;
int IND(int x) {
if(indMap.find(x) == indMap.end())
p[N] = N;
f[N] = -1;
indMap[x] = N;
invMap[ indMap[x] ] = x;
return indMap[x];
int main()
// Union-find in O(M * alpha(M)) ~= O(M)
// M = number of lines in the file
int a,b;
while(scanf("%d%d",&a,&b) != EOF)
// Determine the number of groups : O(M)
int nG = 0;
for(int i = 1 ; i <= N ; i++)
p[i] = parent(p[i]);
if(f[p[i]] == -1)
f[p[i]] = nG++;
// Build groups : O(M)
vector< vector<int> > Groups(N+1);
for(int i = 1 ; i <= N ; i++)
Groups[ f[p[i]] ].push_back(i);
// Output result
for(int i = 0 ; i < Groups.size() ; i++)
printf("%d : ",i+1);
for(int j = 0 ; j < Groups[i].size() ; j++)
printf("%d ", invMap[ Groups[i][j] ]);