假设我有以下字符串:
foo,bar,baz
bar,foo
quux,baz,foo
我想生成在多行中重复出现的词对列表,以便得到以下数组:
[['foo', 'bar'], ['foo', 'baz']],
也许这听起来很傻,但我已经为如何实现这一点苦恼了好一阵子。另一个问题是该数据集有好几 MB 大,所以代码需要尽可能高效。
有人能给我指出正确的方向吗? 也许可以使用某种高效的算法,或者只是给一些示例代码?
答案 0 :(得分:0)
分而治之。
先为每一行生成该行所有词对的列表,然后合并各行的词对列表,找出其中重复出现的词对。
$string = <<<STRING
foo,bar,baz
bar,foo
quux,baz,foo
STRING;

// Turn the raw text into one normalised word list per input line:
// each list holds the line's distinct words in ascending order.
$lines = array_map(
    function ($rawLine) {
        // Split on commas and drop repeated words in a single step.
        $distinctWords = array_unique(explode(',', $rawLine));

        // sort() also re-indexes the list to 0..n-1 after array_unique() left gaps.
        sort($distinctWords);

        return $distinctWords;
    },
    // \R matches any line-break sequence (LF, CRLF, CR, ...).
    preg_split('/\R/', $string)
);
/**
 * Build every unordered pair of words from a list.
 *
 * The pairs are emitted in input order: each word is combined with every
 * word that follows it, so n words produce n * (n - 1) / 2 pairs.
 *
 * @param array $words Words to combine; keys are ignored, only the order of
 *                     the values matters.
 *
 * @return array List of two-element arrays [earlierWord, laterWord].
 *
 * @throws Exception When fewer than two words are given.
 */
function pairs($words) {
    // Re-index to 0..n-1 so the positional access below is valid even when
    // the input has gaps in its keys (e.g. straight out of array_unique()).
    $words = array_values($words);
    $length = count($words);
    if ($length < 2) {
        throw new Exception('No pairs if length < 2');
    }

    $pairs = [];
    // Outer loop: from the first word to the one before the last.
    for ($i = 0; $i < $length - 1; $i++) {
        // Inner loop: every word after position $i.
        for ($j = $i + 1; $j < $length; $j++) {
            $pairs[] = [$words[$i], $words[$j]];
        }
    }

    return $pairs;
}
// Collect every distinct pair ($allPairs) and every pair that occurs on more
// than one line ($nonUniquePairs). Both lists are filled in order of
// detection and have consecutive keys 0..n-1.
$allPairs = [];
$nonUniquePairs = [];

// $pairSeenCount[$first][$second] = how many times the pair has been seen so
// far. The nested hash map gives constant-time lookups, replacing the linear
// in_array() scans and the per-line array_merge()/array_unique() pass, which
// made the whole run quadratic in the number of pairs.
// This relies on each entry of $lines being sorted and free of duplicates (as
// done where $lines is built), so a pair always appears in the same order and
// at most once per line.
$pairSeenCount = [];

foreach ($lines as $words) {
    // pairs() throws for fewer than two words; such lines (for example a
    // trailing blank line) simply contribute no pairs, so skip them instead
    // of aborting the whole run.
    if (count($words) < 2) {
        continue;
    }

    foreach (pairs($words) as $pair) {
        $first = $pair[0];
        $second = $pair[1];

        $seenBefore = isset($pairSeenCount[$first][$second])
            ? $pairSeenCount[$first][$second]
            : 0;
        $pairSeenCount[$first][$second] = $seenBefore + 1;

        if ($seenBefore === 0) {
            // First sighting: a new distinct pair.
            $allPairs[] = $pair;
        } elseif ($seenBefore === 1) {
            // Second sighting: record it as repeated exactly once.
            $nonUniquePairs[] = $pair;
        }
    }
}
这将是结果:
'allPairs' =>
array (size=5)
0 =>
array (size=2)
0 => string 'bar' (length=3)
1 => string 'baz' (length=3)
1 =>
array (size=2)
0 => string 'bar' (length=3)
1 => string 'foo' (length=3)
2 =>
array (size=2)
0 => string 'baz' (length=3)
1 => string 'foo' (length=3)
4 =>
array (size=2)
0 => string 'baz' (length=3)
1 => string 'quux' (length=4)
5 =>
array (size=2)
0 => string 'foo' (length=3)
1 => string 'quux' (length=4)
'nonUniquePairs' =>
array (size=2)
0 =>
array (size=2)
0 => string 'bar' (length=3)
1 => string 'foo' (length=3)
1 =>
array (size=2)
0 => string 'baz' (length=3)
1 => string 'foo' (length=3)
答案 1 :(得分:0)
<?php
// Sample input: one comma-separated list of words per line.
$str = 'foo,bar,baz,baz,b
bar,foo,b,a
quux,b,baz,foo,a';

// Prepare a working array: split the text into lines, each line into words,
// trim surrounding whitespace from every word and drop repeats within a line.
$array = array_map(function ($i) { return array_unique(array_map('trim', explode(',', $i))); }, explode("\n", $str));

// For every combination of two lines, collect the words they have in common.
$res = [];
$lineCount = count($array);
for ($i = 0; $i < $lineCount; $i++) {
    for ($j = $i + 1; $j < $lineCount; $j++) {
        $temp = array_unique(array_intersect($array[$i], $array[$j]));
        sort($temp); // Reset indexes to 0..n-1 so the loops below can index by position
        $res[] = $temp;
    }
}

// Gather unique pairs: every two-word combination of each shared-word list,
// keeping only the first occurrence of a pair.
$pairs = [];
// $seen[$a][$b] is set once the pair (a, b) has been emitted. The hash lookup
// replaces a linear scan over all collected pairs for every candidate.
$seen = [];
foreach ($res as $item) {
    $itemCount = count($item);
    for ($i = 0; $i < $itemCount; $i++) {
        for ($j = $i + 1; $j < $itemCount; $j++) {
            $a = $item[$i];
            $b = $item[$j];
            // A pair is unordered, so check both orientations.
            if (isset($seen[$a][$b]) || isset($seen[$b][$a])) {
                continue;
            }
            $seen[$a][$b] = true;
            $pairs[] = [$a, $b];
        }
    }
}

print_r($pairs);
结果
[ [b, bar], [b, foo], [bar, foo], [b, baz], [baz, foo], [a,b], [a, foo] ]