我有一个包含 n 行的数据集,每行有两个由空格分隔的字段:第一个是卡号,第二个是名称。如果两行的卡号相同或名称相同,则认为它们属于同一个人。如何求出数据集中不同的人的总数?
示例:
1 A
1 B
2 B
3 C
此数据集中有 2 个不同的人。这是因为第一行和第二行的卡号相同,第二行和第三行的名称相同,所以前三行属于同一个人。
可以使用哪种算法来解决这类问题?
答案 0 :(得分:1)
这是使用图论和连通分量的另一种解决方案:
/// <summary>
/// Counts distinct people by treating every record as a graph node, linking records
/// that share a phone or an email, and counting the connected components.
/// </summary>
/// <param name="persons">
/// Records to group. Each record's position in this array is used as its id in the
/// lookup dictionaries.
/// </param>
/// <returns>The number of connected components, i.e. distinct people.</returns>
int CountUnique(Person[] persons)
{
    // Each dictionary maps a value to the index of the first record that carried it.
    Dictionary<string, int> phones = new Dictionary<string, int>();
    Dictionary<string, int> emails = new Dictionary<string, int>();

    for (int i = 0; i < persons.Length; i++)
    {
        Person p = persons[i];
        int pA;
        int pB;
        bool phoneSeen = phones.TryGetValue(p.Phone, out pA);
        bool emailSeen = emails.TryGetValue(p.Email, out pB);

        if (phoneSeen)
        {
            // Same phone as an earlier record: add an undirected edge.
            persons[pA].Next.Add(p);
            p.Next.Add(persons[pA]);
        }
        else
        {
            phones.Add(p.Phone, i);
        }

        if (emailSeen)
        {
            // Skip the edge when the phone lookup already linked the same pair.
            if (!phoneSeen || pB != pA)
            {
                persons[pB].Next.Add(p);
                p.Next.Add(persons[pB]);
            }
        }
        else
        {
            emails.Add(p.Email, i);
        }
    }

    // One traversal per still-unvisited record; a single pass keeps this linear
    // instead of rescanning the array from the start for every component.
    int count = 0;
    foreach (Person start in persons)
    {
        if (!start.Visited)
        {
            DFS(start);
            count++;
        }
    }

    return count;
}
/// <summary>
/// Marks <paramref name="pCurrent"/> and every record reachable from it through
/// <c>Next</c> links as visited.
/// </summary>
/// <param name="pCurrent">Record at which the traversal starts.</param>
private static void DFS(Person pCurrent)
{
    // Mark before descending so cycles in the undirected graph terminate.
    pCurrent.Visited = true;
    foreach (Person p in pCurrent.Next)
    {
        if (!p.Visited)
        {
            // Depth-first: fully explore this neighbour before the next one.
            DFS(p);
        }
    }
}
/// <summary>
/// Scans <paramref name="persons"/> from position <paramref name="current"/> onward and
/// returns the first record whose <c>Visited</c> flag is not set.
/// </summary>
/// <param name="persons">Records to scan.</param>
/// <param name="current">Index at which the scan starts.</param>
/// <returns>The first unvisited record at or after <paramref name="current"/>, or <c>null</c> if there is none.</returns>
private static Person FindUnvisited(Person[] persons, int current)
{
    int index = current;
    while (index < persons.Length)
    {
        Person candidate = persons[index];
        if (!candidate.Visited)
        {
            return candidate;
        }

        index++;
    }

    return null;
}
}
}
答案 1 :(得分:0)
我提出的解决方案是使用某种分区:大多数操作的复杂度为 O(1) 或 O(log n),并且对每个用户只执行一次,因此总时间复杂度约为 O(n)(若字典操作为 O(log n),则为 O(n log n)),具体取决于 Dictionary 的实现方式。
/// <summary>
/// Counts distinct people by assigning records to partitions and merging partitions
/// with a union-find (disjoint-set) forest whenever one record connects two of them.
/// </summary>
/// <param name="persons">Records to group; each exposes <c>Phone</c> and <c>Email</c> strings.</param>
/// <returns>The number of partitions left after all merges, i.e. distinct people.</returns>
int CountUnique(Person[] persons)
{
    // Each dictionary maps a normalized value to the partition that first used it.
    Dictionary<string, int> phones = new Dictionary<string, int>();
    Dictionary<string, int> emails = new Dictionary<string, int>();

    // parent[k] is the parent of partition k; a root is its own parent.
    // At most one partition is created per record.
    int[] parent = new int[persons.Length];
    int partitions = 0;
    int count = 0;

    foreach (Person p in persons)
    {
        // Normalize once so lookups and insertions use the same keys.
        string phone = p.Phone.Trim();
        string email = p.Email.Trim().ToLowerInvariant();

        int pA;
        int pB;
        bool hasPhone = phones.TryGetValue(phone, out pA);
        bool hasEmail = emails.TryGetValue(email, out pB);

        if (!hasPhone && !hasEmail)
        {
            // Nothing known about this record: open a new partition.
            parent[partitions] = partitions;
            phones.Add(phone, partitions);
            emails.Add(email, partitions);
            partitions++;
            count++;
        }
        else if (hasPhone && hasEmail)
        {
            // Both values known: merge their partitions unless already connected.
            // Comparing roots keeps the merge transitive, so a chain of links is
            // never counted twice.
            int rootA = FindRoot(parent, pA);
            int rootB = FindRoot(parent, pB);
            if (rootA != rootB)
            {
                parent[rootB] = rootA;
                count--;
            }
        }
        else if (hasPhone)
        {
            // Known phone, new email: the email joins the phone's partition.
            emails.Add(email, pA);
        }
        else
        {
            // Known email, new phone: the phone joins the email's partition.
            phones.Add(phone, pB);
        }
    }

    return count;
}

/// <summary>
/// Returns the root of the partition containing <paramref name="node"/> and compresses
/// the path walked so later lookups are shorter.
/// </summary>
private static int FindRoot(int[] parent, int node)
{
    int root = node;
    while (parent[root] != root)
    {
        root = parent[root];
    }

    // Second pass: point every node on the path directly at the root.
    while (parent[node] != root)
    {
        int next = parent[node];
        parent[node] = root;
        node = next;
    }

    return root;
}
答案 2 :(得分:0)
类似 C++ 的写法,但请把它当作伪代码。
int uniqueCount = 0;
map<string, bool> column_1;
map<string, bool> column_2;
string left, right
for(int x = 0 ;x < matrix.count; x++) {
left = matrix[x][0]
right = matrix[x][1];
if(column_1.find(left) != column_1.end && column_2.find(right) != column_2.end){
++uniqueCount
column_1[left] = true;
column_2[right] = true;
}
else --uniqueCount;
}
如果以上内容无法编译,我很抱歉。请把它当作伪代码:我已经有一段时间没有写 C++ 了,而且我认为给出 Rails 代码不会有什么帮助。