假设我有两个存储在两个不同的.csv文件(1.csv,2.csv)中的查询结果,数据看起来像这样:
dim1和dim2将存在,这些字段可以不同。
如何获得如下所示的结果?
字段在结果表中是唯一连接的,并且行通过键(dim1和dim2)分组
我使用了List<List<string>>
来代表每个表格结果:
public static List<List<string>> R1 { get; } = new List<List<string>>
{
new List<string> {"dim1", "dim2", "field1"},
new List<string> {"2", "wwa", "3"},
new List<string> {"1", "arr", "6"}
};
public static List<List<string>> R2 { get; } = new List<List<string>>
{
new List<string> {"dim1", "dim2", "field2", "field3"},
new List<string> {"2", "jaja", "1000", "2"},
new List<string> {"3", "wwa", "1000", "3"},
new List<string> {"1", "arr", "2000", "4"}
};
我采用了以下策略:
var keys = 2;
var results = new List<List<string>>();
foreach (var r1 in R1)
{
var range = r1.GetRange(0, keys);
var hash = range.GetKey();
var found = false;
var row = new List<string>();
foreach (var r2 in R2)
{
if (r2.GetRange(0, keys).GetKey() == hash)
{
row.AddRange(range);
row.AddRange(r1.GetRange(keys, r1.Count - keys));
row.AddRange(r2.GetRange(keys, r2.Count - keys));
results.Add(row);
found = true;
}
}
if (!found)
{
row.AddRange(r1);
R2[0].GetRange(keys, R2[0].Count - keys).ForEach(e => row.Add(null));
results.Add(row);
}
}
foreach (var r2 in R2)
{
var range = r2.GetRange(0, keys);
var hash = range.GetKey();
var found = false;
var row = new List<string>();
foreach (var r in results)
{
if (r.GetRange(0, keys).GetKey() == hash)
found = true;
}
if (!found)
{
row.AddRange(range);
R1[0].GetRange(keys, R1[0].Count - keys).ForEach(e => row.Add(null));
row.AddRange(r2.GetRange(keys, r2.Count - keys));
results.Add(row);
}
}
如您所见,我具有2xN ^ 2的复杂度,我认为应该有一个更好的解决方案来生成适合上面示例的新List<List<string>>
。
答案 0 :(得分:2)
列表联接可以通过以下方式完成
private void JoinList(List<List<string>> listToJoin, int keysNumber, int paddingAfterKeys = 0)
{
// you need the padding if an element is found only in the second list
// this is a list that will be added to the result
var paddingList = new List<string>();
for (var i = 0; i < paddingAfterKeys; i++)
{
// feel free to change it to null or what value fit your solution
paddingList.Add("0");
}
foreach (var t in listToJoin)
{
// create a key
var keyString = string.Join(',', t.Take(keysNumber));
if (result.TryGetValue(keyString, out var fieldsList))
{
// if the key already exist just add the values except the keys values this way you won't get duplicate keys
fieldsList.AddRange(t.Skip(keysNumber));
}
else
{
// get the keys, pad the list if needed and the rest of the keys
fieldsList = t.Take(keysNumber).ToList();
fieldsList.AddRange(paddingList);
fieldsList.AddRange(t.Skip(keysNumber));
// add new key to the dictionary and set the value in my program the result was a private variable of the class for the ease of use.
result[keyString] = fieldsList;
}
}
}
如果不是第一个键,则可以创建一个列表以保留索引并从索引中获取键。
要对此进行编排,您将需要以下内容:
public void ProcessLists()
{
const int keysNumber = 2;
var totalLength = R1[0].Count + R2[0].Count - keysNumber;
// add first list to dictionary
JoinList(R1, keysNumber);
var paddingAfterKeys = R1[0].Count - keysNumber;
// add the second list to dictionary and add padding if a key was not found in dictionary
JoinList(R2, keysNumber, paddingAfterKeys);
// add padding to the end if a key was found in first list but not in second
paddingAfterKeys = R2[0].Count - keysNumber;
var paddingList = new List<string>();
for (var i = 0; i < paddingAfterKeys; i++)
{
paddingList.Add("0");
}
foreach (var keyValuePair in result.Where(x => x.Value.Count < totalLength))
{
keyValuePair.Value.AddRange(paddingList);
}
}
答案 1 :(得分:1)
您可以创建以CSV记录(输入记录和最终输出记录)为模型的强类型对象。然后使用LINQ组合记录。
这是一个小例子。
using System;
using System.Collections.Generic;
using System.Linq;
namespace ConsoleApp2
{
class Program
{
public static List<CsvOne> InitCsvOne()
{
//mock pulling in data
List<CsvOne> csv = new List<CsvOne>
{
new CsvOne { dim1 = 2,dim2 = "wwa", field1 =3 },
new CsvOne { dim1 = 1, dim2 = "arr", field1 = 6}
};
return csv;
}
public static List<CsvTwo> InitCsvTwo()
{
//mock pulling in data
List<CsvTwo> csv = new List<CsvTwo>
{
new CsvTwo { dim1 = 2,dim2 = "jaja", field2 = 1000, field3 =2 },
new CsvTwo { dim1 = 3, dim2 = "waa", field2 = 1000, field3 = 3 },
new CsvTwo { dim1 = 1, dim2 = "arr", field2 = 2000, field3 = 4},
};
return csv;
}
static void Main(string[] args)
{
var csvOne = InitCsvOne();
var csvTwo = InitCsvTwo();
var csvThree = new List<CsvThree>();
//get the ball rolling with csv one
csvOne.ForEach(record =>csvThree.Add(new CsvThree(record)));
//now either match up one with two or add with field1 being 0
//if we already have a matching dim1 and dim2, lets update the two new fields.
//note that we do not add another one if there are two the same
csvTwo.ForEach(record =>
{
if (csvThree.Any(t => (t.dim1 == record.dim1 && t.dim2 == record.dim2)))
{
//combine the match with fields 2 and 3
var theMatch = csvThree.FirstOrDefault(t => (t.dim1 == record.dim1 && t.dim2 == record.dim2));
theMatch.field2 = record.field2;
theMatch.field3 = record.field3;
}
else //add this new record to the list
{
csvThree.Add(new CsvThree(record));
}
});
csvThree = csvThree.OrderBy(t => t.dim1).ThenBy(t=>t.dim2).ToList(); //or whatever you want
//check it
Console.WriteLine($"Csv One Records");
Console.WriteLine($" dim1|dim2|field1");
csvOne.ForEach(record =>
{
Console.WriteLine(record.ToString());
});
Console.WriteLine($"Csv Two Records");
Console.WriteLine($"dim1|dim2|field2|field3");
csvTwo.ForEach(record =>
{
Console.WriteLine(record.ToString());
});
Console.WriteLine($"Csv Three Records");
Console.WriteLine($"dim1|dim2|field1|field2|field3");
csvThree.ForEach(record =>
{
Console.WriteLine(record.ToString());
});
Console.WriteLine("Press any key to exit...");
var wait = Console.ReadKey();
}
}
public abstract class dim
{
public int dim1 { get; set; }
public string dim2 { get; set; }
}
public class CsvOne:dim
{
public int field1 { get; set; }
public CsvOne()
{
field1 = 0;
}
public override string ToString()
{
return $"{dim1} |{dim2}|{field1}";
}
}
public class CsvTwo:dim
{
public int field2 { get; set; }
public int field3 { get; set; }
public CsvTwo()
{
field2 = field3 = 0;
}
public override string ToString()
{
return $"{dim1} |{dim2}|{field2}|{field3}";
}
}
public class CsvThree : dim
{
public int field1 { get; set; }
public int field2 { get; set; }
public int field3 { get; set; }
public CsvThree()
{
field1 = field2 = field3 = 0;
}
public CsvThree(CsvOne value)
{
field1 = field2 = field3 = 0;
dim1 = value.dim1;
dim2 = value.dim2;
field1 = value.field1;
}
public CsvThree(CsvTwo value)
{
field1 = field2 = field3 = 0;
dim1 = value.dim1;
dim2 = value.dim2;
field2 = value.field2;
field3 = value.field3;
}
public override string ToString()
{
return $"{dim1} |{dim2}|{field1}|{field2}|{field3}";
}
}
}
结果:
Csv One Records
dim1|dim2|field1
2 |wwa|3
1 |arr|6
Csv Two Records
dim1|dim2|field2|field3
2 |jaja|1000|2
3 |waa|1000|3
1 |arr|2000|4
Csv Three Records
dim1|dim2|field1|field2|field3
1 |arr|6|2000|4
2 |jaja|0|1000|2
2 |wwa|3|0|0
3 |waa|0|1000|3
Press any key to exit...