我试图在动态数量的列上加入两个DataTable。我已经得到了下面的代码。问题是连接的ON语句。如何根据列表中的列名和#34; joinColumnNames"来创建此动态。
我在想我需要构建某种表达式树,但是我找不到任何关于如何使用多个连接列以及没有属性的DataRow对象的示例每一栏。
private DataTable Join(List<string> joinColumnNames, DataTable pullX, DataTable pullY)
{
DataTable joinedTable = new DataTable();
// Add all the columns from pullX
foreach (string colName in joinColumnNames)
{
joinedTable.Columns.Add(pullX.Columns[colName]);
}
// Add unique columns from PullY
foreach (DataColumn col in pullY.Columns)
{
if (!joinedTable.Columns.Contains((col.ColumnName)))
{
joinedTable.Columns.Add(col);
}
}
var Join = (from PX in pullX.AsEnumerable()
join PY in pullY.AsEnumerable() on
// This must be dynamic and join on every column mentioned in joinColumnNames
new { A = PX[joinColumnNames[0]], B = PX[joinColumnNames[1]] } equals new { A = PY[joinColumnNames[0]], B = PY[joinColumnNames[1]] }
into Outer
from PY in Outer.DefaultIfEmpty<DataRow>(pullY.NewRow())
select new { PX, PY });
foreach (var item in Join)
{
DataRow newRow = joinedTable.NewRow();
foreach (DataColumn col in joinedTable.Columns)
{
var pullXValue = item.PX.Table.Columns.Contains(col.ColumnName) ? item.PX[col.ColumnName] : string.Empty;
var pullYValue = item.PY.Table.Columns.Contains(col.ColumnName) ? item.PY[col.ColumnName] : string.Empty;
newRow[col.ColumnName] = (pullXValue == null || string.IsNullOrEmpty(pullXValue.ToString())) ? pullYValue : pullXValue;
}
joinedTable.Rows.Add(newRow);
}
return joinedTable;
}
使用3个连接列(Country,Company和DateId)添加一个显示输入/输出的特定示例:
拉X:
Country Company DateId Sales United States Test1 Ltd 20160722 $25 Canada Test3 Ltd 20160723 $30 Italy Test4 Ltd 20160724 $40 India Test2 Ltd 20160725 $35
拉Y:
Country Company DateId Downloads United States Test1 Ltd 20160722 500 Mexico Test2 Ltd 20160723 300 Italy Test4 Ltd 20160724 900
结果:
Country Company DateId Sales Downloads United States Test1 Ltd 20160722 $25 500 Canada Test3 Ltd 20160723 $30 Mexico Test2 Ltd 20160723 300 Italy Test4 Ltd 20160724 $40 900 India Test2 Ltd 20160725 $35
答案 0 :(得分:1)
由于您使用的是LINQ to Objects,因此无需使用表达式树。您可以使用自定义相等比较器解决问题。
创建一个相等比较器,它可以根据特定列的值比较两个DataRow
对象之间的相等性。这是一个例子:
public class MyEqualityComparer : IEqualityComparer<DataRow>
{
private readonly string[] columnNames;
public MyEqualityComparer(string[] columnNames)
{
this.columnNames = columnNames;
}
public bool Equals(DataRow x, DataRow y)
{
return columnNames.All(cn => x[cn].Equals(y[cn]));
}
public int GetHashCode(DataRow obj)
{
unchecked
{
int hash = 19;
foreach (var value in columnNames.Select(cn => obj[cn]))
{
hash = hash * 31 + value.GetHashCode();
}
return hash;
}
}
}
然后你可以用它来做这样的连接:
public class TwoRows
{
public DataRow Row1 { get; set; }
public DataRow Row2 { get; set; }
}
private static List<TwoRows> LeftOuterJoin(
List<string> joinColumnNames,
DataTable leftTable,
DataTable rightTable)
{
return leftTable
.AsEnumerable()
.GroupJoin(
rightTable.AsEnumerable(),
l => l,
r => r,
(l, rlist) => new {LeftValue = l, RightValues = rlist},
new MyEqualityComparer(joinColumnNames.ToArray()))
.SelectMany(
x => x.RightValues.DefaultIfEmpty(rightTable.NewRow()),
(x, y) => new TwoRows {Row1 = x.LeftValue, Row2 = y})
.ToList();
}
请注意我使用的是方法语法,因为我不认为你可以使用自定义相等比较器。
请注意,该方法执行左外连接,而不是完整外连接。根据您提供的示例,您似乎需要完整的外部联接。为此,您需要执行两个左外连接(请参阅此answer)。以下是完整方法的样子:
private static DataTable FullOuterJoin(
List<string> joinColumnNames,
DataTable pullX,
DataTable pullY)
{
var pullYOtherColumns =
pullY.Columns
.Cast<DataColumn>()
.Where(x => !joinColumnNames.Contains(x.ColumnName))
.ToList();
var allColumns =
pullX.Columns
.Cast<DataColumn>()
.Concat(pullYOtherColumns)
.ToArray();
var allColumnsClone =
allColumns
.Select(x => new DataColumn(x.ColumnName, x.DataType))
.ToArray();
DataTable joinedTable = new DataTable();
joinedTable.Columns.AddRange(allColumnsClone);
var first =
LeftOuterJoin(joinColumnNames, pullX, pullY);
var resultRows = new List<DataRow>();
foreach (var item in first)
{
DataRow newRow = joinedTable.NewRow();
foreach (DataColumn col in joinedTable.Columns)
{
var value = pullX.Columns.Contains(col.ColumnName)
? item.Row1[col.ColumnName]
: item.Row2[col.ColumnName];
newRow[col.ColumnName] = value;
}
resultRows.Add(newRow);
}
var second =
LeftOuterJoin(joinColumnNames, pullY, pullX);
foreach (var item in second)
{
DataRow newRow = joinedTable.NewRow();
foreach (DataColumn col in joinedTable.Columns)
{
var value = pullY.Columns.Contains(col.ColumnName)
? item.Row1[col.ColumnName]
: item.Row2[col.ColumnName];
newRow[col.ColumnName] = value;
}
resultRows.Add(newRow);
}
var uniqueRows =
resultRows
.Distinct(
new MyEqualityComparer(
joinedTable.Columns
.Cast<DataColumn>()
.Select(x => x.ColumnName)
.ToArray()));
foreach (var uniqueRow in uniqueRows)
joinedTable.Rows.Add(uniqueRow);
return joinedTable;
}
还请注意我如何克隆列。您不能在两个表中使用相同的列对象。
答案 1 :(得分:0)
var Join =
from PX in pullX.AsEnumerable()
join PY in pullY.AsEnumerable()
on string.Join("\0", joinColumnNames.Select(c => PX[c]))
equals string.Join("\0", joinColumnNames.Select(c => PY[c]))
into Outer
from PY in Outer.DefaultIfEmpty<DataRow>(pullY.NewRow())
select new { PX, PY };
另一种方法是在DataTable
中同时使用DataSet
并使用DataRelation
How To: Use DataRelation to perform a join on two DataTables in a DataSet?