我有以下格式的文本文件:
Table1 1 Name John
Table1 1 SId 110
Table1 1 City NewYork
Table1 2 Name Abc
Table1 2 SId 111
Table1 2 City Paris
Table2 1 SId 110
Table2 1 DeptName HR
Table2 1 DeptId 00001
Table2 2 SId 111
Table2 2 DeptName Dev
Table2 2 DeptId 00002
Table3 1 DeptId 00001
Table3 1 DeptHead Adam
Table3 2 DeptId 00002
Table3 2 DeptHead Nik
第一列是表名,然后是主键,然后是表的实际列名,然后是列值。根据上面的例子,Table1有3列,分别是Name,SId,City。 Table1有2行,我们可以使用值1和2(主键)获得。表2和表3也是如此 我必须从文件中获取所有细节,然后首先使用SId连接Table1和Table2行,然后使用DeptId连接Table3以创建一个公共记录。同样,文本文件中有数千个条目。还有大约20张桌子。以上只是一个例子。
直到现在我已经创建了一个类
public class Student
{
public string TableName { get; set; }
public string ColumnName { get; set; }
public string ColumnValue { get; set; }
public int PrimaryKey { get; set; }
}
public class Program
{
private static bool IsAllColumnsExist(DataTable tableNameToCheck, List<string> columnsNames)
{
var iscolumnExist = true;
if (tableNameToCheck != null && tableNameToCheck.Columns != null)
{
foreach (string columnName in columnsNames)
{
if (!tableNameToCheck.Columns.Contains(columnName))
{
iscolumnExist = false;
break;
}
}
}
else
{
iscolumnExist = false;
}
return iscolumnExist;
}
static void Main()
{
var table1 = new DataTable();
var table2 = new DataTable();
var table3 = new DataTable();
var tablesData = (from line in File.ReadLines(@"C:\Console.txt")
let values = line.Split('\t')
select new Toyo
{
TableName = values[0].ToString(),
PrimaryKey = Convert.ToInt32(values[1]),
ColumnName = values[2].ToString(),
ColumnValue = values[3].ToString()
}).ToList();
var groupedData = tablesData.GroupBy(x => new { x.TableName,x.PrimaryKey }).ToList();
foreach (var tableData in groupedData)
{
var primaryKey = tableData.Select(x => x.PrimaryKey).First();
var columnNames = tableData.Select(x => x.ColumnName).ToList();
var columnValues = tableData.Select(x => x.ColumnValue).ToList();
if (tableData.All(x => x.TableName == "Table1"))
{
if (!IsAllColumnsExist(table1, columnNames))
{
table1.Columns.AddRange(new DataColumn[4]
{
new DataColumn("pkTable1", typeof(int)),
new DataColumn(columnNames[0], typeof(string)),
new DataColumn(columnNames[1], typeof(string)),
new DataColumn(columnNames[2], typeof(string))
});
}
table1.Rows.Add(primaryKey, columnValues[0], columnValues[1], columnValues[2]);
}
else if (tableData.All(x => x.TableName == "Table2"))
{
if (!IsAllColumnsExist(table2, columnNames))
{
table2.Columns.AddRange(new DataColumn[4]
{
new DataColumn("pkTable2", typeof(int)),
new DataColumn(columnNames[0], typeof(string)),
new DataColumn(columnNames[1], typeof(string)),
new DataColumn(columnNames[2], typeof(string))
});
}
table2.Rows.Add(primaryKey, columnValues[0], columnValues[1], columnValues[2]);
}
else if (tableData.All(x => x.TableName == "Table3"))
{
if (!IsAllColumnsExist(table3, columnNames))
{
table3.Columns.AddRange(new DataColumn[3]
{
new DataColumn("pkTable3", typeof(int)),
new DataColumn(columnNames[0], typeof(string)),
new DataColumn(columnNames[1], typeof(string))
});
}
table3.Rows.Add(primaryKey, columnValues[0], columnValues[1]);
}
}
var result = (from firstTable in table1.AsEnumerable()
join secondTable in table2.AsEnumerable() on firstTable.Field<string>("SId") equals secondTable.Field<string>("SId")
join thirdTable in table3.AsEnumerable() on secondTable.Field<string>("DeptId") equals thirdTable.Field<string>("DeptId")
select new
{
Name = firstTable.Field<string>("Name"),
StudentId = firstTable.Field<string>("SId"),
City = firstTable.Field<string>("City"),
DepartmentName = secondTable.Field<string>("DeptName"),
DepartmentId = secondTable.Field<string>("DeptId"),
DepartmentHead = thirdTable.Field<string>("DeptHead")
}).ToList();
}
}
但是这样我必须为每个表创建数据表。而在我的确切问题中,大约有20个表,这将导致错误的代码创建20个数据表,如上所述。
有更好的方法吗? 有什么帮助吗?
感谢。
答案 0 :(得分:0)
我确实会创建20个DataTables,但我会将它们存储在字典中以便于访问。它还可以使您的程序更加灵活。
所以要使用的数据类型:
Dictionary<string, DataTable>
答案 1 :(得分:0)
每个表的列是否相同?每个表中都存在DeptId
DeptHead
等?这些列是“固定的”(即你不能随意获得你不期望的列名)?如果是这样,我将创建一个类结构来表示列,并创建一个标题为TableId
的列,并将数据导入到该内容中,作为List存储在内存中或者保存到数据库中。
看起来你正在尝试对数据进行聚合功能(按TableId分组),所以我觉得这是最简单的方法
答案 2 :(得分:0)
我假设你只需要保留表之间任何重叠列名的一个值。我假设文件中的第一个表可用于为常用的最终表提供主键。我假设链接表之间只有一个使用与外键相同名称的公共字段。
我编写的代码完全是动态的。我假设您需要中间DataTable
以及最终合并DataTable
,否则您可以将传入数据动态映射到记录中,然后将它们合并到最终DataTable
。< / p>
我的方法是使用LINQ到文本文件中的行到匿名对象转换,然后处理对象以提取所有的表名,为每个表然后列名和构建DataTable
S和数据加载到各之一。
我根据所有表的所有列名构建了一个公共DataTable
。
然后我找到每个表之间的公共字段(假设一个)。我通过公共(外键)字段索引DataTable
s,这样我就可以轻松地查找行(例如本地连接实现),然后将每条记录连接到一个组合记录中并将其放入组合表中。
// load textfile and convert lines into objects
// var inFile = File.ReadLines(...);
var inData = inFile.Select(line => line.Split('\t'))
.Select(lineArray => new {
TableName = lineArray[0],
PrimaryKey = lineArray[1],
ColName = lineArray[2],
ColValue = lineArray[3],
});
// Buld a dictionary of tables and their columns
var tableColNames = inData.Select(r => new { r.TableName, r.ColName })
.Distinct()
.GroupBy(r => r.TableName, r => r.ColName)
.ToDictionary(rg => rg.Key, rg => rg.Select(r => r).ToList());
var tableNames = tableColNames.Keys.ToList();
// build all of the tables into a dictionary of tables
var dataTables = new Dictionary<string, DataTable>();
foreach (var tableName in tableNames) {
var aTable = new DataTable(tableName);
var primaryKeyCol = new DataColumn("PrimaryKey");
aTable.Columns.Add(primaryKeyCol);
aTable.PrimaryKey = new[] { primaryKeyCol };
foreach (var colName in tableColNames[tableName]) {
aTable.Columns.Add(new DataColumn(colName));
}
dataTables[tableName] = aTable;
}
// load all of the tables with their data
foreach (var aTable in dataTables.Values) {
var inRows = inData.Where(r => r.TableName == aTable.TableName)
.GroupBy(r => r.PrimaryKey);
foreach (var row in inRows) {
var newRow = aTable.NewRow();
newRow["PrimaryKey"] = row.Key;
foreach (var col in row)
newRow[col.ColName] = col.ColValue;
aTable.Rows.Add(newRow);
}
}
// build the combined table with all columns
var combinedTable = new DataTable();
foreach (var aTable in dataTables.Values)
combinedTable.Columns.AddRange(aTable.DataColumns()
.Where(dc => !combinedTable.ColumnNames()
.Contains(dc.ColumnName))
.Select(dc => new DataColumn(dc.ColumnName))
.ToArray());
// find and index the common columns between tables
var commonColNames = new Dictionary<string, Dictionary<string, string>>(); // [TableName]=>[LinkedTableName]=>ColumnName
var indexes = new Dictionary<string, Dictionary<string, DataRow>>(); // [TableName]=>[ColumnValue]=>DataRow
for (int j1 = 0; j1 < tableNames.Count; ++j1) { // foreach table, find its linked tables
var startTableName = tableNames[j1];
var startTable = dataTables[startTableName];
var startTableColNames = startTable.NonPrimaryKeyColumnNames().ToList();
var linkedTables = tableNames.Skip(j1 + 1) // only find links to later tables
.Select(n => (TableName: n, CommonColName: startTableColNames.Intersect(dataTables[n].ColumnNames()).SingleOrDefault()))
.Where(tc => tc.CommonColName != null);
if (linkedTables.Count() > 0) { // if it has linked tables, save the linking column and index that column
var linkingColNames = new Dictionary<string, string>(); // [LinkedTableName]=>ColumnName
foreach (var linkedTable in linkedTables) {
linkingColNames.Add(linkedTable.TableName, linkedTable.CommonColName);
if (!indexes.ContainsKey(linkedTable.TableName)) { // only build indexes once per linking column
var colIndex = dataTables[linkedTable.TableName].AsEnumerable()
.Select(r => (Key: r.Field<string>(linkedTable.CommonColName), DataRow: r))
.ToDictionary(t => t.Key, t => t.DataRow);
indexes.Add(linkedTable.TableName, colIndex);
}
}
commonColNames[startTableName] = linkingColNames;
}
}
// combine the tables starting with the first table
var firstTableName = tableNames[0];
var firstTable = dataTables[firstTableName];
var firstTableColNames = firstTable.ColumnNames().ToList();
// foreach DataRow in the first table
foreach (var r1 in firstTable.AsEnumerable()) {
var newRow = combinedTable.NewRow();
// load the first table DataRow into the combined DataRow
newRow.CopyColumnValues(r1, firstTableColNames);
// find all the linked tables and merge their DataRows into the combined DataRow
var linkedTableNamesStack = new Stack<(string TableName, string LinkedTableName)>();
linkedTableNamesStack.PushRange(commonColNames[firstTableName].Keys.Select(ltn => (firstTableName,ltn)));
while (linkedTableNamesStack.Count > 0) {
var nextTableLink = linkedTableNamesStack.Pop();
if (commonColNames.TryGetValue(nextTableLink.LinkedTableName, out var linkedTables))
linkedTableNamesStack.PushRange(linkedTables.Keys.Select(ltn => (nextTableLink.LinkedTableName,ltn)));
var linkingColName = commonColNames[nextTableLink.TableName][nextTableLink.LinkedTableName];
var linkingColValue = newRow.Field<string>(linkingColName);
var linkedRow = indexes[nextTableLink.LinkedTableName][linkingColValue];
newRow.CopyColumnValues(linkedRow, linkedRow.Table.NonPrimaryKeyColumnNames());
}
combinedTable.Rows.Add(newRow);
}
以下是使用的扩展方法:
public static class Ext {
public static IEnumerable<string> NonPrimaryKeyColumnNames(this DataTable aTable) => aTable.ColumnNames().Where(n => n != "PrimaryKey");
public static IEnumerable<DataColumn> DataColumns(this DataTable aTable) => aTable.Columns.Cast<DataColumn>();
public static IEnumerable<string> ColumnNames(this DataTable aTable) => aTable.DataColumns().Select(dc => dc.ColumnName);
public static void CopyColumnValues(this DataRow dest, DataRow src, IEnumerable<string> colNames) {
foreach (var colName in colNames)
dest[colName] = src[colName];
}
public static void PushRange<T>(this Stack<T> s, IEnumerable<T> Ts) => Ts.ForEach(aT => s.Push(aT));
public static void ForEach<T>(this IEnumerable<T> source, Action<T> action) {
foreach (var s in source)
action(s);
}
}