在C#中从包含多个表的文本文件读取数据并进行连接(join)

时间:2018-04-04 19:42:58

标签: c# linq dictionary

我有以下格式的文本文件:

Table1  1   Name      John
Table1  1   SId       110
Table1  1   City      NewYork
Table1  2   Name      Abc
Table1  2   SId       111
Table1  2   City      Paris
Table2  1   SId       110
Table2  1   DeptName  HR
Table2  1   DeptId    00001
Table2  2   SId       111
Table2  2   DeptName  Dev
Table2  2   DeptId    00002
Table3  1   DeptId    00001
Table3  1   DeptHead  Adam
Table3  2   DeptId    00002
Table3  2   DeptHead  Nik

第一列是表名,第二列是主键,第三列是表的实际列名,第四列是列值。根据上面的例子,Table1有3列,分别是Name、SId和City。Table1有2行,可以通过值1和2(主键)来区分。Table2和Table3也是如此。我必须从文件中读取所有数据,然后先使用SId连接Table1和Table2的行,再使用DeptId连接Table3,以生成一条公共记录。文本文件中有数千个这样的条目,并且大约有20个表。以上只是一个例子。

到目前为止,我已经创建了如下的类和程序:

 /// <summary>
 /// One cell read from the input file: the value of a single column
 /// in a single row of a single table.
 /// </summary>
 public class Student
 {
    /// <summary>Name of the table the cell belongs to.</summary>
    public string TableName { get; set; }

    /// <summary>Primary key of the row the cell belongs to.</summary>
    public int PrimaryKey { get; set; }

    /// <summary>Name of the column the cell belongs to.</summary>
    public string ColumnName { get; set; }

    /// <summary>Value stored in the cell.</summary>
    public string ColumnValue { get; set; }
 }

/// <summary>
/// Reads a tab-separated file of (table name, primary key, column name, column value) lines,
/// rebuilds Table1, Table2 and Table3 as DataTables and joins them on SId and DeptId.
/// </summary>
public class Program
{
    /// <summary>
    /// Checks whether a table already contains every one of the given columns.
    /// </summary>
    /// <param name="tableNameToCheck">Table to inspect; a null table yields false.</param>
    /// <param name="columnsNames">Column names that must all be present.</param>
    /// <returns>True when the table is non-null and contains all of the columns.</returns>
    private static bool IsAllColumnsExist(DataTable tableNameToCheck, List<string> columnsNames)
    {
        if (tableNameToCheck == null || tableNameToCheck.Columns == null)
        {
            return false;
        }

        return columnsNames.All(columnName => tableNameToCheck.Columns.Contains(columnName));
    }

    /// <summary>
    /// Adds one row to a table from the cells of a single (table, primary key) group,
    /// creating any column the table does not have yet.
    /// </summary>
    /// <param name="table">Table that receives the row.</param>
    /// <param name="primaryKeyColumnName">Name of the int column that stores the primary key.</param>
    /// <param name="primaryKey">Primary key value of the row.</param>
    /// <param name="cells">All cells of the row.</param>
    private static void AddRow(DataTable table, string primaryKeyColumnName, int primaryKey, List<Student> cells)
    {
        // The primary key column is created first so it stays the leading column.
        if (!table.Columns.Contains(primaryKeyColumnName))
        {
            table.Columns.Add(primaryKeyColumnName, typeof(int));
        }

        // Add only the columns that are missing; re-adding an existing one would throw.
        var columnNames = cells.Select(cell => cell.ColumnName).ToList();
        if (!IsAllColumnsExist(table, columnNames))
        {
            foreach (var columnName in columnNames.Where(name => !table.Columns.Contains(name)))
            {
                table.Columns.Add(columnName, typeof(string));
            }
        }

        // Values are assigned by column name, so the order of the lines within a row
        // and the number of columns a row supplies no longer matter.
        var row = table.NewRow();
        row[primaryKeyColumnName] = primaryKey;
        foreach (var cell in cells)
        {
            row[cell.ColumnName] = cell.ColumnValue;
        }

        table.Rows.Add(row);
    }

    static void Main()
    {
        var table1 = new DataTable();
        var table2 = new DataTable();
        var table3 = new DataTable();

        // Each non-blank line is: table name, primary key, column name, column value (tab-separated).
        var tablesData = (from line in File.ReadLines(@"C:\Console.txt")
                          where !string.IsNullOrWhiteSpace(line)
                          let values = line.Split('\t')
                          select new Student
                          {
                              TableName = values[0],
                              PrimaryKey = Convert.ToInt32(values[1]),
                              ColumnName = values[2],
                              ColumnValue = values[3]
                          }).ToList();

        // One group per (table, primary key) pair, i.e. one group per logical row.
        foreach (var tableData in tablesData.GroupBy(x => new { x.TableName, x.PrimaryKey }))
        {
            var cells = tableData.ToList();
            switch (tableData.Key.TableName)
            {
                case "Table1":
                    AddRow(table1, "pkTable1", tableData.Key.PrimaryKey, cells);
                    break;
                case "Table2":
                    AddRow(table2, "pkTable2", tableData.Key.PrimaryKey, cells);
                    break;
                case "Table3":
                    AddRow(table3, "pkTable3", tableData.Key.PrimaryKey, cells);
                    break;
                // Rows of any other table are ignored.
            }
        }

        // Table1 joins Table2 on SId, and Table2 joins Table3 on DeptId.
        var result = (from firstTable in table1.AsEnumerable()
                      join secondTable in table2.AsEnumerable() on firstTable.Field<string>("SId") equals secondTable.Field<string>("SId")
                      join thirdTable in table3.AsEnumerable() on secondTable.Field<string>("DeptId") equals thirdTable.Field<string>("DeptId")
                      select new
                      {
                          Name = firstTable.Field<string>("Name"),
                          StudentId = firstTable.Field<string>("SId"),
                          City = firstTable.Field<string>("City"),
                          DepartmentName = secondTable.Field<string>("DeptName"),
                          DepartmentId = secondTable.Field<string>("DeptId"),
                          DepartmentHead = thirdTable.Field<string>("DeptHead")
                      }).ToList();
    }
}

但是这样我必须为每个表创建一个DataTable。而在我的实际问题中,大约有20个表,这意味着要像上面那样写出创建20个DataTable的糟糕代码。

有更好的方法吗? 有什么帮助吗?

感谢。

3 个答案:

答案 0 :(得分:0)

我确实会创建20个DataTables,但我会将它们存储在字典中以便于访问。它还可以使您的程序更加灵活。

所以要使用的数据类型:

Dictionary<string, DataTable>

更多关于Dictionary<TKey, TValue>

答案 1 :(得分:0)

每个表的列是否相同?每个表中都存在DeptId、DeptHead等列吗?这些列是“固定的”吗(即不会随意出现你没有预料到的列名)?如果是这样,我会创建一个类结构来表示这些列,并增加一个名为TableId的列,然后把数据导入到这个结构中,以List的形式保存在内存中或者保存到数据库里。

看起来你正在尝试对数据进行聚合功能(按TableId分组),所以我觉得这是最简单的方法

答案 2 :(得分:0)

我假设你只需要保留表之间任何重叠列名的一个值。我假设文件中的第一个表可用于为常用的最终表提供主键。我假设链接表之间只有一个使用与外键相同名称的公共字段。

我编写的代码完全是动态的。我假设您既需要中间的DataTable,也需要最终合并的DataTable;否则您可以把传入的数据动态映射为记录,然后将它们合并到最终的DataTable中。

我的方法是使用LINQ把文本文件中的行转换为匿名对象,然后处理这些对象以提取所有表名以及每个表的列名,接着构建各个DataTable并将数据加载到其中。

我根据所有表的所有列名构建了一个公共DataTable

然后我找出每对表之间的公共字段(假设只有一个)。我按公共(外键)字段为各个DataTable建立索引,这样就可以方便地查找行(相当于在本地实现连接),然后把每条记录连接成一条组合记录并放入组合表中。

// load textfile and convert lines into objects
//  var inFile = File.ReadLines(...);
// Each non-blank line is split on tabs into: table name, primary key, column name, column value.
// The result is materialized once with ToList(): inData is enumerated more than once by the
// steps that follow, and a deferred query would re-split every line on each pass (and re-read
// the file when inFile is a lazy File.ReadLines sequence).
var inData = inFile.Where(line => !string.IsNullOrWhiteSpace(line))
                   .Select(line => line.Split('\t'))
                   .Select(lineArray => new {
                       TableName = lineArray[0],
                       PrimaryKey = lineArray[1],
                       ColName = lineArray[2],
                       ColValue = lineArray[3],
                   })
                   .ToList();

// Build a dictionary mapping each table name to the distinct column names seen for it,
// keeping the order in which tables and columns first appear in the input
var tableColNames = inData.GroupBy(r => r.TableName, r => r.ColName)
                          .ToDictionary(colsOfTable => colsOfTable.Key,
                                        colsOfTable => colsOfTable.Distinct().ToList());
var tableNames = new List<string>(tableColNames.Keys);

// build all of the tables into a dictionary of tables
var dataTables = new Dictionary<string, DataTable>();
foreach (var tableName in tableNames) {
    var newTable = new DataTable(tableName);

    // the "PrimaryKey" column comes first and is registered as the table's primary key
    var keyColumn = newTable.Columns.Add("PrimaryKey");
    newTable.PrimaryKey = new[] { keyColumn };

    // then one column for every column name recorded for this table
    newTable.Columns.AddRange(tableColNames[tableName].Select(colName => new DataColumn(colName))
                                                      .ToArray());

    dataTables[tableName] = newTable;
}

// load all of the tables with their data
// The input is bucketed by table name once, instead of re-scanning all of inData for every table.
var inDataByTable = inData.ToLookup(r => r.TableName);
foreach (var aTable in dataTables.Values) {
    // One group per primary key, i.e. one group per row of this table.
    // (A lookup yields an empty sequence for a table name it has no entries for.)
    var inRows = inDataByTable[aTable.TableName].GroupBy(r => r.PrimaryKey);
    foreach (var row in inRows) {
        var newRow = aTable.NewRow();
        newRow["PrimaryKey"] = row.Key;
        foreach (var col in row) {
            newRow[col.ColName] = col.ColValue;
        }
        aTable.Rows.Add(newRow);
    }
}

// build the combined table with all columns
var combinedTable = new DataTable();
var combinedColNames = new HashSet<string>(); // names already added to combinedTable
foreach (var aTable in dataTables.Values) {
    foreach (var dc in aTable.DataColumns()) {
        // HashSet.Add returns false when the name is already present, so a column name
        // shared by several tables is added to the combined table only once
        if (combinedColNames.Add(dc.ColumnName)) {
            combinedTable.Columns.Add(new DataColumn(dc.ColumnName));
        }
    }
}

// find and index the common columns between tables
var commonColNames = new Dictionary<string, Dictionary<string, string>>(); // [TableName]=>[LinkedTableName]=>ColumnName
// [(TableName, ColumnName)]=>[ColumnValue]=>DataRow
// Indexes are keyed by table AND column: a table that is reached from two different tables
// through two different columns needs one index per column, not one per table.
var indexes = new Dictionary<(string TableName, string ColName), Dictionary<string, DataRow>>();
for (int j1 = 0; j1 < tableNames.Count; ++j1) { // foreach table, find its linked tables
    var startTableName = tableNames[j1];
    var startTable = dataTables[startTableName];
    var startTableColNames = startTable.NonPrimaryKeyColumnNames().ToList();
    // Materialized with ToList() because the result is both counted and iterated below.
    // SingleOrDefault throws if two tables share more than one column name.
    var linkedTables = tableNames.Skip(j1 + 1) // only find links to later tables
                                 .Select(n => (TableName: n, CommonColName: startTableColNames.Intersect(dataTables[n].ColumnNames()).SingleOrDefault()))
                                 .Where(tc => tc.CommonColName != null)
                                 .ToList();

    if (linkedTables.Count > 0) { // if it has linked tables, save the linking column and index that column
        var linkingColNames = new Dictionary<string, string>(); // [LinkedTableName]=>ColumnName
        foreach (var linkedTable in linkedTables) {
            linkingColNames.Add(linkedTable.TableName, linkedTable.CommonColName);
            var indexKey = (linkedTable.TableName, linkedTable.CommonColName);
            if (!indexes.ContainsKey(indexKey)) { // only build indexes once per (table, linking column)
                // Rows with no value in the linking column cannot be joined and are left out of
                // the index; duplicate values still throw because a link must identify one row.
                var colIndex = dataTables[linkedTable.TableName].AsEnumerable()
                                                                .Select(r => (Key: r.Field<string>(linkedTable.CommonColName), DataRow: r))
                                                                .Where(t => t.Key != null)
                                                                .ToDictionary(t => t.Key, t => t.DataRow);
                indexes.Add(indexKey, colIndex);
            }
        }
        commonColNames[startTableName] = linkingColNames;
    }
}

// combine the tables starting with the first table
var firstTableName = tableNames[0];
var firstTable = dataTables[firstTableName];
var firstTableColNames = firstTable.ColumnNames().ToList();
// foreach DataRow in the first table
foreach (var r1 in firstTable.AsEnumerable()) {
    var newRow = combinedTable.NewRow();
    // load the first table DataRow into the combined DataRow
    newRow.CopyColumnValues(r1, firstTableColNames);

    // find all the linked tables and merge their DataRows into the combined DataRow
    var linkedTableNamesStack = new Stack<(string TableName, string LinkedTableName)>();
    // the first table may have no linked tables at all, in which case its rows are copied as-is
    if (commonColNames.TryGetValue(firstTableName, out var firstTableLinks))
        linkedTableNamesStack.PushRange(firstTableLinks.Keys.Select(ltn => (firstTableName, ltn)));

    var allLinksResolved = true;
    while (linkedTableNamesStack.Count > 0) {
        var nextTableLink = linkedTableNamesStack.Pop();
        if (commonColNames.TryGetValue(nextTableLink.LinkedTableName, out var linkedTables))
            linkedTableNamesStack.PushRange(linkedTables.Keys.Select(ltn => (nextTableLink.LinkedTableName, ltn)));
        var linkingColName = commonColNames[nextTableLink.TableName][nextTableLink.LinkedTableName];
        var linkingColValue = newRow.Field<string>(linkingColName);
        // Inner-join semantics: when the linking value is missing or has no matching row in
        // the linked table, the combined row is dropped instead of throwing.
        if (linkingColValue == null
            || !indexes[(nextTableLink.LinkedTableName, linkingColName)].TryGetValue(linkingColValue, out var linkedRow)) {
            allLinksResolved = false;
            break;
        }
        newRow.CopyColumnValues(linkedRow, linkedRow.Table.NonPrimaryKeyColumnNames());
    }

    if (allLinksResolved)
        combinedTable.Rows.Add(newRow);
}

以下是使用的扩展方法:

/// <summary>
/// Small DataTable, DataRow, Stack and IEnumerable helpers used by the table-combining code.
/// </summary>
public static class Ext {
    /// <summary>Names of all columns of the table except the one called "PrimaryKey".</summary>
    public static IEnumerable<string> NonPrimaryKeyColumnNames(this DataTable aTable) {
        return from name in aTable.ColumnNames()
               where name != "PrimaryKey"
               select name;
    }

    /// <summary>The table's columns as a typed sequence.</summary>
    public static IEnumerable<DataColumn> DataColumns(this DataTable aTable) {
        return Enumerable.Cast<DataColumn>(aTable.Columns);
    }

    /// <summary>Names of all columns of the table, in column order.</summary>
    public static IEnumerable<string> ColumnNames(this DataTable aTable) {
        return from DataColumn dc in aTable.Columns
               select dc.ColumnName;
    }

    /// <summary>Copies the values of the named columns from <paramref name="src"/> into <paramref name="dest"/>.</summary>
    public static void CopyColumnValues(this DataRow dest, DataRow src, IEnumerable<string> colNames) {
        foreach (var name in colNames) {
            dest[name] = src[name];
        }
    }

    /// <summary>Pushes every item of the sequence onto the stack, in sequence order.</summary>
    public static void PushRange<T>(this Stack<T> s, IEnumerable<T> Ts) {
        foreach (var item in Ts) {
            s.Push(item);
        }
    }

    /// <summary>Invokes <paramref name="action"/> once for each item of the sequence.</summary>
    public static void ForEach<T>(this IEnumerable<T> source, Action<T> action) {
        using (var items = source.GetEnumerator()) {
            while (items.MoveNext()) {
                action(items.Current);
            }
        }
    }
}