删除所有出现重复的行(不同于只保留唯一行的 DISTINCT 选择)

时间:2012-10-02 09:23:26

标签: c# datatable duplicates

如何根据两列的值,删除 DataTable 中所有出现重复的行?遗憾的是,我没能找到等效的 LINQ 查询。(我要的并不是去重后的 distinct 结果——重复的行一条都不保留。)下表说明了我的问题:

我想基于Column_A和Column_B删除重复的每一行

COLUMN_A COLUMN_B COLUMN_C COLUMN_D .....
A B
C D
E F
G H
A B
E F

预期输出:

COLUMN_A COLUMN_B COLUMN_C COLUMN_D .....
C D
G H

请帮忙

2 个答案:

答案 0 :(得分:5)

// Group the rows by their (COLUMN_A, COLUMN_B) pair and keep only the groups
// that contain more than one row, i.e. every row whose key occurs repeatedly.
var duplicatedGroups =
    from row in dataTable.AsEnumerable()
    group row by new { A = row["COLUMN_A"], B = row["COLUMN_B"] } into sameKey
    where sameKey.Count() > 1
    select sameKey;

// Flatten the groups into a concrete list before touching the table, so the
// rows are not removed while the query is still enumerating them.
var rowsToDelete = duplicatedGroups
    .SelectMany(sameKey => sameKey)
    .ToList();

// Remove every member of every duplicated group, including the first occurrence.
rowsToDelete.ForEach(row => dataTable.Rows.Remove(row));

答案 1 :(得分:1)

您可以尝试使用此示例

链接:http://geekswithblogs.net/ajohns/archive/2004/06/24/7191.aspx

代码

/// <summary>
/// Removes from <paramref name="tbl"/> every row that duplicates an earlier
/// row on all of <paramref name="keyColumns"/>. The first occurrence of each
/// key is kept; only the later repeats are removed.
/// </summary>
/// <param name="tbl">Table to clean up; it is modified in place.</param>
/// <param name="keyColumns">Columns whose values must all match for two rows to count as duplicates.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tbl"/> or <paramref name="keyColumns"/> is null.
/// </exception>
private static void RemoveDuplicates(DataTable tbl,
                                             DataColumn[] keyColumns)
        {
              if (tbl == null)
              {
                    throw new System.ArgumentNullException("tbl");
              }
              if (keyColumns == null)
              {
                    throw new System.ArgumentNullException("keyColumns");
              }

              // Rows.Count is re-read on every iteration because it shrinks as
              // duplicates are removed. The last row has nothing after it, so
              // the loop stops one short of the end.
              for (int rowNdx = 0; rowNdx < tbl.Rows.Count - 1; rowNdx++)
              {
                    // FindDups returns every later row matching this one, so
                    // once they are removed no repeat of this row remains and
                    // the index can always advance without rescanning.
                    foreach (DataRow dup in FindDups(tbl, rowNdx, keyColumns))
                    {
                          tbl.Rows.Remove(dup);
                    }
              }
        }

        /// <summary>
        /// Collects the rows located after index <paramref name="sourceNdx"/>
        /// that <c>IsDup</c> reports as duplicates of the row at that index.
        /// </summary>
        /// <param name="tbl">Table to search.</param>
        /// <param name="sourceNdx">Index of the row that later rows are compared against.</param>
        /// <param name="keyColumns">Columns passed to <c>IsDup</c> for the comparison.</param>
        /// <returns>The matching later rows in table order; an empty array when there are none.</returns>
        private static DataRow[] FindDups(DataTable tbl,
                                          int sourceNdx,
                                          DataColumn[] keyColumns)
        {
              // A typed list replaces the non-generic ArrayList, so no
              // typeof-based cast is needed when producing the array.
              System.Collections.Generic.List<DataRow> matches =
                    new System.Collections.Generic.List<DataRow>();

              DataRow sourceRow = tbl.Rows[sourceNdx];

              // Only rows after the source row are examined; the source row
              // itself is never part of the result.
              for (int i = sourceNdx + 1; i < tbl.Rows.Count; i++)
              {
                    DataRow targetRow = tbl.Rows[i];
                    if (IsDup(sourceRow, targetRow, keyColumns))
                    {
                          matches.Add(targetRow);
                    }
              }
              return matches.ToArray();
        }

        /// <summary>
        /// Tells whether two rows hold equal values in every one of the given
        /// key columns. Values are compared with <c>Equals</c>.
        /// </summary>
        /// <param name="sourceRow">First row of the comparison.</param>
        /// <param name="targetRow">Second row of the comparison.</param>
        /// <param name="keyColumns">Columns to compare.</param>
        /// <returns>
        /// <c>false</c> as soon as one key column differs; <c>true</c> when all
        /// of them match (including when <paramref name="keyColumns"/> is empty).
        /// </returns>
        private static bool IsDup(DataRow sourceRow,
                                  DataRow targetRow,
                                  DataColumn[] keyColumns)
        {
              foreach (DataColumn keyColumn in keyColumns)
              {
                    object sourceValue = sourceRow[keyColumn];
                    object targetValue = targetRow[keyColumn];

                    // The first mismatch settles the answer; skip the rest.
                    if (!sourceValue.Equals(targetValue))
                    {
                          return false;
                    }
              }
              return true;
        }

测试

              // Build an example table in which every row appears twice.
              DataTable tbl = new DataTable();

              tbl.Columns.Add("ColumnA");
              tbl.Columns.Add("ColumnB");
              tbl.Columns.Add("ColumnC");
              for(int i = 0; i<10; i++)
              {
                    // Insert the same values twice in a row so each original
                    // row is immediately followed by its duplicate.
                    for(int copy = 0; copy<2; copy++)
                    {
                          DataRow nr = tbl.NewRow();
                          nr["ColumnA"] = "A" + i.ToString();
                          nr["ColumnB"] = "B" + i.ToString();
                          nr["ColumnC"] = "C" + i.ToString();
                          tbl.Rows.Add(nr);
                    }
              }

              PrintRows(tbl); // show table with duplicates

              // Columns that define a duplicate: two rows are duplicates
              // when they match on every column listed here. The key is
              // ColumnA plus ColumnB (the second entry previously repeated
              // ColumnA, so ColumnB was never compared).
              DataColumn[] keyColumns =
                          new DataColumn[]{tbl.Columns["ColumnA"],
                                           tbl.Columns["ColumnB"]};

              // Remove the duplicates.
              RemoveDuplicates(tbl, keyColumns);