我正在接收数据源中的数据,在将信息发送到UI进行显示之前,我需要进行数据转换。 to
问题分为两部分:
答案 0 :(得分:9)
您所谓的static columns
通常称为行组,dynamic columns
- 列组和value columns
- 值聚合或简单的值。
为了实现目标,我建议使用以下简单的数据结构:
public class PivotData
{
public IReadOnlyList<PivotValues> Columns { get; set; }
public IReadOnlyList<PivotDataRow> Rows { get; set; }
}
public class PivotDataRow
{
public PivotValues Data { get; set; }
public IReadOnlyList<PivotValues> Values { get; set; }
}
Columns
PivotData
成员代表您所谓的标题,而Row
成员 - PivotDataRow
对象列表{ {1}}成员包含行组值和Data
- 相应Values
索引的值(Columns
将始终与PivotDataRow.Values
)具有相同的Count
。
上述数据结构可序列化/反序列化为JSON(使用Newtosoft.Json测试),可用于以所需格式填充UI。
用于表示行组值,列组值和聚合值的核心数据结构如下:
PivotData.Columns.Count
基本上它表示具有相等和顺序比较语义的public class PivotValues : IReadOnlyList<string>, IEquatable<PivotValues>, IComparable<PivotValues>
{
readonly IReadOnlyList<string> source;
readonly int offset, count;
public PivotValues(IReadOnlyList<string> source) : this(source, 0, source.Count) { }
public PivotValues(IReadOnlyList<string> source, int offset, int count)
{
this.source = source;
this.offset = offset;
this.count = count;
}
public string this[int index] => source[offset + index];
public int Count => count;
public IEnumerator<string> GetEnumerator()
{
for (int i = 0; i < count; i++)
yield return this[i];
}
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
public override int GetHashCode()
{
unchecked
{
var comparer = EqualityComparer<string>.Default;
int hash = 17;
for (int i = 0; i < count; i++)
hash = hash * 31 + comparer.GetHashCode(this[i]);
return hash;
}
}
public override bool Equals(object obj) => Equals(obj as PivotValues);
public bool Equals(PivotValues other)
{
if (this == other) return true;
if (other == null) return false;
var comparer = EqualityComparer<string>.Default;
for (int i = 0; i < count; i++)
if (!comparer.Equals(this[i], other[i])) return false;
return true;
}
public int CompareTo(PivotValues other)
{
if (this == other) return 0;
if (other == null) return 1;
var comparer = Comparer<string>.Default;
for (int i = 0; i < count; i++)
{
var compare = comparer.Compare(this[i], other[i]);
if (compare != 0) return compare;
}
return 0;
}
public override string ToString() => string.Join(", ", this); // For debugging
}
列表的范围(切片)。前者允许在数据透视转换期间使用基于哈希的高效LINQ运算符,而后者允许可选的排序。此数据结构也允许有效转换,无需分配新列表,同时在从JSON反序列化时保留实际列表。
(通过实现string
接口 - IEquatable<PivotValues>
和GetHashCode
方法提供相等比较。通过这样做,它允许将两个Equals
类实例视为相等基于输入PivotValues
的{{1}}元素内指定范围内的值。类似,通过实现List<string>
接口 - List<List<string>>
方法提供排序)) EM>
转型本身很简单:
IComparable<PivotValues>
首先使用简单的LINQ CompareTo
运算符确定列(标题)。然后通过对行列的源集进行分组来确定行。每个行分组中的值由外部加入public static PivotData ToPivot(this List<List<string>> data, int rowDataCount, int columnDataCount, int valueDataCount)
{
int rowDataStart = 0;
int columnDataStart = rowDataStart + rowDataCount;
int valueDataStart = columnDataStart + columnDataCount;
var columns = data
.Select(r => new PivotValues(r, columnDataStart, columnDataCount))
.Distinct()
.OrderBy(c => c) // Optional
.ToList();
var emptyValues = new PivotValues(new string[valueDataCount]); // For missing (row, column) intersection
var rows = data
.GroupBy(r => new PivotValues(r, rowDataStart, rowDataCount))
.Select(rg => new PivotDataRow
{
Data = rg.Key,
Values = columns.GroupJoin(rg,
c => c,
r => new PivotValues(r, columnDataStart, columnDataCount),
(c, vg) => vg.Any() ? new PivotValues(vg.First(), valueDataStart, valueDataCount) : emptyValues
).ToList()
})
.OrderBy(r => r.Data) // Optional
.ToList();
return new PivotData { Columns = columns, Rows = rows };
}
和分组内容确定。
由于我们的数据结构实现,LINQ转换非常有效(空间和时间)。列和行排序是可选的,如果您不需要,可以将其删除。
使用虚拟数据进行样本测试:
Distinct
答案 1 :(得分:3)
这是LINQ的方法:
var working =
data
.Select(d => new
{
Region_L1 = d[0],
Region_L2 = d[1],
Product_L1 = d[2],
Product_L2 = d[3],
Value = double.Parse(d[4]),
});
var output =
working
.GroupBy(x => new { x.Region_L1, x.Region_L2 }, x => new { x.Product_L1, x.Product_L2, x.Value })
.Select(x => new { x.Key, Lookup = x.ToLookup(y => new { y.Product_L1, y.Product_L2 }, y => y.Value) })
.Select(x => new
{
x.Key.Region_L1,
x.Key.Region_L2,
P_One_One = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item One" }].Sum(),
P_One_Two = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Two" }].Sum(),
P_One_Four = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Four" }].Sum(),
P_One_Six = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Six" }].Sum(),
P_One_Eight = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Eight" }].Sum(),
P_One_Eleven = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Eleven" }].Sum(),
P_Two_Five = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Five" }].Sum(),
P_Two_Seven = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Seven" }].Sum(),
P_Two_Nine = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Nine" }].Sum(),
P_Three_Three = x.Lookup[new { Product_L1 = "Product Three", Product_L2 = "Item Three" }].Sum(),
P_Three_Ten = x.Lookup[new { Product_L1 = "Product Three", Product_L2 = "Item Ten" }].Sum(),
});
这给出了:
请注意,LINQ需要输出列的特定字段名称。
如果列数不知道,但您有一个方便的headerInfo
List<List<string>>
,那么您可以这样做:
var output =
working
.GroupBy(x => new { x.Region_L1, x.Region_L2 }, x => new { x.Product_L1, x.Product_L2, x.Value })
.Select(x => new { x.Key, Lookup = x.ToLookup(y => new { y.Product_L1, y.Product_L2 }, y => y.Value) })
.Select(x => new
{
x.Key.Region_L1,
x.Key.Region_L2,
Headers =
headerInfo
.Select(y => new { Product_L1 = y[0], Product_L2 = y[1] })
.Select(y => new { y.Product_L1, y.Product_L2, Value = x.Lookup[y].Sum() })
.ToArray(),
});
这给出了:
答案 2 :(得分:1)
您可以使用NReco PivotData库以下列方式按任意数量的列创建数据透视表(不要忘记安装“NReco.PivotData”nuget包):
// rows in dataset are represented as 'arrays'
// lets define 'field name' -> 'field index' mapping
var fieldToIndex = new Dictionary<string,int>() {
{"Region L1", 0},
{"Region L2", 1},
{"Product L1", 2},
{"Product L2", 3},
{"Val", 4}
};
// create multidimensional dataset
var pvtData = new PivotData(
// group by 4 dimensions
new[]{"Region L1", "Region L2", "Product L1", "Product L2"},
// value (use CompositeAggregatorFactory for multiple values)
new SumAggregatorFactory("Val") );
pvtData.ProcessData(data, (row, field) => ((IList)row)[fieldToIndex[field]] );
// create pivot table data model by the grouped data
var pvtTbl = new PivotTable(
// dimensions for rows
new[] {"Region L1", "Region L2"},
// dimensions for columns
new[] {"Product L1", "Product L2"},
pvtData);
// now you can iterate through 'pvtTbl.RowKeys' and 'pvtTbl.ColumnKeys'
// to get row\column header labels and use 'pvtTbl.GetValue()'
// or 'pvtTbl[]' to pivot table get values
// you can easily render pivot table to HTML (or Excel, CSV) with
// components from PivotData Toolkit (NReco.PivotData.Extensions assembly):
var htmlResult = new StringWriter();
var pvtHtmlWr = new PivotTableHtmlWriter(htmlResult);
pvtHtmlWr.Write(pvtTbl);
var pvtTblHtml = htmlResult.ToString();
默认情况下,数据透视表行/列按标题(A-Z)排序。你可以change the order as you need。
PivotData OLAP库(PivotData,PivotTable类)可以在单部署项目中免费使用。高级组件(如PivotTableHtmlWriter)需要商业许可证密钥。
答案 3 :(得分:0)
有点“简化”版本:
O(count_of_ones(f))
string[][] data = {
new [] { "Global", "Europe" , "Product One" , "Item One" , "579984.59" },
new [] { "Global", "North America", "Product One" , "Item Two" , "314586.73" },
new [] { "Global", "Asia" , "Product One" , "Item One" , "62735.13" },
new [] { "Global", "Asia" , "Product Two" , "Item Five" , "12619234.69" },
new [] { "Global", "North America", "Product Two" , "Item Five" , "8953713.39" },
new [] { "Global", "Europe" , "Product One" , "Item Two" , "124267.4" },
new [] { "Global", "Asia" , "Product One" , "Item Four" , "482338.49" },
new [] { "Global", "North America", "Product One" , "Item Four" , "809185.13" },
new [] { "Global", "Europe" , "Product One" , "Item Four" , "233101" },
new [] { "Global", "Asia" , "Product One" , "Item Two" , "120561.65" },
new [] { "Global", "North America", "Product One" , "Item Six" , "1517359.37" },
new [] { "Global", "Europe" , "Product One" , "Item Six" , "382590.45" },
new [] { "Global", "North America", "Product One" , "Item Eight" , "661835.64" },
new [] { "Global", "Europe" , "Product Three", "Item Three" , "0" },
new [] { "Global", "Europe" , "Product One" , "Item Eight" , "0" },
new [] { "Global", "Europe" , "Product Two" , "Item Five" , "3478145.38" },
new [] { "Global", "Asia" , "Product One" , "Item Six" , "0" },
new [] { "Global", "North America", "Product Two" , "Item Seven" , "4247059.97" },
new [] { "Global", "Asia" , "Product Two" , "Item Seven" , "2163718.01" },
new [] { "Global", "Europe" , "Product Two" , "Item Seven" , "2158782.48" },
new [] { "Global", "North America", "Product Two" , "Item Nine" , "72634.46" },
new [] { "Global", "Europe" , "Product Two" , "Item Nine" , "127500" },
new [] { "Global", "North America", "Product One" , "Item One" , "110964.44" },
new [] { "Global", "Asia" , "Product Three", "Item Ten" , "2064.99" },
new [] { "Global", "Europe" , "Product One" , "Item Eleven", "0" },
new [] { "Global", "Asia" , "Product Two" , "Item Nine" , "1250" }
};
string[][] headerInfo = {
new [] { "Product One" , "Item One" },
new [] { "Product One" , "Item Two" },
new [] { "Product One" , "Item Four" },
new [] { "Product One" , "Item Six" },
new [] { "Product One" , "Item Eight" },
new [] { "Product One" , "Item Eleven" },
new [] { "Product Two" , "Item Five" },
new [] { "Product Two" , "Item Seven" },
new [] { "Product Two" , "Item Nine" },
new [] { "Product Three", "Item Three" },
new [] { "Product Three", "Item Ten" }
};
结果:
int[] rowHeaders = { 0, 1 }, colHeaders = { 2, 3 }; int valHeader = 4;
var pivot = data.ToLookup(r => string.Join("|", rowHeaders.Select(i => r[i])))
.Select(g => g.ToLookup(c => string.Join("|", colHeaders.Select(i => c[i])), c => c[valHeader]));
foreach (var r in pivot)
Debug.Print(string.Join(", ", headerInfo.Select(h => "[" + r[string.Join("|", h)].FirstOrDefault() + "]")));
由于许多字符串连接,上述内容远非效率最高,因此使用自定义比较器可以快5倍:
[579984.59], [124267.4], [233101], [382590.45], [0], [0], [3478145.38], [2158782.48], [127500], [0], []
[110964.44], [314586.73], [809185.13], [1517359.37], [661835.64], [], [8953713.39], [4247059.97], [72634.46], [], []
[62735.13], [120561.65], [482338.49], [0], [], [], [12619234.69], [2163718.01], [1250], [], [2064.99]
然后:
public class SequenceComparer : IEqualityComparer<IEnumerable<string>>
{
public bool Equals(IEnumerable<string> first, IEnumerable<string> second)
{
return first.SequenceEqual(second);
}
public int GetHashCode(IEnumerable<string> value)
{
return value.Aggregate(0, (a, v) => a ^ v.GetHashCode());
}
}
答案 4 :(得分:0)
由于我们可以预先确定结果的大小,因此我们可以将其定义为多维数组。
让我们选择功能方法并将结果视为累加器,因此我们只需编写 reducer (对于Linq) Aggregate
方法)。
它将基于字典来映射水平到垂直线的转换,另一个状态字典来构建它的行(当然字典需要一个标准的比较器)。
header_info
,所以 - 首先 - 我必须修复第一个条目这是最后一个重复。
与其他解决方案相比,以下解决方案非常高效(我的笔记本电脑只需要1毫秒,比接受的答案快4倍以上)。
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace pivot
{
class Program
{
static void Main(string[] args)
{
var _staticColumnCount = 2; //Columns that should not be pivoted
var _dynamicColumnCount = 2; // Columns which needs to be pivoted to form header
var _valueColumnCount = 1; //Columns that represent Actual value
var valueColumnIndex = 4; //Assuming index starts with 0;
List<List<string>> headerInfo = new List<List<string>>();
headerInfo.Add(new List<string> { "Product Three", "Item Three" });
headerInfo.Add(new List<string> { "Product Two", "Item Five" });
headerInfo.Add(new List<string> { "Product Two", "Item Seven" });
headerInfo.Add(new List<string> { "Product Two", "Item Nine" });
headerInfo.Add(new List<string> { "Product One", "Item One" });
headerInfo.Add(new List<string> { "Product One", "Item Two" });
headerInfo.Add(new List<string> { "Product One", "Item Four" });
headerInfo.Add(new List<string> { "Product One", "Item Six" });
headerInfo.Add(new List<string> { "Product One", "Item Eight" });
headerInfo.Add(new List<string> { "Product One", "Item Eleven" });
headerInfo.Add(new List<string> { "Product Three", "Item Ten" });
List<List<string>> data = new List<List<string>>();
data.Add(new List<string> { "Global", "Europe", "Product One", "Item One", "579984.59" });
data.Add(new List<string> { "Global", "North America", "Product One", "Item Two", "314586.73" });
data.Add(new List<string> { "Global", "Asia", "Product One", "Item One", "62735.13" });
data.Add(new List<string> { "Global", "Asia", "Product Two", "Item Five", "12619234.69" });
data.Add(new List<string> { "Global", "North America", "Product Two", "Item Five", "8953713.39" });
data.Add(new List<string> { "Global", "Europe", "Product One", "Item Two", "124267.4" });
data.Add(new List<string> { "Global", "Asia", "Product One", "Item Four", "482338.49" });
data.Add(new List<string> { "Global", "North America", "Product One", "Item Four", "809185.13" });
data.Add(new List<string> { "Global", "Europe", "Product One", "Item Four", "233101" });
data.Add(new List<string> { "Global", "Asia", "Product One", "Item Two", "120561.65" });
data.Add(new List<string> { "Global", "North America", "Product One", "Item Six", "1517359.37" });
data.Add(new List<string> { "Global", "Europe", "Product One", "Item Six", "382590.45" });
data.Add(new List<string> { "Global", "North America", "Product One", "Item Eight", "661835.64" });
data.Add(new List<string> { "Global", "Europe", "Product Three", "Item Three", "0" });
data.Add(new List<string> { "Global", "Europe", "Product One", "Item Eight", "0" });
data.Add(new List<string> { "Global", "Europe", "Product Two", "Item Five", "3478145.38" });
data.Add(new List<string> { "Global", "Asia", "Product One", "Item Six", "0" });
data.Add(new List<string> { "Global", "North America", "Product Two", "Item Seven", "4247059.97" });
data.Add(new List<string> { "Global", "Asia", "Product Two", "Item Seven", "2163718.01" });
data.Add(new List<string> { "Global", "Europe", "Product Two", "Item Seven", "2158782.48" });
data.Add(new List<string> { "Global", "North America", "Product Two", "Item Nine", "72634.46" });
data.Add(new List<string> { "Global", "Europe", "Product Two", "Item Nine", "127500" });
data.Add(new List<string> { "Global", "North America", "Product One", "Item One", "110964.44" });
data.Add(new List<string> { "Global", "Asia", "Product Three", "Item Ten", "2064.99" });
data.Add(new List<string> { "Global", "Europe", "Product One", "Item Eleven", "0" });
data.Add(new List<string> { "Global", "Asia", "Product Two", "Item Nine", "1250" });
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
Reducer reducer = new Reducer();
reducer.headerCount = headerInfo.Count;
reducer.headerCount = headerInfo.Count;
var resultCount = (int)Math.Ceiling((double)data.Count / (double)reducer.headerCount);
ValueArray[,] results = new ValueArray[resultCount, _staticColumnCount + reducer.headerCount];
reducer.headerDict = new Dictionary<IEnumerable<string>, int>(new MyComparer());
reducer.skipCols = _staticColumnCount;
reducer.headerKeys = _dynamicColumnCount;
reducer.rowDict = new Dictionary<IEnumerable<string>, int>(new MyComparer());
reducer.currentLine = 0;
reducer.valueCount = _valueColumnCount;
for (int i = 0; i < reducer.headerCount; i++)
{
reducer.headerDict.Add(headerInfo[i], i);
}
results = data.Aggregate(results, reducer.reduce);
stopwatch.Stop();
Console.WriteLine("millisecs: " + stopwatch.ElapsedMilliseconds);
for (int i = 0; i < resultCount; i++)
{
var curr_header = new string[reducer.headerCount];
IEnumerable<string> curr_key = null;
for (int j = 0; j < reducer.headerCount; j++)
{
curr_header[j] = "[" +
String.Join(",", (results[i, reducer.skipCols + j]?.values) ?? new string[0])
+ "]";
curr_key = curr_key ?? (results[i, reducer.skipCols + j]?.row_keys);
}
Console.WriteLine(String.Join(",", curr_key)
+ ": " + String.Join(",", curr_header)
);
}
Console.ReadKey();
// if you want to compare it to the accepted answer
stopwatch.Reset();
stopwatch.Start();
var pivotData = data.ToPivot(2, 2, 1); // with all needed classes/methods
stopwatch.Stop();
Console.WriteLine("millisecs: " + stopwatch.ElapsedMilliseconds);
Console.ReadKey();
}
internal class ValueArray
{
internal IEnumerable<string> row_keys;
internal string[] values;
}
internal class Reducer
{
internal int headerCount;
internal int skipCols;
internal int headerKeys;
internal int valueCount;
internal Dictionary<IEnumerable<string>, int> headerDict;
internal Dictionary<IEnumerable<string>, int> rowDict;
internal int currentLine;
internal ValueArray[,] reduce(ValueArray[,] results, List<string> line)
{
var header_col = headerDict[line.Skip(skipCols).Take(headerKeys)];
var row_keys = line.Take(skipCols);
var curr_values = new string[valueCount];
for (int i = 0; i < valueCount; i++)
{
curr_values[i] = line[skipCols + headerKeys + i];
}
if (rowDict.ContainsKey(row_keys))
{
results[rowDict[row_keys], skipCols + header_col] = new ValueArray();
results[rowDict[row_keys], skipCols + header_col].row_keys = row_keys;
results[rowDict[row_keys], skipCols + header_col].values = curr_values;
}
else
{
rowDict.Add(row_keys, currentLine);
results[currentLine, skipCols + header_col] = new ValueArray();
results[currentLine, skipCols + header_col].row_keys = row_keys;
results[currentLine, skipCols + header_col].values = curr_values;
currentLine++;
}
return results;
}
}
public class MyComparer : IEqualityComparer<IEnumerable<string>>
{
public bool Equals(IEnumerable<string> x, IEnumerable<string> y)
{
return x.SequenceEqual(y);
}
public int GetHashCode(IEnumerable<string> obj)
{
return obj.First().GetHashCode();
}
}
}
}