继续提问:Aggregating different file CSV
我有很多文件的格式为YYYYMMDD_HHmmss_goals.csv
现在我有一个容器,其中保存了某一天的所有文件列表,这些文件需要合并处理。
每个CSV文件包含多行不同的ID,X,Y记录。现在我希望按天合并它们:对同一个ID,把该ID的x和y分别相加,并按单日存储结果。我也不希望先把它们拼接成一个文件,而是动态地(on the fly)合并,只保存合并后的单日结果 -> 每行一条CSV格式的ID,x,y。这有点像按日期和ID对它们排序,然后把x,y相加,但仅限于相同的ID。
更新:
/// <summary>
/// Mutable pair of integer totals (X and Y) used as the per-key accumulator
/// while merging CSV rows.
/// </summary>
public class XY_Values
{
    private int _x;
    private int _y;

    /// <summary>Running X total; 0 for a freshly created instance.</summary>
    public int X
    {
        get { return _x; }
        set { _x = value; }
    }

    /// <summary>Running Y total; 0 for a freshly created instance.</summary>
    public int Y
    {
        get { return _y; }
        set { _y = value; }
    }
}
/// <summary>
/// Composite key made of a location id, a format id and an edition id.
/// </summary>
/// <remarks>
/// Two keys are equal when all three ids match. The class overrides
/// <see cref="object.Equals(object)"/> and <see cref="object.GetHashCode()"/>
/// (and implements IEquatable) so that hash-based collections such as
/// Dictionary treat separately constructed keys with the same ids as the same
/// key; without the overrides they would be compared by reference.
/// The fields are mutable: do not change them while the instance is stored as
/// a key in a hash-based collection, because the hash code depends on them.
/// </remarks>
public class ImageKey : System.IEquatable<ImageKey>
{
    public int mLocationId;
    public int mFormatId;
    public int mEditionId;

    /// <summary>Creates a key from its three component ids.</summary>
    public ImageKey(int LocationId, int FormatId, int EditionId)
    {
        mLocationId = LocationId;
        mFormatId = FormatId;
        mEditionId = EditionId;
    }

    /// <summary>Returns true when <paramref name="other"/> is non-null and has the same three ids.</summary>
    public bool Equals(ImageKey other)
    {
        // ReferenceEquals avoids any dependency on an overloaded == operator.
        if (ReferenceEquals(other, null))
        {
            return false;
        }

        return mLocationId == other.mLocationId
            && mFormatId == other.mFormatId
            && mEditionId == other.mEditionId;
    }

    /// <summary>Value equality against an arbitrary object; false for null or non-ImageKey values.</summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as ImageKey);
    }

    /// <summary>Hash code derived from the three ids, consistent with <see cref="Equals(ImageKey)"/>.</summary>
    public override int GetHashCode()
    {
        // Multiply-and-add mixing keeps permutations such as (1,2,3) and (3,2,1)
        // from colliding, which a plain XOR of the ids would do.
        unchecked
        {
            int hash = 17;
            hash = (hash * 31) + mLocationId;
            hash = (hash * 31) + mFormatId;
            hash = (hash * 31) + mEditionId;
            return hash;
        }
    }

    /// <summary>
    /// Two-argument equality kept for existing callers. Two nulls are equal;
    /// a null and a non-null key are not.
    /// </summary>
    public bool Equals(ImageKey x, ImageKey y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return x.Equals(y);
    }

    /// <summary>
    /// Hash code of <paramref name="obj"/>, kept for existing callers.
    /// Returns 0 for null; otherwise the same value as <c>obj.GetHashCode()</c>.
    /// </summary>
    public int GetHashCode(ImageKey obj)
    {
        return ReferenceEquals(obj, null) ? 0 : obj.GetHashCode();
    }
}
/// <summary>
/// Reads every CSV file in <paramref name="files"/>, sums the x and y columns
/// per (LocationId, FormatID, EditionId) combination, and writes one line per
/// combination to a file named after <paramref name="date"/> (yyyyMMdd.csv)
/// inside <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory in which the merged file is created.</param>
/// <param name="date">Day the files belong to; only used to build the output file name.</param>
/// <param name="files">Paths of the input files. Each line is expected to be
/// "LocationId,FormatID,EditionId,x,y"; lines with fewer than five fields or
/// with a non-integer value in any of the first five fields are skipped.</param>
/// <exception cref="ArgumentNullException"><paramref name="files"/> is null.</exception>
static void MergeFilesForDay(string dir, DateTime date, List<string> files)
{
    if (files == null)
    {
        throw new ArgumentNullException("files");
    }

    // Tuple<,,> compares its components by value, so rows carrying the same
    // three ids always land in the same dictionary entry.
    var entries = new Dictionary<Tuple<int, int, int>, XY_Values>();

    foreach (string fn in files)
    {
        // ReadLines streams the file instead of loading it completely.
        foreach (string line in File.ReadLines(fn))
        {
            string[] fields = line.Split(',');
            if (fields.Length < 5)
            {
                continue; // skip invalid data
            }

            int locationId, formatId, editionId;
            int x, y;
            // Each field is parsed into its own variable; a row is used only
            // when all five values are valid integers.
            if (int.TryParse(fields[0].Trim(), out locationId)
                && int.TryParse(fields[1].Trim(), out formatId)
                && int.TryParse(fields[2].Trim(), out editionId)
                && int.TryParse(fields[3].Trim(), out x)
                && int.TryParse(fields[4].Trim(), out y))
            {
                Tuple<int, int, int> key = Tuple.Create(locationId, formatId, editionId);

                XY_Values entry;
                if (!entries.TryGetValue(key, out entry))
                {
                    entry = new XY_Values();
                    entries.Add(key, entry);
                }

                entry.X += x;
                entry.Y += y;
            }
        }
    }

    // Output format: LocationId,FormatID,EditionID,x,y - one line per key.
    string file = Path.Combine(dir, date.ToString("yyyyMMdd") + ".csv");
    using (StreamWriter stream = File.CreateText(file))
    {
        foreach (KeyValuePair<Tuple<int, int, int>, XY_Values> entry in entries)
        {
            string line = string.Format(
                "{0},{1},{2},{3},{4}",
                entry.Key.Item1,
                entry.Key.Item2,
                entry.Key.Item3,
                entry.Value.X,
                entry.Value.Y);
            stream.WriteLine(line);
        }
    }
}
答案 0 :(得分:1)
此方法使用Dictionary<string, XY_Values>
按ID进行分组:
/// <summary>
/// Mutable pair of integer totals (X and Y) accumulated per ID while the
/// CSV files are merged.
/// </summary>
public class XY_Values
{
    private int _x;
    private int _y;

    /// <summary>Running X total; 0 for a freshly created instance.</summary>
    public int X
    {
        get { return _x; }
        set { _x = value; }
    }

    /// <summary>Running Y total; 0 for a freshly created instance.</summary>
    public int Y
    {
        get { return _y; }
        set { _y = value; }
    }
}
/// <summary>
/// Sums the x and y columns of all given CSV files per ID and writes the
/// totals as "ID,x,y" lines to a file named after <paramref name="date"/>
/// (yyyyMMdd.csv) inside <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory in which the merged file is created.</param>
/// <param name="date">Day the files belong to; only used to build the output file name.</param>
/// <param name="files">Paths of the input files. Lines with fewer than three
/// fields, or whose second or third field is not an integer, are skipped.</param>
static void MergeFilesForDay(string dir, DateTime date, List<string> files)
{
    Dictionary<string, XY_Values> totalsById = new Dictionary<string, XY_Values>();

    foreach (string path in files)
    {
        string[] rows = File.ReadAllLines(path);
        foreach (string row in rows)
        {
            string[] columns = row.Split(',');
            if (columns.Length >= 3)
            {
                string id = columns[0].Trim();
                int dx;
                int dy;
                bool dxOk = int.TryParse(columns[1].Trim(), out dx);
                bool dyOk = int.TryParse(columns[2].Trim(), out dy);
                if (dxOk && dyOk)
                {
                    // Fetch the accumulator for this ID, creating it on first sight.
                    XY_Values totals;
                    if (!totalsById.TryGetValue(id, out totals))
                    {
                        totals = new XY_Values();
                        totalsById.Add(id, totals);
                    }

                    totals.X += dx;
                    totals.Y += dy;
                }
            }
        }
    }

    string outputName = date.ToString("yyyyMMdd") + ".csv";
    string outputPath = Path.Combine(dir, outputName);
    using (StreamWriter writer = File.CreateText(outputPath))
    {
        foreach (KeyValuePair<string, XY_Values> pair in totalsById)
        {
            XY_Values totals = pair.Value;
            writer.WriteLine(string.Format("{0},{1},{2}", pair.Key, totals.X, totals.Y));
        }
    }
}
该方法取代了我上一个回答(my last answer)中的旧方法。
答案 1 :(得分:0)
您可以创建一个类:
/// <summary>
/// Holds the summed X and Y values for one ID.
/// </summary>
class ItemXY
{
    // The members are public because the aggregation code reads and updates
    // them from outside the class; members without an access modifier are
    // private in C# and would not be reachable there.

    /// <summary>Running X total; 0 for a freshly created instance.</summary>
    public int X { get; set; }

    /// <summary>Running Y total; 0 for a freshly created instance.</summary>
    public int Y { get; set; }
}
然后创建一个词典:
// Maps each ID to its running X/Y totals. The constructor call must repeat
// the generic type arguments; there is no non-generic Dictionary type.
Dictionary<int, ItemXY> dict = new Dictionary<int, ItemXY>();
然后对每一组文件(即同一天的所有文件)进行foreach遍历,读取所有文件以填充dict:
// Paths of all files that belong to one day. Placeholder: fill this array
// with the real paths before running the loop.
string[] path_tab = new string[0];
foreach (string path in path_tab)
{
    // The using block closes the reader even when reading throws.
    using (System.IO.StreamReader file = new System.IO.StreamReader(path, Encoding.Default))
    {
        string s;
        while ((s = file.ReadLine()) != null)
        {
            string[] tab = s.Split(',');
            if (tab.Length < 3)
            {
                continue; // not an "ID,x,y" line
            }

            // The CSV fields are text; convert them before using them as the
            // int dictionary key and before adding them to the int totals.
            int id, x, y;
            if (!int.TryParse(tab[0].Trim(), out id)
                || !int.TryParse(tab[1].Trim(), out x)
                || !int.TryParse(tab[2].Trim(), out y))
            {
                continue; // skip lines with non-numeric values
            }

            ItemXY item;
            if (!dict.TryGetValue(id, out item))
            {
                // First occurrence of this ID: start a new accumulator.
                item = new ItemXY();
                dict.Add(id, item);
            }

            item.X += x;
            item.Y += y;
        }
    }
}
之后,使用dict值创建新的CSV。