嘿,我正在开发一个程序,从文本文件中读取信息并将其输出到CSV文件中。我需要做的一件事是统计重复记录的数量:在可能的情况下,针对同一个人的违法行为应汇总到一条记录中,并添加一个称为“计数”的字段,该字段表示找到的重复记录的数量(对于非重复记录,此字段应设置为零)。我在添加计数器时遇到了一些麻烦,想知道大家是否对我有任何建议。
谢谢
using System;
using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Text;
namespace finalproj
{
    /// <summary>
    /// Copies a text file to a CSV file line by line and echoes to the console
    /// every line that contains one of the known marker substrings.
    /// </summary>
    class Program
    {
        // Substrings that flag a line for console output. Matching uses
        // string.Contains(string), i.e. an ordinal, case-sensitive search.
        private static readonly string[] Markers =
        {
            "APT.", "BPD", "18IF", "SHP", "SFF", "CLS:",
            "BOND", "ATTY", "(T)", "(M)", "(F)", "(I)"
        };

        /// <summary>
        /// Program entry point. Reads the input file, writes each line to
        /// "outtext.csv", prints marker lines, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The using statements guarantee both streams are flushed and closed
            // even when reading or writing throws.
            using (StreamReader reader = new StreamReader("DISTRICT.DISTRICT_COURT_.11.13.18.AM.000B.CAL.txt"))
            using (StreamWriter writer = new StreamWriter("outtext.csv"))
            {
                string line;

                // ReadLine returns null at end of file; testing it in the loop
                // condition means null is never written or printed.
                while ((line = reader.ReadLine()) != null)
                {
                    if (Markers.Any(marker => line.Contains(marker)))
                    {
                        Console.WriteLine(line);
                    }

                    // Every input line, including the first one, is copied unchanged.
                    writer.WriteLine(line);
                }
            }

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
}
答案 0 :(得分:1)
要把文本拆分成行并统计每一行出现的次数,可以按换行符(NewLine)拆分,然后使用Linq进行分组:
// Split on both Windows ("\r\n") and Unix ("\n") line endings so the result does not
// depend on whether the text's line endings match the platform's Environment.NewLine.
string[] lines = str.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);
// Map each distinct line to the number of times it occurs in the text.
var result = lines.GroupBy(g => g)
.ToDictionary(g => g.Key, g => g.Count());
这样得到的结果中也会包含只出现一次的行。如果只想要重复的行,可以这样写:
// Keep only the lines that occur more than once, mapped to their occurrence count.
var result = lines
.GroupBy(text => text)
.Where(group => group.Count() > 1)
.ToDictionary(group => group.Key, group => group.Count());
然后您可以直接从字典中编写CSV
// Write one "line,count" row per dictionary entry. The line text is wrapped in double
// quotes with embedded quotes doubled, so commas or quotes inside it cannot split the
// row into extra columns; no trailing comma is emitted, so each row has exactly two fields.
File.WriteAllLines(filePath, result.Select(x => "\"" + x.Key.Replace("\"", "\"\"") + "\"," + x.Value));
答案 1 :(得分:0)
在这里,我使用Regex匹配您要查找的内容,并使用SortedSet记录已出现的行,以判断是否存在重复项。请注意,文件很大时可能会占用一些内存,但由于这里处理的是csv数据,我认为应该没有问题:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
namespace ConsoleApp4
{
    /// <summary>
    /// Copies the distinct lines of a text file to a CSV file, counts how many
    /// lines were repeats of an earlier line, and echoes marker lines to the console.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Writes each line the first time it is seen to
        /// "outtext.csv", appends a final "Count:n" row with the number of
        /// repeated lines, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The dot in "APT." is escaped so it matches a literal period
            // instead of any character.
            Regex findWords = new Regex(@"(APT\.|BPD|18IF|SHP|SFF|CLS:|BOND|ATTY|\(T\)|\(M\)|\(F\)|\(I\))");

            // Ordinal comparison: two lines count as duplicates only when they
            // are character-for-character identical, independent of culture.
            SortedSet<string> uniqueLine = new SortedSet<string>(StringComparer.Ordinal);
            int counts = 0;

            // The using statements close both the reader and the writer, even
            // when an exception is thrown while processing.
            using (StreamReader reader = new StreamReader("DISTRICT.DISTRICT_COURT_.11.13.18.AM.000B.CAL.txt"))
            using (StreamWriter writer = new StreamWriter("outtext.csv"))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Add returns false when the line is already in the set,
                    // so a single lookup both tests and records the line.
                    if (uniqueLine.Add(line))
                    {
                        writer.WriteLine(line);
                    }
                    else
                    {
                        counts++;
                    }

                    if (findWords.IsMatch(line))
                    {
                        Console.WriteLine(line);
                    }
                }

                // Total number of lines that repeated an earlier line.
                writer.WriteLine("Count:{0}", counts);
            }

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
}