使用C#

时间:2018-12-08 03:00:37

标签: c# text count

嘿,我正在开发一个程序,从文本文件中读取信息并将其输出到CSV文件中。我需要做的一件事是统计重复记录的数量(在可能的情况下,针对同一个人的违法行为应汇总到一条记录中,并添加一个称为“计数”的字段,该字段指示找到的重复记录的数量;对于非重复记录,此字段应设置为零)。我在添加计数器时遇到了一些麻烦,想知道你们是否有什么建议。

谢谢

using System;
using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Text;

namespace finalproj
{
    class Program
    {
        // Substrings that mark a line of interest in the court calendar file.
        // A line containing any one of them is echoed to the console.
        private static readonly string[] Markers =
        {
            "APT.", "BPD", "18IF", "SHP", "SFF", "CLS:",
            "BOND", "ATTY", "(T)", "(M)", "(F)", "(I)"
        };

        /// <summary>
        /// Copies every line of the input text file to "outtext.csv" and echoes
        /// to the console each line that contains one of the known markers.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // using-statements guarantee both streams are flushed and closed,
            // even if reading or writing throws part-way through.
            using (StreamReader reader = new StreamReader("DISTRICT.DISTRICT_COURT_.11.13.18.AM.000B.CAL.txt"))
            using (StreamWriter writer = new StreamWriter("outtext.csv"))
            {
                string line;

                // Read-then-test in the loop condition so that the null returned
                // at end of file is never printed or written, and so that the
                // first line is handled exactly like every other line.
                while ((line = reader.ReadLine()) != null)
                {
                    if (ContainsMarker(line))
                    {
                        Console.WriteLine(line);
                    }

                    writer.WriteLine(line);
                }
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Reports whether <paramref name="line"/> contains at least one marker.
        /// </summary>
        /// <param name="line">A non-null line read from the input file.</param>
        /// <returns>true if any marker occurs in the line; otherwise false.</returns>
        private static bool ContainsMarker(string line)
        {
            foreach (string marker in Markers)
            {
                if (line.Contains(marker))
                {
                    return true;
                }
            }

            return false;
        }
    }
}

2 个答案:

答案 0 :(得分:1)

要把文本拆分成行并统计每一行出现的次数,可以按NewLine分割,然后使用Linq进行分组

// Break the text into individual lines on the platform newline sequence.
var separators = new[] { Environment.NewLine };
string[] lines = str.Split(separators, StringSplitOptions.None);

// Map every distinct line to the number of times it occurs.
var result = lines
    .GroupBy(text => text)
    .ToDictionary(group => group.Key, group => group.Count());

上面的结果也会包含只出现一次的行。如果只想要重复出现的行:

// Keep only the lines that occur more than once, mapped to their occurrence count.
var result = lines
    .GroupBy(text => text)
    .Where(group => group.Count() > 1)
    .ToDictionary(group => group.Key, group => group.Count());

然后您可以直接从字典中编写CSV

// Emit one "line,count," row per dictionary entry.
var csvRows = result.Select(entry => $"{entry.Key},{entry.Value},");
File.WriteAllLines(filePath, csvRows);

答案 1 :(得分:0)

在这里,我使用Regex匹配您要查找的内容,并使用SortedSet记录已经出现过的行,以判断是否存在重复项。请注意,文件很大时可能会占用一些内存,但既然处理的是csv,我认为问题不大:

using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;

namespace ConsoleApp4
{
    class Program
    {
        /// <summary>
        /// Copies the first occurrence of every line of the input file to
        /// "outtext.csv", counts how many lines were repeats of an earlier line,
        /// echoes every line that contains one of the known markers to the
        /// console, and appends the repeat count as a final "Count:n" row.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int counts = 0;

            // Ordinal comparison: two lines are duplicates only when they are
            // character-for-character identical, independent of the current culture.
            HashSet<string> uniqueLine = new HashSet<string>(StringComparer.Ordinal);

            // The dot in "APT." is escaped so that it matches a literal period
            // instead of any character.
            Regex findWords = new Regex(@"(APT\.|BPD|18IF|SHP|SFF|CLS:|BOND|ATTY|\(T\)|\(M\)|\(F\)|\(I\))");

            // using-statements close both streams even when an exception is thrown.
            using (StreamReader reader = new StreamReader("DISTRICT.DISTRICT_COURT_.11.13.18.AM.000B.CAL.txt"))
            using (StreamWriter writer = new StreamWriter("outtext.csv"))
            {
                string line;

                while ((line = reader.ReadLine()) != null)
                {
                    // Add returns false when the line was already present,
                    // which avoids a separate Contains lookup.
                    if (uniqueLine.Add(line))
                    {
                        writer.WriteLine(line);
                    }
                    else
                    {
                        counts++;
                    }

                    if (findWords.IsMatch(line))
                    {
                        Console.WriteLine(line);
                    }
                }

                writer.WriteLine("Count:{0}", counts);
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}