我有几个包含大量数据的大型.csv文件。我需要从每一行中仅提取数据的特定部分,从而忽略我不感兴趣的部分,并将结果输出到新的文本文件中。
例如,以下是数据的一部分:
Fr 23:59:59 M40 N04161K RX LAG 2 JNYT 17 STORE OCC 1 PRUD 1 RAW -9 LAG 0
Fr 23:59:59 M08 N09461M %SAT 3 %CONG 0 MQ 0 EB 0 OSQ 0 NSQ 4
Fr 23:59:59 M20 N09461M SAT 3% SQ 0 FLOW 4 GN 13 STOC 9
我希望写一个看起来像这样的新文件:
5,23,59,59,2,17,1,1,-9,0
5,23,59,59,3,0,0,0,0,4
5,23,59,59,3,0,4,13,9
(您会注意到,每行开头我希望输出"5",而不是代表"星期五"的"Fr";)
数据按"M"引用(M40、M08 等)划分为不同的数据集,如果能按数据集分别输出其全部数据会很有用(例如,把所有 M40 的数据过滤到一个 .txt 文件中,这也是我使用"if"语句的原因)
我希望将每个数字用逗号分隔,但不是必需的
到目前为止,这是我的代码:
/// <summary>
/// Copies every line of the export file that contains "M40" or "M08" to
/// C:\output.txt, unchanged.
/// </summary>
class Program
{
    /// <summary>
    /// Reads C:\MessExport_20110402_0000.csv line by line and writes the
    /// matching lines to C:\output.txt. Any exception is reported on the
    /// console; the program then waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        try
        {
            // "using" guarantees both files are flushed and closed even when
            // reading or writing throws part-way through the file.
            using (StreamReader sr = new StreamReader("C:\\MessExport_20110402_0000.csv"))
            using (StreamWriter sw = new StreamWriter("C:\\output.txt"))
            {
                String line;
                // ReadLine returns null once the end of the file is reached.
                while ((line = sr.ReadLine()) != null)
                {
                    // A single combined test so that a line mentioning both
                    // references is written once rather than twice.
                    if (line.Contains("M40") || line.Contains("M08"))
                    {
                        sw.WriteLine(line);
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception: " + e.Message);
        }
        finally
        {
            Console.WriteLine("Executing finally block.");
            Console.WriteLine("Press any key to exit.");
            Console.ReadKey();
        }
    }
}
如果之后还能接着读取下一个 .csv 文件,并同样把结果输出到新的 .txt 文件中,那就更好了。
我不太擅长编写使用正则表达式和字符串拆分(Split)的代码,因此非常感谢任何帮助。
答案 0 :(得分:0)
只是一个简单的实现:
// Splits data.csv into one output file per M-reference (third token of each
// line, e.g. "M40" -> M40.txt). Each output line is comma separated:
// day number, hour, minute, second, then every integer found after the
// M-reference.
string workingDirectory = @"c:\";
// The position in this array is the day number that gets written ("Fr" -> 5).
// Array.IndexOf yields -1 for a day token that is not listed here.
var days = new[] { "Su", "Mo", "Tu", "We", "Th", "Fr", "Sa" };
// One writer per M-reference, created the first time that reference is seen.
var writers = new Dictionary<string, StreamWriter>();
try
{
    using (StreamReader sr = new StreamReader(workingDirectory + "data.csv"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var items = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            // Blank or truncated lines have no day/time/M-reference tokens;
            // skip them instead of failing on items[1] / items[2].
            if (items.Length < 3)
            {
                continue;
            }

            StreamWriter w;
            if (!writers.TryGetValue(items[2], out w))
            {
                w = new StreamWriter(workingDirectory + items[2] + ".txt");
                writers.Add(items[2], w);
            }

            var times = items[1].Split(':');
            // Keep only the integer tokens. A trailing '%' is stripped first so
            // that values written as "3%" are kept as "3".
            var digits = items.Skip(3)
                .Select(x => x.TrimEnd('%'))
                .Where(x => { int i; return int.TryParse(x, out i); });
            var data = new[] { Array.IndexOf(days, items[0]).ToString() }.Concat(times).Concat(digits);
            w.WriteLine(String.Join(",", data));
        }
    }
}
finally
{
    // Runs even when reading or writing fails, so no output file is left
    // open or unflushed. Dispose also closes the writer.
    foreach (var entry in writers)
    {
        entry.Value.Dispose();
    }
}
答案 1 :(得分:0)
这只是一个快速的尝试,但我认为它能帮你完成其中一部分工作。
// Sample input rows, one string per record.
var lines = new List<string>
{
    "Fr 23:59:59 M40 N04161K RX LAG 2 JNYT 17 STORE OCC 1 PRUD 1 RAW -9 LAG 0",
    "Fr 23:59:59 M08 N09461M %SAT 3 %CONG 0 MQ 0 EB 0 OSQ 0 NSQ 4",
    "Fr 23:59:59 M20 N09461M SAT 3% SQ 0 FLOW 4 GN 13 STOC 9"
};

// IgnorePatternWhitespace lets the pattern be written with spaces for
// readability. It matches either the two word characters at the start of the
// input (the day code) or a run of digits delimited by word boundaries with an
// optional leading minus sign. Digits embedded in tokens such as "M40" or
// "N04161K" have no word boundary on both sides and are therefore skipped.
var regex = new Regex("(?: ^\\w\\w | -?\\b\\d+\\b )", RegexOptions.IgnorePatternWhitespace);

foreach (var row in lines)
{
    MatchCollection found = regex.Matches(row);
    // Every match is followed by a comma, so each output line ends with one.
    for (int index = 0; index < found.Count; index++)
    {
        Console.Write("{0},", found[index].Value);
    }

    Console.WriteLine();
}
输出:
Fr,23,59,59,2,17,1,1,-9,0,
Fr,23,59,59,3,0,0,0,0,4,
Fr,23,59,59,3,0,4,13,9,
答案 2 :(得分:0)
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Program
{
    /// <summary>
    /// Filters the lines of a space-delimited export file and rewrites each
    /// kept line as comma-separated values: day-of-week number, hour, minute,
    /// second, followed by every integer token that appears after the
    /// M-reference.
    /// </summary>
    public class TransformCsv
    {
        /// <summary>
        /// Transforms c:\temp\MessExport_20110402_0000.csv into
        /// c:\temp\output.txt using <see cref="LineFilterFunction"/>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        [STAThread]
        public static void Main(String[] args)
        {
            (new TransformCsv()).Run(@"c:\temp\MessExport_20110402_0000.csv", @"c:\temp\output.txt", LineFilterFunction);
        }

        /// <summary>
        /// Default line filter: accepts a line when it contains the text
        /// "M40" or "M08" anywhere.
        /// </summary>
        /// <param name="line">The raw input line.</param>
        /// <returns><c>true</c> when the line should be transformed and written.</returns>
        public static Boolean LineFilterFunction(String line)
        {
            return line.Contains("M40") || line.Contains("M08");
        }

        ////////////////////

        // Separator used to split an input line into tokens.
        private static readonly Char[] _spaceCharacter = " ".ToCharArray();

        // Position + 1 is the day number written to the output ("Fr" -> 5).
        private readonly List<String> _dayOfWeek = new List<String>() { "Mo", "Tu", "We", "Th", "Fr", "Sa", "Su" };

        /// <summary>
        /// Reads <paramref name="inputFilePath"/> line by line and writes the
        /// transformed form of every accepted line to
        /// <paramref name="outputFilePath"/>.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read.</param>
        /// <param name="outputFilePath">Path of the file to create or overwrite.</param>
        /// <param name="lineFilterFunction">
        /// Predicate called for each non-blank line; only lines for which it
        /// returns <c>true</c> are transformed.
        /// </param>
        public void Run(String inputFilePath, String outputFilePath, Func<String, Boolean> lineFilterFunction)
        {
            using (var reader = new StreamReader(inputFilePath))
            using (var writer = new StreamWriter(outputFilePath))
            {
                String line = null;
                while ((line = reader.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line) || !lineFilterFunction(line))
                    {
                        continue;
                    }

                    // null marks a line without the day/time/M-reference
                    // prefix; such lines are skipped rather than aborting
                    // the whole run.
                    var transformed = this.GetTransformedLine(line);
                    if (transformed != null)
                    {
                        writer.WriteLine(transformed);
                    }
                }
            }
        }

        /// <summary>
        /// Converts one input line to its comma-separated output form.
        /// </summary>
        /// <param name="line">A non-blank input line.</param>
        /// <returns>
        /// The transformed line, or <c>null</c> when the line has fewer than
        /// three space-separated tokens.
        /// </returns>
        private String GetTransformedLine(String line)
        {
            var elements = line.Split(_spaceCharacter, StringSplitOptions.RemoveEmptyEntries);
            if (elements.Length < 3)
            {
                return null;
            }

            var result = new List<String>();
            // IndexOf returns -1 for an unlisted day token, which yields "0".
            result.Add((_dayOfWeek.IndexOf(elements[0]) + 1).ToString());
            result.AddRange(elements[1].Split(':'));

            // elements[2] is the M-reference; it selects the data set and is
            // not part of the numeric output, so only the tokens after it are
            // scanned for values.
            foreach (var element in elements.Skip(3))
            {
                // Values may carry a trailing percent sign ("3%"); strip it
                // so the number is kept.
                var candidate = element.TrimEnd('%');
                if (this.IsInt32(candidate))
                {
                    result.Add(candidate);
                }
            }

            return String.Join(",", result);
        }

        /// <summary>Returns whether <paramref name="s"/> parses as a 32-bit integer.</summary>
        private Boolean IsInt32(String s)
        {
            Int32 parsed;
            return Int32.TryParse(s, out parsed);
        }
    }
}