我想解析一个内容类似于下面的文本文件:
START-OF-DATA
#100846105
START SECURITY|US912810DZ85|CBBT|
## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]
## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]
04/30|15:00:00|B|118.640625||| |A|118.703125||| ||
04/30|14:59:54|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:52|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:23|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:20|B|118.6328125||| |A|118.6953125||| ||
END SECURITY|US912810DZ85|0|
#100846111
START SECURITY|US912810EA26|CBBT|
## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]
## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]
04/30|15:00:00|B|124.75||| |A|124.828125||| ||
04/30|14:59:55|B|124.75||| |A|124.8203125||| ||
04/30|14:59:53|B|124.7421875||| |A|124.8203125||| ||
04/30|14:59:45|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:43|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:27|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:24|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:22|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:20|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:13|B|124.7421875||| |A|124.8125||| ||
END SECURITY|US912810EA26|0|
END-OF-DATA
使用以下代码
// Extracts everything between START-OF-DATA and END-OF-DATA and splits it into rows.
// The pattern matches "START-OF-DATA" followed by CRLF, then lazily captures any run of
// characters other than a backslash into the named group "InstrumentsSection", stopping
// at an "END-OF-DATA" that sits at a '^' anchor.
string pattern = @"^(START-OF-DATA\r\n)(?<InstrumentsSection>[^\\]*?)(?:(^END-OF-DATA))";
// NOTE(review): `regex` is not declared in this snippet. Presumably it is a Regex built from
// `pattern` with RegexOptions.Multiline (otherwise '^' before END-OF-DATA could only match
// at the very start of the text) - confirm against the full source.
var expressionMatchColl = regex.Matches(File.ReadAllText(filePath));
foreach (Match match in expressionMatchColl)
{
// Split the captured section on the platform newline and drop empty rows.
// NOTE(review): the pattern hard-codes \r\n while the split uses Environment.NewLine;
// the two only agree where Environment.NewLine is "\r\n".
string[] instrumentRows = match.Groups["InstrumentsSection"].Value.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
// `instruments` is declared outside this snippet; it is overwritten on every iteration,
// so only the rows of the last match are kept.
instruments = instrumentRows.ToList();
}
我能够检索到 START-OF-DATA 和 END-OF-DATA 之间的每一行。但是,我想忽略以 START SECURITY、## 和 END SECURITY 开头的行。此外,还希望将刻度值(tick values)和标识符(例如 100846105、100846111)分别放入不同的分组中。
有人可以建议吗?
答案 0 :(得分:1)
您可以逐行阅读文件,并过滤掉您不想要的行。此外,可以将刻度值和ID收集到字符串列表中。
示例代码:
// Reads the file line by line and, for every START-OF-DATA ... END-OF-DATA block,
// appends the kept lines to `res` and collects the identifier lines in `ids`.
//   - Lines starting with "START SECURITY", "##" or "END SECURITY" are dropped.
//   - Lines starting with a single '#' (e.g. "#100846105") are kept in `res` AND added to
//     `ids`, including the leading '#'.
// `filepath` must be supplied by the surrounding code.
var res = string.Empty;
var ids = new List<string>();
// The second argument asks StreamReader to detect the encoding from a byte order mark.
using (var sr = new StreamReader(filepath, true))
{
    string s;
    while ((s = sr.ReadLine()) != null)
    {
        // Skip everything that is outside a data block.
        if (!s.StartsWith("START-OF-DATA", System.StringComparison.Ordinal))
        {
            continue;
        }

        // Buffer the block in a StringBuilder instead of growing `res` one line at a time.
        var block = new System.Text.StringBuilder();

        // The null check stops the loop at end of file, so a file without a closing
        // END-OF-DATA marker no longer fails with a NullReferenceException.
        while (s != null && !s.StartsWith("END-OF-DATA", System.StringComparison.Ordinal))
        {
            bool isComment = s.StartsWith("##", System.StringComparison.Ordinal);

            if (!isComment &&
                !s.StartsWith("START SECURITY", System.StringComparison.Ordinal) &&
                !s.StartsWith("END SECURITY", System.StringComparison.Ordinal))
            {
                block.Append(s).Append(System.Environment.NewLine);
            }

            // A single leading '#' marks an identifier line.
            if (!isComment && s.StartsWith("#", System.StringComparison.Ordinal))
            {
                ids.Add(s);
            }

            s = sr.ReadLine();
        }

        res += block.ToString();
        // `s` is the END-OF-DATA line here, or null when the file ended early
        // (appending null leaves `res` unchanged).
        res += s;
    }
}
输出:
START-OF-DATA
#100846105
04/30|15:00:00|B|118.640625||| |A|118.703125||| ||
04/30|14:59:54|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:52|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:23|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:20|B|118.6328125||| |A|118.6953125||| ||
#100846111
04/30|15:00:00|B|124.75||| |A|124.828125||| ||
04/30|14:59:55|B|124.75||| |A|124.8203125||| ||
04/30|14:59:53|B|124.7421875||| |A|124.8203125||| ||
04/30|14:59:45|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:43|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:27|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:24|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:22|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:20|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:13|B|124.7421875||| |A|124.8125||| ||
END-OF-DATA
然后,如果您要读取多个块,只需创建一个字符串列表来存储 `res`,并在 `res += s;` 之后把 `res` 添加到该列表中。
答案 1 :(得分:1)
这是一个简单的解析器
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Small demo parser that splits a START-OF-DATA ... END-OF-DATA dump into one
    /// <see cref="Section"/> per "#id" line.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses a hard-coded sample dump. The resulting sections are not printed or returned.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string input =
                "START-OF-DATA\n" +
                "#100846105\n" +
                "START SECURITY|US912810DZ85|CBBT|\n" +
                "## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "04/30|15:00:00|B|118.640625||| |A|118.703125||| ||\n" +
                "04/30|14:59:54|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:52|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:23|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:20|B|118.6328125||| |A|118.6953125||| ||\n" +
                "END SECURITY|US912810DZ85|0|\n" +
                "#100846111\n" +
                "START SECURITY|US912810EA26|CBBT|\n" +
                "## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "04/30|15:00:00|B|124.75||| |A|124.828125||| ||\n" +
                "04/30|14:59:55|B|124.75||| |A|124.8203125||| ||\n" +
                "04/30|14:59:53|B|124.7421875||| |A|124.8203125||| ||\n" +
                "04/30|14:59:45|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:43|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:27|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:24|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:22|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:20|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:13|B|124.7421875||| |A|124.8125||| ||\n" +
                "END SECURITY|US912810EA26|0|\n" +
                "END-OF-DATA\n";

            List<Section> sections = ParseSections(input);
        }

        /// <summary>
        /// Splits the text into sections. Each line is trimmed, then classified:
        /// blank lines and lines starting with "##", "START" or "END" are ignored;
        /// a line starting with a single '#' opens a new section whose id is the rest
        /// of the line; every other line is added to the current section's data.
        /// </summary>
        /// <param name="input">Full text of the dump; null or empty yields an empty list.</param>
        /// <returns>The sections in the order their "#id" lines appear.</returns>
        /// <exception cref="FormatException">A data row appears before any "#id" line.</exception>
        public static List<Section> ParseSections(string input)
        {
            List<Section> sections = new List<Section>();
            if (string.IsNullOrEmpty(input))
            {
                return sections;
            }

            Section currentSection = null;
            using (StringReader reader = new StringReader(input))
            {
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();

                    // Blank lines carry no data; they used to crash the Substring checks.
                    if (inputLine.Length == 0)
                    {
                        continue;
                    }

                    // "##" marks a comment line (the "## in:" / "## out:" rows).
                    if (inputLine.StartsWith("##", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // A single '#' starts a new section; the id excludes the '#'.
                    if (inputLine.StartsWith("#", StringComparison.Ordinal))
                    {
                        currentSection = new Section
                        {
                            iD = inputLine.Substring(1),
                            data = new List<string>()
                        };
                        sections.Add(currentSection);
                        continue;
                    }

                    // Covers START-OF-DATA / START SECURITY and END-OF-DATA / END SECURITY.
                    // StartsWith is safe on lines shorter than the marker, unlike Substring.
                    if (inputLine.StartsWith("END", StringComparison.Ordinal) ||
                        inputLine.StartsWith("START", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Report malformed input explicitly instead of a NullReferenceException.
                    if (currentSection == null)
                    {
                        throw new FormatException(
                            "Data row found before any '#' identifier line: " + inputLine);
                    }

                    currentSection.data.Add(inputLine);
                }
            }

            return sections;
        }

        /// <summary>
        /// One identifier together with the data rows that follow it.
        /// </summary>
        public class Section
        {
            /// <summary>Identifier taken from the "#id" line, without the leading '#'.</summary>
            public string iD { get; set; }

            /// <summary>Trimmed data rows belonging to this identifier, in file order.</summary>
            public List<string> data { get; set; }
        }
    }
}