我有一个相当大的文件(95K行),我需要解析。对于以下样本数据......
<FIPS>10440<STATE>AL<WFO>BMX
8 32.319 32.316 -86.484 -86.487 32.316 -86.484
32.316 -86.484
102 32.501 31.965 -85.919 -86.497 32.496 -86.248
32.448 -86.181 32.432 -86.189 32.433 -86.125 32.417 -86.116
32.406 -86.049 32.419 -86.023 32.337 -85.991 32.333 -85.969
32.276 -85.919 32.271 -85.986 32.250 -85.999 31.968 -85.995
31.965 -86.302 32.052 -86.307 32.051 -86.406 32.245 -86.410
32.276 -86.484 32.302 -86.491 32.332 -86.475 32.344 -86.497
32.364 -86.492 32.378 -86.463 32.405 -86.460 32.414 -86.396
32.427 -86.398 32.433 -86.350 32.412 -86.310 32.441 -86.325
32.487 -86.314 32.473 -86.288 32.488 -86.260 32.501 -86.263
32.496 -86.248
我需要做的是从一个FIPS读到下一个FIPS,并将每个组内的线组合成一个巨大的线,如下所示......
<FIPS>10440<STATE>AL<WFO>BMX 8 32.319 32.316 -86.484 -86.487 32.316 -86.484 32.316 -86.484...
<FIPS>10440<STATE>AL<WFO>BMX 102 32.501 31.965 -85.919 -86.497 32.496 -86.248 32.448 -86.181...
我目前有以下代码(这已经是我今天写的第6个版本了)。我遗漏了什么?
// Reads the file at winterBoundsPath line by line. Lines containing "<FIPS>"
// start a new WinterJsonModel in `temp`; every other line is split into
// whitespace-separated tokens that are collected in `listPointValue`, with
// completed lists pushed onto `allValues`.
// `temp`, `allValues` and `listPointValue` are declared outside this snippet.
using (var reader = new StreamReader(winterBoundsPath))
{
while (!reader.EndOfStream)
{
var line = reader.ReadLine().Trim();
// NOTE(review): if the trimmed line is empty, line[0] throws
// IndexOutOfRangeException. A "<FIPS>" header starts with '<', which is not
// a letter, so headers do pass this check.
if (!Char.IsLetter(line[0]))
{
if (line.Contains("<FIPS>"))
{
// Turn the angle brackets into spaces so the header splits into
// alternating tag/value tokens, e.g. FIPS 10440 STATE AL WFO BMX.
var lineReplace = line.Replace('<', ' ').Replace('>', ' ');
string[] rawData = lineReplace.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Odd indices (1, 3, 5) hold the values that follow each tag name.
temp = new WinterJsonModel
{
FIPS = rawData[1],
State = rawData[3],
Center = rawData[5],
polyCoords = new List<polyCoordsJsonData>()
};
}
else
{
string[] rawData2 = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Any line with more than one token closes the current list and starts a
// new one.
// NOTE(review): in the sample data the continuation lines of a group also
// contain more than one token, so they start a new list as well instead of
// extending the current group - this looks like the cause of the grouping
// problem described in the question; confirm against the real file.
if (rawData2.Count() > 1)
{
allValues.Add(listPointValue);
listPointValue = new List<string>();
}
// Append every token of this line to the list currently being built.
foreach (string value in rawData2)
{
listPointValue.Add(value);
}
}
}
}
// NOTE(review): within this snippet the last listPointValue is never added
// to allValues after the loop ends - verify whether the caller does that.
// The enclosing using statement disposes the reader on exit, so this
// explicit Close() is not required.
reader.Close();
}
答案 0 :(得分:2)
根据您提供的样本判断,换行符是CRLF字符。这意味着你真的只需要知道两件事。
1.该行是否包含以标记形式出现的 `string` 字面值 “<FIPS>”
2.如果你已到达有回车的行的末尾。
我现在先忽略JSON部分,因为它不是你问题的一部分。我假设这意味着你已经能处理好JSON,只要我们把这些 `string` 整理成你想要的样子,你就可以从那里继续。
// Builds one combined string per <FIPS> record: the header line followed by
// every data line up to the next header, each separated by a single space.
// `reader` is the already-open StreamReader from the surrounding code.
var x = new List<string>();
while (!reader.EndOfStream)
{
    var line = reader.ReadLine().Trim();
    if (line.Length == 0)
    {
        // Blank lines carry no data; skip them instead of appending stray spaces.
        continue;
    }

    if (line.Contains("<FIPS>"))
    {
        // ReadLine() already strips the line terminator, so the separator
        // between the header and the first data line must be added explicitly
        // (otherwise the result reads "...<WFO>BMX8 32.319 ...").
        x.Add(line + " ");
    }
    else
    {
        if (x.Count == 0)
        {
            // A data line with no preceding header has nothing to attach to.
            throw new InvalidOperationException("Data line found before the first <FIPS> header.");
        }

        // Append this data line (plus a trailing separator) to the current record.
        x[x.Count - 1] = String.Concat(x[x.Count - 1], line, " ");
    }
}
这里的重点是将数据的整理与实际放入对象这两件事分开。接下来,您可以用 `foreach` 遍历该列表,对其中每个 `string` 调用 `string.Split()`,并根据拆分结果创建新对象。
答案 1 :(得分:0)
我已经解析了40多年的文本文件了。以下代码是我已完成的示例
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace Oppgave3Lesson1
{
/// <summary>
/// Console entry point: parses the file named by <see cref="FILENAME"/>
/// with <c>WinterJsonModel.ParseFile</c>.
/// </summary>
class Program
{
    // Path of the input file handed to the parser.
    const string FILENAME = @"c:\temp\test.txt";

    /// <summary>Creates a model instance and runs the parser on the input file.</summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var parser = new WinterJsonModel();
        parser.ParseFile(FILENAME);
    }
}
/// <summary>
/// One <c>&lt;FIPS&gt;</c> record of a bounds file together with the numbered
/// groups that follow it, plus the parser that fills <see cref="samplData"/>.
/// </summary>
public class WinterJsonModel
{
    /// <summary>Records appended by every <see cref="ParseFile"/> call, in file order.</summary>
    public static List<WinterJsonModel> samplData = new List<WinterJsonModel>();

    /// <summary>Text following the <c>&lt;FIPS&gt;</c> tag on the header line.</summary>
    public string fips { get; set; }

    /// <summary>Text following the <c>&lt;STATE&gt;</c> tag on the header line.</summary>
    public string state { get; set; }

    /// <summary>Text following the <c>&lt;WFO&gt;</c> tag on the header line.</summary>
    public string wfo { get; set; }

    /// <summary>Groups read between this record's header and the next header.</summary>
    public List<Group> groups = new List<Group>();

    private static readonly char[] TagDelimiters = { '<', '>' };
    private static readonly char[] ValueDelimiters = { ' ', '\t' };

    /// <summary>
    /// Reads the file line by line and appends one <see cref="WinterJsonModel"/>
    /// per <c>&lt;FIPS&gt;</c> header to <see cref="samplData"/>.
    /// A data line with an odd number of values starts a new group (the first
    /// value is the group id); a line with an even number of values continues
    /// the current group. Values are stored as consecutive (Key, Value) pairs.
    /// </summary>
    /// <param name="winterBoundsPath">Path of the text file to parse.</param>
    /// <exception cref="InvalidDataException">
    /// A header has fewer than three tag/value pairs, or a data line appears
    /// before the header or group it should belong to.
    /// </exception>
    /// <exception cref="FormatException">A data token is not a valid number.</exception>
    public void ParseFile(string winterBoundsPath)
    {
        WinterJsonModel current = null;
        Group group = null;
        int lineNumber = 0;

        using (var reader = new StreamReader(winterBoundsPath))
        {
            string raw;
            while ((raw = reader.ReadLine()) != null)
            {
                lineNumber++;
                string line = raw.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                if (line.StartsWith("<FIPS>", StringComparison.Ordinal))
                {
                    current = ParseHeader(line, lineNumber);
                    samplData.Add(current);
                    // A new record must not keep appending to the previous record's group.
                    group = null;
                    continue;
                }

                decimal[] numbers = ParseNumbers(line);
                int firstPairIndex = 0;

                if (numbers.Length % 2 == 1)
                {
                    // Odd count: the leading value is a group id that opens a new group.
                    if (current == null)
                    {
                        throw new InvalidDataException(
                            "Line " + lineNumber + ": group found before the first <FIPS> header.");
                    }

                    group = new Group();
                    group.id = (int)numbers[0];
                    current.groups.Add(group);
                    firstPairIndex = 1;
                }
                else if (group == null)
                {
                    throw new InvalidDataException(
                        "Line " + lineNumber + ": continuation line found before any group header.");
                }

                // The remaining count is always even here, so i + 1 stays in range.
                for (int i = firstPairIndex; i + 1 < numbers.Length; i += 2)
                {
                    group.values.Add(new KeyValuePair<decimal, decimal>(numbers[i], numbers[i + 1]));
                }
            }
        }
    }

    // Builds a record from a header such as "<FIPS>10440<STATE>AL<WFO>BMX".
    private static WinterJsonModel ParseHeader(string line, int lineNumber)
    {
        // Splitting on the angle brackets yields alternating tag names and values.
        string[] tokens = line.Split(TagDelimiters, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length < 6)
        {
            throw new InvalidDataException(
                "Line " + lineNumber + ": malformed <FIPS> header: " + line);
        }

        var model = new WinterJsonModel();
        model.fips = tokens[1];
        model.state = tokens[3];
        model.wfo = tokens[5];
        return model;
    }

    // Converts a whitespace-separated data line into numbers.
    private static decimal[] ParseNumbers(string line)
    {
        string[] tokens = line.Split(ValueDelimiters, StringSplitOptions.RemoveEmptyEntries);
        var numbers = new decimal[tokens.Length];
        for (int i = 0; i < tokens.Length; i++)
        {
            // The file always uses '.' as the decimal separator, so parse with the
            // invariant culture rather than the machine's current culture.
            numbers[i] = decimal.Parse(tokens[i], System.Globalization.CultureInfo.InvariantCulture);
        }

        return numbers;
    }
}
/// <summary>
/// A numbered group of value pairs read from the data lines that follow a header.
/// </summary>
public class Group
{
    /// <summary>Pairs belonging to this group, in the order they were added.</summary>
    public List<KeyValuePair<decimal, decimal>> values =
        new List<KeyValuePair<decimal, decimal>>();

    /// <summary>Identifier of the group.</summary>
    public int id { get; set; }
}
}