我需要解析文本文件中以特定单词开头的那些行,如下图所示:
我需要解析以 "Level" 开头的行,并且只提取其中 "Row" 和 "Col" 的值。请注意,文本文件包含 6 组此类数据,每一组都以如下标题行开头:
----------------Chromosome : #------------------
请参阅以下示例:Sample
我需要把每个组的 Row 和 Col 分别保存到该组各自单独的列表中!有没有办法做到这一点?
我尝试了以下方法:
/// <summary>
/// Reads a text file and extracts the Row and Col values from every line that
/// starts with "Level" (leading whitespace and letter case are ignored).
/// </summary>
/// <remarks>
/// Each matching line is expected to be a comma-separated list of
/// <c>key : value</c> fields, e.g. <c>Level : '1', Row : '2', Col : '3'</c>.
/// Values may optionally be wrapped in single or double quotes.
/// Lines that lack a parsable Row or Col field are skipped.
/// The result is a flat list over the whole file; it is not split per group.
/// </remarks>
/// <param name="filePath">Path of the text file to parse.</param>
/// <returns>
/// One freshly allocated <c>int[2,1]</c> per parsed line, where <c>[0,0]</c>
/// holds Row and <c>[1,0]</c> holds Col. Empty when the file does not exist.
/// </returns>
public List<int[,]> getValuesFromTextFile(String filePath)
{
    List<int[,]> path = new List<int[,]>();
    if (!File.Exists(filePath))
    {
        return path;
    }

    foreach (string line in File.ReadLines(filePath))
    {
        string trimmed = line.TrimStart();
        if (!trimmed.StartsWith("Level", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        int? row = null;
        int? col = null;

        // Split into "key : value" fields first; splitting the whole line on ':'
        // would leave fragments such as " '1', Row " that cannot be parsed as numbers.
        foreach (string field in trimmed.Split(','))
        {
            string[] keyValue = field.Split(':');
            if (keyValue.Length != 2)
            {
                continue;
            }

            string key = keyValue[0].Trim();
            string rawValue = keyValue[1].Trim().Trim('\'', '"').Trim();

            int parsed;
            if (!int.TryParse(
                    rawValue,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsed))
            {
                continue;
            }

            if (key.Equals("Row", StringComparison.OrdinalIgnoreCase))
            {
                row = parsed;
            }
            else if (key.Equals("Col", StringComparison.OrdinalIgnoreCase))
            {
                col = parsed;
            }
        }

        if (row.HasValue && col.HasValue)
        {
            // A new array per line: reusing one instance would make every list
            // entry alias the same storage and end up with the last line's values.
            int[,] item = new int[2, 1];
            item[0, 0] = row.Value;
            item[1, 0] = col.Value;
            path.Add(item);
        }
    }

    return path;
}
我在 if (i == 3) item[1,0] = Int32.Parse(value); 这一行收到了以下错误:
输入字符串的格式不正确。
当我在这一行设置断点时,我看到字符串变量 "value" 的值为 null!!
当我添加断点以查看所有行列表内容时,我得到如下图所示:
此外,上述方法还需要能够分别解析每一组的 Level 行!!
答案 0 :(得分:2)
// Running section number. It is bumped each time a line containing
// "Chromosome" is seen, so that line and every following line up to the
// next such line share the same grouping key.
int sectionNo = 0;
var sections = File
    .ReadLines(@"d:\temp\a.txt")
    .GroupBy(text =>
    {
        if (text.Contains("Chromosome"))
        {
            ++sectionNo;
        }
        return sectionNo;
    });

foreach (var section in sections)
{
    // Drop the first two lines of the section (the "-----" lines), then wrap
    // each remaining line in braces and parse it as a JSON object.
    var records = section
        .Skip(2)
        .Select(text => JObject.Parse($"{{{text}}}"))
        .ToList();

    // Iterate over records here; the two reads below only show how to
    // access a field on the first record.
    int level = (int)records[0]["Level"];
    int col = (int)records[0]["Col"];
}
答案 1 :(得分:1)
您可以使用带命名捕获组的正则表达式(Regex)来解析各行,并构建记录列表。
/// <summary>
/// Parses C:\TreeParser.txt into a list of groups. A line matching
/// "Chromosome : N" starts a new group; each following line matching
/// "Level : 'N', Row : 'N', Col : 'N'" is added to that group as a record.
/// Lines matching neither pattern are ignored.
/// </summary>
/// <exception cref="InvalidDataException">
/// A record line appears before any Chromosome header line.
/// </exception>
void Main()
{
    var file = File.ReadLines(@"C:\TreeParser.txt");

    // "[0-9]+" rather than "[0-9]": with a single-digit class, a value such as
    // '12' never matches (the record is silently dropped) and
    // "Chromosome : 10" would be read as 1.
    var groupRegex = new Regex(@"Chromosome : (?<Chromosome>[0-9]+)");
    var recordRegex = new Regex(@"Level : '(?<Level>[0-9]+)', Row : '(?<Row>[0-9]+)', Col : '(?<Col>[0-9]+)'");

    var groups = new List<Group>();
    Group currentGroup = null;

    foreach (var line in file)
    {
        var groupMatch = groupRegex.Match(line);
        if (groupMatch.Success)
        {
            currentGroup = new Group
            {
                Chromosome = int.Parse(groupMatch.Groups["Chromosome"].Value),
                Records = new List<Record>()
            };
            groups.Add(currentGroup);

            // A header line is not also a record line.
            continue;
        }

        var recordMatch = recordRegex.Match(line);
        if (!recordMatch.Success)
        {
            // No match was found
            continue;
        }

        if (currentGroup == null)
        {
            // Fail with a descriptive message instead of the generic
            // "sequence contains no elements" error from groups.Last().
            throw new InvalidDataException(
                "Found a Level record before any Chromosome header: " + line);
        }

        currentGroup.Records.Add(new Record
        {
            Level = int.Parse(recordMatch.Groups["Level"].Value),
            Row = int.Parse(recordMatch.Groups["Row"].Value),
            Col = int.Parse(recordMatch.Groups["Col"].Value)
        });
    }

    // groups now contains a list of each section from the file with a list of records
}
/// <summary>
/// Holds the three integer values of one record: Level, Row and Col.
/// </summary>
public class Record
{
    /// <summary>The record's Level value.</summary>
    public int Level
    {
        get;
        set;
    }

    /// <summary>The record's Row value.</summary>
    public int Row
    {
        get;
        set;
    }

    /// <summary>The record's Col value.</summary>
    public int Col
    {
        get;
        set;
    }
}
/// <summary>
/// One section of the parsed file: a chromosome number together with the
/// records that belong to it.
/// </summary>
public class Group
{
    /// <summary>The chromosome number of this section.</summary>
    public int Chromosome
    {
        get;
        set;
    }

    /// <summary>
    /// The records of this section. Not initialized by the class itself;
    /// the creator must assign a list.
    /// </summary>
    public List<Record> Records
    {
        get;
        set;
    }
}
答案 2 :(得分:1)
这个怎么样:
// Receives one entry per successfully parsed "Level" line; filled by Form1_Load.
List<RowAndCol> lstrc = new List<RowAndCol>();

/// <summary>
/// Reads E:\SAMPLE_FILE\sample.txt and appends one <c>RowAndCol</c> to
/// <c>lstrc</c> for every line containing "Level". A line containing
/// "Chromosome" (and not "Level") is remembered and stamped onto the
/// entries that follow it.
/// </summary>
/// <remarks>
/// A "Level" line is split on ',' and the second and third fields are split
/// on ':' to obtain the row and column text. Surrounding whitespace and
/// single quotes are removed from both values. Lines with too few fields are
/// skipped. <c>rownum</c> counts parsed lines across the whole file; it is
/// not restarted per chromosome.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    int cnt = 1;
    string chr = string.Empty;

    foreach (string fl in File.ReadLines("E:\\SAMPLE_FILE\\sample.txt"))
    {
        if (fl.Contains("Level"))
        {
            string[] s = fl.Split(',');
            if (s.Length < 3)
            {
                // Not enough fields for Level, Row and Col.
                continue;
            }

            string[] rowValue = s[1].Split(':');
            string[] colValue = s[2].Split(':');
            if (rowValue.Length < 2 || colValue.Length < 2)
            {
                continue;
            }

            // Add only here: header lines must not produce empty entries.
            lstrc.Add(new RowAndCol
            {
                Chromosome = chr,
                rownum = cnt,
                rowtext = "Row",
                coltext = "Col",
                row = rowValue[1].Trim().Trim('\'').Trim(),
                col = colValue[1].Trim().Trim('\'').Trim()
            });
            cnt += 1;
        }
        else if (fl.Contains("Chromosome"))
        {
            chr = fl;
        }
    }
}
/// <summary>
/// One parsed entry: a running number, the chromosome text it belongs to,
/// and the row/column labels with their values, all kept as text.
/// </summary>
public class RowAndCol
{
    /// <summary>Running number of the entry.</summary>
    public int rownum { get; set; }

    /// <summary>Chromosome text associated with the entry.</summary>
    public string Chromosome { get; set; }

    /// <summary>Label for the row value.</summary>
    public string rowtext { get; set; }

    /// <summary>Label for the column value.</summary>
    public string coltext { get; set; }

    /// <summary>Row value as text.</summary>
    public string row { get; set; }

    /// <summary>Column value as text.</summary>
    public string col { get; set; }
}