我正在构建一个自定义管道分隔文件格式的解析器,我发现我的代码非常笨重,有人可以建议更好的解析这些数据的方法吗?
文件的数据由管道(|)分隔的行分解,每行以记录类型开头,后跟一个ID,后跟不同的列数。
实施例 CDI | 11111 | OTHERDATA | somemore |其他
CEX001 | 123131 | DATA |数据
CCC | 123131 | DATA | DATA1 | DATA2 | DATA3 | DATA4 | DATA5 | DATA6
。我通过管道拆分,然后抓住前两列,然后使用开关检查第一行并调用一个函数,将剩余的解析成为该记录类型构建的对象。我真的想要一个更优雅的方法。
public Dictionary<string, DataRecord> Parse()
{
var data = new Dictionary<string, DataRecord>();
var rawDataDict = new Dictionary<string, List<List<string>>>();
foreach (var line in File.ReadLines(_path))
{
var split = line.Split('|');
var Id = split[1];
if (!rawDataDict.ContainsKey(Id))
{
rawDataDict.Add(Id, new List<List<string>> {split.ToList()});
}
else
{
rawDataDict[Id].Add(split.ToList());
}
}
rawDataDict.ToList().ForEach(pair =>
{
var key = pair.Key.ToString();
var values = pair.Value;
foreach (var value in values)
{
var recordType = value[0];
switch (recordType)
{
case "CDI":
var cdiRecord = ParseCdi(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key, CdiRecords = new List<CdiRecord>() { cdiRecord }
});
}
else
{
data[key].CdiRecords.Add(cdiRecord);
}
break;
case "CEX015":
var cexRecord = ParseCex(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CexRecords = new List<Cex015Record>() { cexRecord }
});
}
else
{
data[key].CexRecords.Add(cexRecord);
}
break;
case "CPH":
CphRecord cphRecord = ParseCph(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CphRecords = new List<CphRecord>() { cphRecord }
});
}
else
{
data[key].CphRecords.Add(cphRecord);
}
break;
}
}
});
return data;
}
答案 0 :(得分:1)
试用FileHelper,这是您的确切示例 - http://www.filehelpers.net/example/QuickStart/ReadFileDelimited/
鉴于您的
数据CDI|11111|OTHERDATA|Datas
CEX001|123131|DATA
CCC|123131
您可以创建一个类来对其进行建模,以允许FileHelpers解析分隔文件:
[DelimitedRecord("|")]
public class Record
{
public string Type { get; set; }
public string[] Fields { get; set; }
}
然后我们可以允许FileHelpers解析为这个对象类型:
var engine = new FileHelperEngine<Record>();
var records = engine.ReadFile("Input.txt");
在我们将所有记录加载到Record
个对象后,我们可以使用一些linq将它们拉入到给定的类型中
var cdis = records.Where(x => x.Type == "CDI")
.Select(x => new Cdi(x.Fields[0], x.Fields[1], x.Fields[2])
.ToArray();
var cexs = records.Where(x => x.Type == "CEX001")
.Select(x => new Cex(x.Fields[0], x.Fields[1)
.ToArray();
var cccs = records.Where(x => x.Type == "CCC")
.Select(x => new Ccc(x.Fields[0])
.ToArray();
您还可以使用AutoMapper - http://automapper.org/
等内容简化上述操作 或者您可以使用ConditionalRecord
属性,只有在符合给定条件的情况下才能解析某些行。但是,如果您拥有的记录类型越多,这将会越慢,但您的代码将更清晰,FileHelpers将完成大部分繁重工作:
[DelimitedRecord("|")]
[ConditionalRecord(RecordCondition.IncludeIfMatchRegex, "^CDI")]
public class Cdi
{
public string Type { get; set; }
public int Number { get; set; }
public string Data1 { get; set; }
public string Data2 { get; set; }
public string Data3 { get; set; }
}
[DelimitedRecord("|")]
[ConditionalRecord(RecordCondition.IncludeIfMatchRegex, "^CEX001")]
public class Cex001
{
public string Type { get; set; }
public int Number { get; set; }
public string Data1 { get; set; }
}
[DelimitedRecord("|")]
[ConditionalRecord(RecordCondition.IncludeIfMatchRegex, "^CCC")]
public class Ccc
{
public string Type { get; set; }
public int Number { get; set; }
}
var input =
@"CDI|11111|Data1|Data2|Data3
CEX001|123131|Data1
CCC|123131";
var CdiEngine = new FileHelperEngine<Cdi>();
var cdis = CdiEngine.ReadString(input);
var cexEngine = new FileHelperEngine<Cex001>();
var cexs = cexEngine.ReadString(input);
var cccEngine = new FileHelperEngine<Ccc>();
var cccs = cccEngine.ReadString(input);
答案 1 :(得分:1)
除了以不同方式组织数据之外,你的第一个循环并没有做任何事情。您应该能够消除它并使用文件中的数据。这样的东西可以给你你想要的东西:
foreach (var line in File.ReadLines(_path))
{
var split = line.Split('|');
var key = split[1];
var value = split;
var recordType = value[0];
switch (recordType)
{
case "CDI":
var cdiRecord = ParseCdi(value.ToList());
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key, CdiRecords = new List<CdiRecord>() { cdiRecord }
});
}
else
{
data[key].CdiRecords.Add(cdiRecord);
}
break;
case "CEX015":
var cexRecord = ParseCex(value.ToList());
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CexRecords = new List<Cex015Record>() { cexRecord }
});
}
else
{
data[key].CexRecords.Add(cexRecord);
}
break;
case "CPH":
CphRecord cphRecord = ParseCph(value.ToList());
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CphRecords = new List<CphRecord>() { cphRecord }
});
}
else
{
data[key].CphRecords.Add(cphRecord);
}
break;
}
};
警告:这只是放在一起,并没有正确检查语法。