我在这里实施了Cuong的解决方案: C# Processing Fixed Width Files
这是我的代码:
var lines = File.ReadAllLines(@fileFull);
var widthList = lines.First().GroupBy(c => c)
.Select(g => g.Count())
.ToList();
var list = new List<KeyValuePair<int, int>>();
int startIndex = 0;
for (int i = 0; i < widthList.Count(); i++)
{
var pair = new KeyValuePair<int, int>(startIndex, widthList[i]);
list.Add(pair);
startIndex += widthList[i];
}
var csvLines = lines.Select(line => string.Join(",",
list.Select(pair => line.Substring(pair.Key, pair.Value))));
File.WriteAllLines(filePath + "\\" + fileName + ".csv", csvLines);
@fileFull =文件路径&amp;名称
我遇到的问题是输入文件的第一行还包含数字。所以它可能是AAAAAABBC111111111DD2EEEEEE等。由于某种原因,Cuong代码的输出给了我像1111RRRR和222223333这样的CSV标题。
有谁知道这是为什么以及如何解决它?
标题行示例:
AAAAAAAAAAAAAAAABBBBBBBBBBCCCCCCCCDEFCCCCCCCCCGGGGGGGGHHHHHHHHIJJJJJJJJKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQQQ1111RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR222222222333333333444444444555555555666666666777777777888888888999999999S00001111TTTTTTTTTTTTUVWXYZ!"£$$$$$$%&
转换标题行:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCDEFCCCCCC C C C GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111RRRR RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR2222 222223333 333334444 444445555 555556666 666667777 777778888 888889999 99999S000 0 1111 TTTTTTTTTTTT U V W X Y Z ! ",�,$$$$$$,%,&,"
Jodrell - 我实现了您的建议,但标题输出如下:
BBBBBBBBBBCCCCCC CCCCCCCCD DEFCCCC GGGGGGGG HHHHHHH IJJJJJJ KKKKLLL LLL MMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPPQQQQ1111RRRRRRRRRRRRRRRRR QQQ 111 RRR 33333333 44444444 55555555 66666666 77777777 88888888 99999999 S0000111 111 TTT UVWXYZ!"�$$ %&
答案 0 :(得分:2)
正如Jodrell已经提到的,你的代码不起作用,因为它假设代表每个列标题的字符是不同的。更改解析标题宽度的代码将修复它。
替换:
var widthList = lines.First().GroupBy(c => c)
.Select(g => g.Count())
.ToList();
使用:
var widthList = new List<int>();
var header = lines.First().ToArray();
for (int i = 0; i < header.Length; i++)
{
if (i == 0 || header[i] != header[i-1])
widthList.Add(0);
widthList[widthList.Count-1]++;
}
Parsed标题栏:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCC D E F CCCCCCCCC GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111 RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR 222222222 333333333 444444444 555555555 666666666 777777777 888888888 999999999 S 0000 1111 TTTTTTTTTTTT U V W X Y Z ! " £ $$$$$$ % &
答案 1 :(得分:1)
修改强>
因为问题让我烦恼,我写了一些处理"
和,
的代码。此代码用逗号分隔的交替零和1替换标题行。正文中的任何逗号或双引号都会被适当地转义。
static void FixedToCsv(string sourceFile)
{
if (sourceFile == null)
{
// Throw exception
}
var dir = Path.GetDirectory(sourceFile)
var destFile = string.Format(
"{0}{1}",
Path.GetFileNameWithoutExtension(sourceFile),
".csv");
if (dir != null)
{
destFile = Path.Combine(dir, destFile);
}
if (File.Exists(destFile))
{
// Throw Exception
}
var blocks = new List<KeyValuePair<int, int>>();
using (var output = File.OpenWrite(destFile))
{
using (var input = File.OpenText(sourceFile))
{
var outputLine = new StringBuilder();
// Make header
var header = input.ReadLine();
if (header == null)
{
return;
}
var even = false;
var lastc = header.First();
var counter = 0;
var blockCounter = 0;
foreach(var c in header)
{
counter++;
if (c == lastc)
{
blockCounter++;
}
else
{
blocks.Add(new KeyValuePair<int, int>(
counter - blockCounter - 1,
blockCounter));
blockCounter = 1;
outputLine.Append(',');
even = !even;
}
outputLine.Append(even ? '1' : '0');
lastc = c;
}
blocks.Add(new KeyValuePair<int, int>(
counter - blockCounter,
blockCounter));
outputLine.AppendLine();
var lineBytes = Encoding.UTF.GetBytes(outputLine.ToString());
outputLine.Clear();
output.Write(lineBytes, 0, lineBytes.Length);
// Process Body
var inputLine = input.ReadLine();
while (inputLine != null)
{
foreach(var block in block.Select(b =>
inputLine.Substring(b.Key, b.Value)))
{
var sanitisedBlock = block;
if (block.Contains(',') || block.Contains('"'))
{
santitisedBlock = string.Format(
"\"{0}\"",
block.Replace("\"", "\"\""));
}
outputLine.Append(sanitisedBlock);
outputLine.Append(',');
}
outputLine.Remove(outputLine.Length - 1, 1);
outputLine.AppendLine();
lineBytes = Encoding.UTF8.GetBytes(outputLne.ToString());
outputLine.Clear();
output.Write(lineBytes, 0, lineBytes.Length);
inputLine = input.ReadLine();
}
}
}
}
1
,因此您的两个四分之一被计算为八分之一,并且从那里一切都出错了。
(在1
之后有四个Q
块,在1
s之后有四个0
s的另一个块
实际上,您的标题行无效,或者至少不能使用建议的解决方案。
好的,你可以这样做。
public void FixedToCsv(string fullFile)
{
var lines = File.ReadAllLines(fullFile);
var firstLine = lines.First();
var widths = new List<KeyValuePair<int, int>>();
var innerCounter = 0;
var outerCounter = 0
var firstLineChars = firstLine.ToCharArray();
var lastChar = firstLineChars[0];
foreach(var c in firstLineChars)
{
if (c == lastChar)
{
innerCounter++;
}
else
{
widths.Add(new KeyValuePair<int, int>(
outerCounter
innerCounter);
innerCounter = 0;
lastChar = c;
}
outerCounter++;
}
var csvLines = lines.Select(line => string.Join(",",
widths.Select(pair => line.Substring(pair.Key, pair.Value))));
// Get filePath and fileName from fullFile here.
File.WriteAllLines(filePath + "\\" + fileName + ".csv", csvLines);
}