好,所以我要完成的工作对于单行csv文档来说是完美的。我正在使用一个配置文件来保存值,以格式化和清理csv文档以进行处理。如果文档中有任何报价,或者货币字段中有空格,则会导致批量处理/上载到我们的数据库失败。但是,当我尝试只处理一行csv文件的过程时,它就可以工作。下面是配置文件和代码的示例。
<dip>
<versions>
<version number="4">
<location path="C:\OutputDestination" />
<numberOfFields>26</numberOfFields>
<orderedfields>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>currency</type>
<type>string</type>
<type>currency</type>
<type>string</type>
<type>currency</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
<type>string</type>
</orderedfields>
</version>
</versions>
</dip>
从此配置文件中,我创建了一个类,该类允许我使用代码中的每个元素:
{
[XmlRoot(ElementName = "location")]
public class Location
{
[XmlAttribute(AttributeName = "path")]
public string Path { get; set; }
}
[XmlRoot(ElementName = "orderedfields")]
public class Orderedfields
{
[XmlElement(ElementName = "type")]
public List<string> Type { get; set; }
}
[XmlRoot(ElementName = "version")]
public class Version
{
[XmlElement(ElementName = "location")]
public Location Location { get; set; }
[XmlElement(ElementName = "numberOfFields")]
public string NumberOfFields { get; set; }
[XmlElement(ElementName = "orderedfields")]
public Orderedfields Orderedfields { get; set; }
[XmlAttribute(AttributeName = "number")]
public string Number { get; set; }
}
[XmlRoot(ElementName = "versions")]
public class Versions
{
[XmlElement(ElementName = "version")]
public List<Version> Version { get; set; }
}
[XmlRoot(ElementName = "dip")]
public class Dip
{
[XmlElement(ElementName = "versions")]
public Versions Versions { get; set; }
}
}
在我的代码中,我告诉我检查字段类型的配置,因为我们不在csv文件中使用标头,因为它会破坏上传过程。因此,为什么我必须使用配置文件来告诉程序哪一列是字符串,货币等。我试图找出是否有可能一次将文档中的单行读入一个数组,拆分将该行按列分成另一个数组,对其进行格式化,然后将其调整为整个文档中每次行/列的迭代。希望有人可以提供解决方案。
static string FormatLine(string[] lineValues, int columns)
{
string line = "";
for (int i = 0; i < columns; i++)
{
line += String.Format("{0}\t", lineValues[i]);
//lineValues[i].Remove(line.Length - 1); //removes trailing tab if necessary
}
return line;
}
static void Main(string[] args)
{
Dip dipConfig = null;
var xmlString = File.ReadAllText(@"DipConfig.xml");
int count, docLength = 0;
string[] lineValues;
string path = @"C:\DIPV4\apptest.csv";
string[] lines = File.ReadAllLines(path);
//string allowedChars = @"^\d+$"; //regex experiment for currency
docLength = lines.Length;
var colCount = lines[0].Split('\t').Count();
using (var stream = new StringReader(xmlString))
{
var serializer = new XmlSerializer(typeof(Dip));
dipConfig = (Dip)serializer.Deserialize(stream);
}
//getting the # of fields in version
var node = dipConfig.Versions.Version.FirstOrDefault(q => q.NumberOfFields == colCount.ToString());
string[] newLines = new string[docLength];
for (int i = 0; i < docLength; i++)
{
lineValues = lines;
count = 0;
var currValue = "";
//check each column of each line to see which column is currency and remove white space
if (node != null)
{
foreach (var fieldType in node.Orderedfields.Type)
{
var columnCount = lineValues[0].Split('\t').Count();
currValue = lineValues[count];
if (!String.IsNullOrEmpty(currValue))
{
if ("currency".Equals(fieldType.ToLower()))
{
lineValues[columnCount] = Regex.Replace(currValue, " ", String.Empty);
}
}
count++;
}
//rebuilds the array into a file
newLines[i] = FormatLine(lineValues, colCount);
}
}
for (int i = 0; i < docLength; i++) //change to docLength
{
//remove all quotes from document ***works***
lines[i] = Regex.Replace(lines[i], "\"", String.Empty);
}
File.WriteAllLines(@"C:\Test Destination\test.csv", lines);
Console.ReadLine();
}