程序应该将csv文件作为输入和输出XML。代码如下
private static void ConvertCSVToXML()
{
string[] source = File.ReadAllLines("data.csv");
string RootNameStartTag = "<" + Path.GetFileName("\\CSVTOXML\\CSV-XML\\bin\\Debug\\data.csv") + ">";
RootNameStartTag = RootNameStartTag.Replace(".csv", "");
string RootNameEndTag = RootNameStartTag.Insert(1, "/");
StreamWriter writeFile = new StreamWriter("Output.xml");
string[] headers = source[0].Split(',');
source = source.Where(w => w != source[0]).ToArray();
string[] fields = new string[] { };
XElement xmlElement ;
for (int i = 0; i < source.Length; i++)
{
writeFile.WriteLine(RootNameStartTag);
fields = source[i].Split(',');
for (int j = 0; j < fields.Length; j++)
{
xmlElement = new XElement(new XElement(headers[j], fields[j]));
writeFile.Write(xmlElement);
writeFile.WriteLine();
}
writeFile.WriteLine(RootNameEndTag);
fields = null;
}
}
以上代码的唯一问题是它基于逗号(,)分割数据,所以如果我在csv中有一行 A,&#34; DEF,XYZ,GHI&#34;&#34; FDNFB,dfhjd&#34;
然后字段[0] =字段[1] =&#34; DEF字段[3] = XYZ字段[4] = GHI&#34; field [5] =&#34; FDNB field [6] = dfhjd&#34; 但我需要输出为字段[0] =字段[1] = DEF,XYZ,GHI字段[2] = FDNFB,dfhjd 请根据上述模式帮助我拆分
答案 0 :(得分:1)
.Net内置的TextFieldParser
处理带引号的字段。它位于Microsoft.VisualBasic.FileIO
中,但可以从c#或任何其他.Net语言中使用。以下测试代码:
public static class TextFieldParserTest
{
public static void Test()
{
var csv = @"""DEF,XYZ,GHI"",""FDNFB,dfhjd""";
using (var stream = new StringReader(csv))
using (TextFieldParser parser = new TextFieldParser(stream))
{
parser.SetDelimiters(new string[] { "," });
parser.HasFieldsEnclosedInQuotes = true; // Actually already default
while (!parser.EndOfData)
{
string[] fields = parser.ReadFields();
Debug.WriteLine(fields.Length);
foreach (var field in fields)
Debug.WriteLine(field);
}
}
}
}
提供以下输出:
2
DEF,XYZ,GHI
FDNFB,dfhjd
答案 1 :(得分:0)
请参阅以下解决方案[Convert CSV to XML when CSV contains both character and number data]
他建议使用正则表达式解析CSV行,使用 SplitCSV(line) 而不是 line.Split(“,”)
答案 2 :(得分:0)
Cinchoo ETL - 一个开源库简化了CSV到Xml文件转换的过程。
对于CSV示例:
Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC
使用下面的代码可以生成Xml
string csv = @"Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC";
StringBuilder sb = new StringBuilder();
using (var p = ChoCSVReader.LoadText(csv).WithFirstLineHeader())
{
using (var w = new ChoXmlWriter(sb)
.Configure(c => c.RootName = "Emps")
.Configure(c => c.NodeName = "Emp")
)
{
w.Write(p);
}
}
Console.WriteLine(sb.ToString());
输出Xml:
<Emps>
<Emp>
<Id>1</Id>
<Name>Tom</Name>
<City>NY</City>
</Emp>
<Emp>
<Id>2</Id>
<Name>Mark</Name>
<City>NJ</City>
</Emp>
<Emp>
<Id>3</Id>
<Name>Lou</Name>
<City>FL</City>
</Emp>
<Emp>
<Id>4</Id>
<Name>Smith</Name>
<City>PA</City>
</Emp>
<Emp>
<Id>5</Id>
<Name>Raj</Name>
<City>DC</City>
</Emp>
</Emps>
Checkout CodeProject文章提供了一些额外的帮助。
免责声明:我是这个图书馆的作者。
答案 3 :(得分:-1)
这似乎是一个不错的选择,可能会对您的问题进行排序: http://msdn.microsoft.com/en-GB/library/bb387090.aspx
// Create the text file.
string csvString = @"GREAL,Great Lakes Food Market,Howard Snyder,Marketing Manager,(503) 555-7555,2732 Baker Blvd.,Eugene,OR,97403,USA
HUNGC,Hungry Coyote Import Store,Yoshi Latimer,Sales Representative,(503) 555-6874,City Center Plaza 516 Main St.,Elgin,OR,97827,USA
LAZYK,Lazy K Kountry Store,John Steel,Marketing Manager,(509) 555-7969,12 Orchestra Terrace,Walla Walla,WA,99362,USA
LETSS,Let's Stop N Shop,Jaime Yorres,Owner,(415) 555-5938,87 Polk St. Suite 5,San Francisco,CA,94117,USA";
File.WriteAllText("cust.csv", csvString);
// Read into an array of strings.
string[] source = File.ReadAllLines("cust.csv");
XElement cust = new XElement("Root",
from str in source
let fields = str.Split(',')
select new XElement("Customer",
new XAttribute("CustomerID", fields[0]),
new XElement("CompanyName", fields[1]),
new XElement("ContactName", fields[2]),
new XElement("ContactTitle", fields[3]),
new XElement("Phone", fields[4]),
new XElement("FullAddress",
new XElement("Address", fields[5]),
new XElement("City", fields[6]),
new XElement("Region", fields[7]),
new XElement("PostalCode", fields[8]),
new XElement("Country", fields[9])
)
)
);
Console.WriteLine(cust);
此代码生成以下输出:
Xml
<Root>
<Customer CustomerID="GREAL">
<CompanyName>Great Lakes Food Market</CompanyName>
<ContactName>Howard Snyder</ContactName>
<ContactTitle>Marketing Manager</ContactTitle>
<Phone>(503) 555-7555</Phone>
<FullAddress>
<Address>2732 Baker Blvd.</Address>
<City>Eugene</City>
<Region>OR</Region>
<PostalCode>97403</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="HUNGC">
<CompanyName>Hungry Coyote Import Store</CompanyName>
<ContactName>Yoshi Latimer</ContactName>
<ContactTitle>Sales Representative</ContactTitle>
<Phone>(503) 555-6874</Phone>
<FullAddress>
<Address>City Center Plaza 516 Main St.</Address>
<City>Elgin</City>
<Region>OR</Region>
<PostalCode>97827</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="LAZYK">
<CompanyName>Lazy K Kountry Store</CompanyName>
<ContactName>John Steel</ContactName>
<ContactTitle>Marketing Manager</ContactTitle>
<Phone>(509) 555-7969</Phone>
<FullAddress>
<Address>12 Orchestra Terrace</Address>
<City>Walla Walla</City>
<Region>WA</Region>
<PostalCode>99362</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="LETSS">
<CompanyName>Let's Stop N Shop</CompanyName>
<ContactName>Jaime Yorres</ContactName>
<ContactTitle>Owner</ContactTitle>
<Phone>(415) 555-5938</Phone>
<FullAddress>
<Address>87 Polk St. Suite 5</Address>
<City>San Francisco</City>
<Region>CA</Region>
<PostalCode>94117</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
</Root>
修改强> 我之前没有看到第一个问题。首先对CSV进行一些预处理,然后替换列分隔符。
使用此:
var filePath = "Your csv file path here including name";
var newFilePath = filePath + ".tmp";
using (StreamReader vReader = new StreamReader(filePath))
{
using (StreamWriter vWriter = new StreamWriter(newFilePath, false, Encoding.ASCII))
{
int vLineNumber = 0;
while (!vReader.EndOfStream)
{
string vLine = vReader.ReadLine();
vWriter.WriteLine(ReplaceLine(vLine, vLineNumber++));
}
}
}
File.Delete(filePath);
File.Move(newFilePath, filePath);
Dts.TaskResult = (int)ScriptResults.Success;
}
protected string ReplaceLine(string Line, int LineNumber)
{
var newLine = Line.Replace("\",\"", "|");
newLine = newLine.Replace(",\"", "|");
newLine = newLine.Replace("\",", "|");
return newLine;
}
答案 4 :(得分:-1)
我对Excel生成的CSV文件存在完全相同的问题。事情是(并且这很好),如果字段内容包含分隔符,则在示例中引用内容(如果内容包含引号字符,则加倍)。
我也没有使用现成的解析器,但实现如下:
private string[] ParseLine(string line, char fieldSeparator, char? textSeparator)
{
List<string> items = new List<string>();
StringBuilder itemBuilder = new StringBuilder();
bool textSeparatorFound = false;
for (int i = 0; i < line.Length; i++)
{
// Get current character
char currentChar = line[i];
// In case it is a field separator...
if (currentChar == fieldSeparator)
{
// a) Did we recognize a quote before => Add the character to the item
if (textSeparatorFound)
{
itemBuilder.Append(currentChar);
}
// b) We're not within an open quote => We've finished a field
else
{
string item = itemBuilder.ToString();
itemBuilder.Remove(0, itemBuilder.Length);
// Replace doubled text separators
if (textSeparator != null)
{
string replaceWhat = String.Concat(textSeparator, textSeparator);
string replaceWith = textSeparator.ToString();
item = item.Replace(replaceWhat, replaceWith);
}
items.Add(item);
}
}
// If it is a quote character
else if (currentChar == textSeparator)
{
// a) If we have no open quotation, we open one
if (!textSeparatorFound)
{
textSeparatorFound = true;
}
// b) If we have an open quotation we have to decide whether to close it or not
else
{
// If this character is followed by the field separator or the end of the string,
// this ends a quoted block. Otherwise we just add it to the output to
// handle quoted quotes.
if (i < line.Length - 1 && line[i + 1] != fieldSeparator)
itemBuilder.Append(currentChar);
else
textSeparatorFound = false;
}
}
// All other characters are appended to the current item
else
itemBuilder.Append(currentChar);
}
// All other text is just appended
if (itemBuilder.Length > 0)
{
string item = itemBuilder.ToString();
itemBuilder.Remove(0, itemBuilder.Length);
// Remember to replace quoted quotes
if (textSeparator != null)
{
string replaceWhat = String.Concat(textSeparator, textSeparator);
string replaceWith = textSeparator.ToString();
item = item.Replace(replaceWhat, replaceWith);
}
items.Add(item.Trim());
}
return items.ToArray();
}
答案 5 :(得分:-2)
CSV的问题在于它是一种不规则的语言。这意味着字符具有不同的含义,具体取决于字符流中它们之前或之后的内容。正如您所看到的那样,使用string进行拆分.Split方法无法正确识别使用引号转义的字段中的逗号。
虽然可以使用正则表达式对CSV行进行粗略分析,然后回顾并展望技术,但这些技术通常都是错误的和缓慢的。这是因为正则表达式是为常规语言设计的。更好的方法是使用像这样的简单函数简单地解析字符:
using System;
using System.Collections.Generic;
using System.Text;
class Program
{
static void Main(string[] args)
{
IList<string> fields = ParseCSVLine("text,\"text with quote(\"\") and comma (,)\",text");
foreach (string field in fields)
{
Console.WriteLine(field);
}
}
public static IList<string> ParseCSVLine(string csvLine)
{
List<string> result = new List<string>();
StringBuilder buffer = new StringBuilder(csvLine.Length);
bool inQuotes = false;
char lastChar = '\0';
foreach (char c in csvLine)
{
switch (c)
{
case '"':
if (inQuotes)
{
inQuotes = false;
}
else
{
// This next if handles the case where
// we have a doubled up quote
if (lastChar == '"')
{
buffer.Append('"');
}
inQuotes = true;
}
break;
case ',':
if (inQuotes)
{
buffer.Append(',');
}
else
{
result.Add(buffer.ToString());
buffer.Clear();
}
break;
default:
buffer.Append(c);
break;
}
lastChar = c;
}
result.Add(buffer.ToString());
return result;
}
}
以上输出:
text
text with quote(") and comma (,)
text