除了引号之间的列之外,我需要用逗号分割csv文件。但是,我在这里所看到的似乎并没有实现我所需要的,而且列中的逗号被分成单独的数组项。
public List<string> GetData(string dataFile, int row)
{
try
{
var lines = File.ReadAllLines(dataFile).Select(a => a.Split(';'));
var csv = from line in lines select (from piece in line select piece.Split(',')).ToList();
var foo = csv.ToList();
var result = foo[row][0].ToList();
return result;
}
catch
{
return null;
}
}
private const string QUOTE = "\"";
private const string ESCAPED_QUOTE = "\"\"";
private static char[] CHARACTERS_THAT_MUST_BE_QUOTED = { ',', '"', '\n' };
public static string Escape(string s)
{
if (s.Contains(QUOTE))
s = s.Replace(QUOTE, ESCAPED_QUOTE);
if (s.IndexOfAny(CHARACTERS_THAT_MUST_BE_QUOTED) > -1)
s = QUOTE + s + QUOTE;
return s;
}
在这种情况下,我不确定在哪里可以使用我的转义函数。
示例:
学位,研究生,08年12月8日,第1级,“高级,数学”
字符串Advanced,Maths被分成两个不同的数组项,我不想
答案 0 :(得分:0)
不确定这是如何实现的 - 但您可以使用Linq.Aggregate来解决这个问题:
using System;
using System.Linq;
using System.Collections.Generic;
public class Program
{
public static IEnumerable<string> SplitIt(
char[] splitters,
string text,
StringSplitOptions opt = StringSplitOptions.None)
{
bool inside = false;
var result = text.Aggregate(new List<string>(), (acc, c) =>
{
// this will check each char of your given text
// and accumulate it in the (empty starting) string list
// your splitting chars will lead to a new item put into
// the list if they are not inside. inside starst as false
// and is flipped anytime it hits a "
// at end we either return all that was parsed or only those
// that are neither null nor "" depending on given opt's
if (!acc.Any()) // nothing in yet
{
if (c != '"' && (!splitters.Contains(c) || inside))
acc.Add("" + c);
else if (c == '"')
inside = !inside;
else if (!inside && splitters.Contains(c)) // ",bla"
acc.Add(null);
return acc;
}
if (c != '"' && (!splitters.Contains(c) || inside))
acc[acc.Count - 1] = (acc[acc.Count - 1] ?? "") + c;
else if (c == '"')
inside = !inside;
else if (!inside && splitters.Contains(c)) // ",bla"
acc.Add(null);
return acc;
}
);
if (opt == StringSplitOptions.RemoveEmptyEntries)
return result.Where(r => !string.IsNullOrEmpty(r));
return result;
}
public static void Main()
{
var s = ",,Degree,Graduate,08-Dec-17,Level 1,\"Advanced, Maths\",,";
var spl = SplitIt(new[]{','}, s);
var spl2 = SplitIt(new[]{','}, s, StringSplitOptions.RemoveEmptyEntries);
Console.WriteLine(string.Join("|", spl));
Console.WriteLine(string.Join("|", spl2));
}
}
输出:
|Degree|Graduate|08-Dec-17|Level 1|Advanced, Maths||
Degree|Graduate|08-Dec-17|Level 1|Advanced, Maths
答案 1 :(得分:0)
您可以使用正则表达式,linq或仅循环遍历每个字符,并使用布尔值来确定当前的行为应该是什么。这个问题实际上让我思考,因为我之前只是循环并对每个角色采取行动。这是Linq打破整个csv文档的方法,假设可以在';'找到行尾:
private static void Main(string[] args)
{
string example = "\"Hello World, My name is Gumpy!\",20,male;My sister's name is Amy,29,female";
var result1 = example.Split(';')
.Select(s => s.Split('"')) // This will leave anything in abbreviation marks at odd numbers
.Select(sl => sl.Select((ss, index) => index % 2 == 0 ? ss.Split(',') : new string[] { ss })) // if it's an even number split by a comma
.Select(sl => sl.SelectMany(sc => sc));
Console.WriteLine("Press any key to continue.");
Console.ReadKey();
}
答案 2 :(得分:0)
该函数在字符串中获取逗号分隔的字段,不包括嵌入在引用字段中的逗号
假设
,,
方法
for loop
i
作为当前字段的占位符Linq
CSV
库注意:没有错误检查,扫描每个字符会更快,这很容易理解
代码
public List<string> GetFields(string line)
{
var list = new List<string>();
for (var i = 0; i < line.Length; i++)
{
var firstQuote = line.IndexOf('"', i);
var firstComma = line.IndexOf(',', i);
if (firstComma >= 0)
{
// first comma is before the first quote, then its just a standard field
if (firstComma < firstQuote || firstQuote == -1)
{
list.Add(line.Substring(i, firstComma - i));
i = firstComma;
continue;
}
// We have found quote so look for the next comma afterwards
var nextQuote = line.IndexOf('"', firstQuote + 1);
var nextComma = line.IndexOf(',', nextQuote + 1);
// if we found a comma, then we have found the end of this field
if (nextComma >= 0)
{
list.Add(line.Substring(i, nextComma - i));
i = nextComma;
continue;
}
}
list.Add(line.Substring(i)); // if were are here there are no more fields
break;
}
return list;
}
测试1
学位,研究生,08年12月18日,1级,&#34;高级,数学&#34;,另一个
Degree Graduate 08-Dec-17 Level 1 "Advanced, Maths" another
测试2
,学位,研究生,08年12月8日,\&#34; asdasd \&#34;,1级,\&#34;高级,数学\&#34;,另一个
<Empty Line> Degree Graduate 08-Dec-17 "asdasd" Level 1 "Advanced, Maths" another