有一个txt文件,每行有18列,字符串列用单引号 '' 括起来,列与列之间用逗号 , 分隔,其中每一行代表一条sqlite插入语句(insert)的一组值:
(1999,1999,1999,1999,1999,0,0,'flaggr.png',261, 'Βάκχειος', 'Spl-up','B ', 'Pagrati/Athens,Attica,Greece', 'N/A', 'Hellenic Mythology', '','', ''),
(2000,2000,2000,2000,2000,0,2010,'flagru.png',3340, 'Анклав Снов', 'Act', 'G/D ', 'Bryansk,Russia', '2008-2009(as Vampire''s Crypt),2010-present', 'N/A', '','', ''),
(2001,2001,2001,2001,2001,0,2002,'flagru.png',271, 'Аркона', 'Act','P/FO ', 'Moscow,Russia', '2002(as Гиперборея),2002-present', 'Slavic Pism and FOtales, Legends, Mythology', '', '', ''),
(2002,2002,2002,2002,2002,0,1988,'flagru.png',470, 'Аспид', 'Spl-up','PROG ', 'Volgodonsk,Rostovregion,Russia', '1988-1997,2010-?', 'Politics, Horror, Death', '', '', ''),
(2003,2003,2003,2003,2003,0,2000,'flagua.png',359, 'Ірій', 'Unknown','FO D /G ', 'Lviv,Ukraine', '2000-?', 'Slavic mythology, Ukrainian FOlore', '', '', ''),
(2004,2004,2004,2004,2004,0,2011,'flagru.png',3036579, 'Лесьяр', 'Act','P FO ', 'Moscow,Russia', '2011-present', 'Pism, FOlore, Social matters, Feelings', '', '', ''),
(2005,2005,2005,2005,2005,0,2003,'flagru.png',218, 'М8Л8ТХ', 'Act','B with RAC', 'Tver,Ukraine(posterior),Russia', '2003-present', 'National Pride, National Socialism, Hatred, War, Intolerance, Pism', '', '', ''),
(2006,2006,2006,2006,2006,0,0,'flagru.png',354037, 'Рельос', 'Act','PR/POST-/ (early), G/POST-, Ambient (later)', 'Baltiisk,Kaliningradregion,Russia', 'N/A', 'N/A', '', '',''),
(2007,2007,2007,2007,2007,0,2006,'flagru.png',32937, 'Сивый Яр', 'Act','P/POST-B ', 'Vyritsa,Leningradregion,Russia', '2006-present', 'Pism, Pride, Heritage, Poetry, Slavonic Mythology', '', '', ''),
(2008,2008,2008,2008,2008,0,2001,'flagru.png',44, 'Темнозорь', 'Act','FO/B ', 'Moscow,Russia', '2001-present', 'Nature, Slavonic Pism, War, Right-wing nationalism', '4394', '', ''),
(2009,2009,2009,2009,2009,0,1993,'flagru.png',80, 'Эпидемия', 'Act','Pow ', 'Moscow,Russia', '1993-present', 'Fantasy, Tolkien, Elves', '', '', ''),
(2010,2010,2010,2010,2010,0,0,'flagjp.png',354039, 'こくまろみるく', 'Act','G/Pow ', 'N/A,Japan', 'N/A', 'Bizarre, Macabre', '', '', ''),
(2011,2011,2011,2011,2011,0,2012,'flagus.png',38723, 'מזמור', 'Act','B/Drone/D ', 'Portland,Oregon,United States', '2012-present', 'N/A', '', '', ''),
(2012,2012,2012,2012,2012,0,2004,'flaglb.png',67, 'دمار', 'Spl-up','B/Death ', 'Hamra,Beirut,Lebanon', '2004-2006', 'War, Pride, Blasphemy, Supremacy', '', '', ''),
(2013,2013,2013,2013,2013,0,2006,'flagcn.png',760, '原罪', 'Act','B (early), G/B (later)', 'Chengdu,SichuanProvince,China', '2006-present', 'Misanthropy, Hatred, Depression, War, Revelation', '', '', ''),
(2014,2014,2014,2014,2014,0,1995,'flagtw.png',443, '閃靈', 'Act','Melodic B/Death/FO ', 'Taipei,Taiwan', '1995-present', 'Taiwanese Myths and Legends, Anti-Fascism, History', '4443', '', ''),
(2015,2015,2015,2015,2015,0,2001,'flagjp.png',31450, '電気式華憐音楽集団', 'Act','Pow/G', 'N/A,Japan', '2001-present', 'Anime, Fantasy, Liberty', '', '', '');
对齐所有列的最佳方法是什么,例如前两行变为:
(1999,1999,1999,1999,1999,0,0, 'flaggr.png',261, 'Βάκχειος', 'Spl-up', 'B ', 'Pagrati/Athens,Attica,Greece', 'N/A', 'Hellenic Mythology', '','', ''),
(2000,2000,2000,2000,2000,0,2010,'flagru.png',3340, 'Анклав Снов', 'Act', 'G/D ', 'Bryansk,Russia', '2008-2009(as Vampire''s Crypt),2010-present', 'N/A', '','', ''),
我在想:
我提供的代码如下所示,但是我意识到一个问题,有一些列在单引号中有逗号,如'bla1,bla2,bla3'
(columns 12 to 18 could have inner commas...)
所以如果我用逗号分割字符串,我就不会得到18列。
在那个问题之后我不知道如何继续...... 用逗号分割的方法是什么,但考虑一些字符串的单引号?
/// <summary>
/// Scans the file at <paramref name="filePath"/> to record the longest field seen in each of
/// the 18 columns, then writes lines to <paramref name="outputFile"/>.
/// </summary>
/// <param name="filePath">Path of the input text file; read line by line.</param>
/// <param name="outputFile">Path of the file the resulting lines are written to.</param>
private static void AdjustColumnsInFile(string filePath, string outputFile)
{
// Array to store the max size of each column (one slot per expected column, 18 in total).
int[] sizes = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
foreach (var line in File.ReadLines(filePath))
{
// NOTE: a plain Split(',') also splits on commas inside quoted values such as
// 'bla1,bla2,bla3', so those rows produce more than 18 pieces.
var words = line.Split(',');
// Rows that do not split into exactly 18 pieces are silently skipped here.
if (words.Length == 18)
{
var i = 0;
// Keep the largest length seen so far for each column.
foreach (var word in words)
{
sizes[i] = sizes[i] < word.Length ? word.Length : sizes[i];
i++;
}
}
}
// The part that builds `newLines` is elided in the original snippet.
...
// Write every resulting line to the output file.
using (var sw = new StreamWriter(outputFile))
{
foreach (var l in newLines)
{
sw.WriteLine($"{l}");
}
}
}
答案 0 :(得分:2)
据我所知,你唯一的问题是如何在逗号上拆分字符串,因为有些逗号可能出现在''
引号内。你可以用正则表达式来做到这一点:
,(?=(?:[^\']*\'[^\']*\')*[^\']*$)
它基本上匹配这样的逗号:其后面直到行尾出现零个或偶数个单引号(')。如果逗号位于 '' 引号之内,那么在一个有效的字符串中,它后面会出现奇数个单引号,因此不会被匹配。
其余应该很容易,首先计算尺寸:
// Pass 1: record the widest (trimmed) value seen in each of the 18 columns.
int[] sizes = new int[18];
foreach (var line in File.ReadLines(filePath)) {
    var tmp = line.Trim(); // remove leading and trailing whitespace
    if (tmp.Length == 0) {
        continue; // tolerate blank lines instead of crashing in the slicing below
    }
    if (tmp.Length < 3) {
        // Too short to hold "(" + content + ")," or ");", so it cannot be a valid row.
        throw new FormatException("Invalid number of columns");
    }
    // Drop the opening "(" and the two closing characters: ")," or ");".
    tmp = tmp.Substring(1, tmp.Length - 3);
    // Split only on commas followed by an even number of quotes up to the end of the row,
    // i.e. commas that are outside '...' literals.
    var words = Regex.Split(tmp, @",(?=(?:[^\']*\'[^\']*\')*[^\']*$)");
    if (words.Length != sizes.Length) {
        throw new FormatException("Invalid number of columns");
    }
    for (int i = 0; i < words.Length; i++) {
        // Measure the value without surrounding whitespace so existing padding is ignored.
        sizes[i] = Math.Max(sizes[i], words[i].Trim().Length);
    }
}
然后再次遍历文件,为长度小于该列预期大小的值追加空格:
// Pass 2: rewrite every row with each column padded to the width stored in `sizes`.
using (var writer = new StreamWriter(outputFile)) {
    foreach (var line in File.ReadLines(filePath)) {
        var tmp = line.Trim(); // remove leading and trailing whitespace
        if (tmp.Length < 3) {
            // Blank or too short to contain "(" + content + ")," — copy it through unchanged
            // rather than crashing in the slicing below.
            writer.WriteLine(line);
            continue;
        }
        // Remember the row terminator so it can be restored after re-joining the columns.
        bool hadTrailingComma = tmp.EndsWith(",", StringComparison.Ordinal);
        // Drop the opening "(" and the two closing characters: ")," or ");".
        tmp = tmp.Substring(1, tmp.Length - 3);
        // Split only on commas that are outside '...' literals.
        var words = Regex.Split(tmp, @",(?=(?:[^\']*\'[^\']*\')*[^\']*$)");
        // PadRight appends spaces up to the column width and leaves longer values untouched.
        var newLine = String.Join(",", words.Select((w, i) => w.Trim().PadRight(sizes[i])));
        var terminator = hadTrailingComma ? "," : ";";
        writer.WriteLine($"({newLine}){terminator}");
    }
}
请注意,由于存在こくまろみるく之类的unicode字符(它们在等宽字体中通常占两个字符的显示宽度),您的输出文件可能看起来没有对齐,而实际上它是对齐的(即每列的字符数相同)。