我正在尝试根据最后一个属性将csv文件中的条目分组到Lua中的不同表中。但由于某些原因,字符串比较并不起作用。以下是代码:
-- Buckets that collect the first CSV field of each record, chosen by the
-- record's fifth (last) field: 'train', 'test' or 'val'.
trainer = {}
tester = {}
valer = {}
-- Read the file named by `mlist` (defined outside this snippet) line by line.
-- NOTE(review): io.lines strips only the '\n' terminator. If the file has
-- Windows (CRLF) line endings, the last token of every line keeps a trailing
-- '\r' (e.g. 'val\r'), so none of the equality tests below can match.
for line in io.lines(mlist) do
-- Fields of the current line, in order of appearance.
local split = {}
-- '([^,]+)' captures each maximal run of non-comma characters.
for token in string.gmatch(line, '([^,]+)') do
table.insert(split, token)
-- Debug probe: fires only when a token is exactly 'val'.
if token == 'val' then
print('heelo')
end
end
-- Debug probe: fires only when the fifth field is exactly "val".
if split[5] == "val" then
print('hello world')
end
-- Append the first field to the table selected by the fifth field; a line
-- whose fifth field is none of the three labels is ignored.
if split[5] == 'test' then
table.insert(tester, split[1])
elseif split[5] == 'train' then
table.insert(trainer, split[1])
elseif (split[5] == 'val') then
print('hello')
table.insert(valer, split[1])
end
end
文件mlist包含以下条目(它实际上有很多记录,但我正在进行一些测试):
050810,02747177,02747177,a2e9d8324c1f62cd5ecea499ceee624d,train
051087,02747177,02747177,89aff0d006fc22ff9405d3391cbdb79b,train
051145,02747177,02747177,c6ef918f335bb21c5896bab37ddebe7,train
051416,02747177,02747177,ec1c1aa7003cf68d49e6f7df978f3373,train
051506,02747177,02747177,a8b39c32604173c1d2b12aa6a0f050b3,train
051654,02747177,02747177,e3484284e1f301077d9a3c398c7b4709,train
001174,02747177,02747177,bf4dd114b53bd8f0d67352166d8df9fd,val
003009,02747177,02747177,2f1aed8925147120c62ac18334863d36,val
003735,02747177,02747177,c3b31dc8c48265ecfffc97a61124b1a9,val
004213,02747177,02747177,3a982b20a1c8ebf487b2ae2815c9,val
005241,02747177,02747177,91a4d060d380409c2056b4bd5d870b47,val
008467,02747177,02747177,4d6b6d392ec6b5b4c69692992c7aeb,val
008652,02747177,02747177,3be3e86b2fad511048d5a1386787189,val
008659,02747177,02747177,1e1015a06e43c0a5a44b6af22454453b,val
010510,02747177,02747177,62f4ed6e1df63042cecaed25e0da0964,val
013730,02747177,02747177,7069760a16db98f46c9e5b09a1c294d9,val
016657,02747177,02747177,ae3257e7e0dca9a4fc8569054682bff9,val
018178,02747177,02747177,86194a4645da1f19e14ca01ae177e9d,val
019142,02747177,02747177,e5b7b5112e396a49b581cc9c4410f841,val
021424,02747177,02747177,4088f2056763a95752e986a5f722a6f,val
022098,02747177,02747177,b77e94ab409def2b72745b90f9692729,val
022185,02747177,02747177,b51812771e42354f9996a93ae0c9395c,val
027358,02747177,02747177,5092afb4be0a2f89950ab3eaa7fe7772,val
028916,02747177,02747177,63adebf24a1de9ecf91cc5a18046145f,val
031579,02747177,02747177,1c3cf618a6790f1021c6005997c63924,val
032507,02747177,02747177,dc7ce614dc719649b394cfa64dfabe8e,val
034010,02747177,02747177,6a1b359efc20cd1aaec6ee5ba573fa6d,val
035290,02747177,02747177,c904d927672acd48ad8a0ee9b106700e,val
036604,02747177,02747177,f116f3c432856fccddd8899c86e55073,val
039697,02747177,02747177,c21d8f5641be31a285cac25b72167160,val
040234,02747177,02747177,330cdd681a0890b190fdbeabcd02777c,val
041653,02747177,02747177,b838c5bc5241a44bf2f2371022475a36,val
044825,02747177,02747177,af6fa396b2869446d4d8765e3910f617,val
046115,02747177,02747177,1cb574d3f22f63ebd493bfe20f94b6ab,val
046248,02747177,02747177,942887c8d668501681faebbdea6bd9be,val
046853,02747177,02747177,92fa62263ad30506d2b12aa6a0f050b3,val
046957,02747177,02747177,c77e8499be0ce1e04f3443b22038d340,val
048064,02747177,02747177,c18e807ff9858a51fb016d9401ff3e29,val
048971,02747177,02747177,18dbebd6527cffa254685f5f473de41f,val
051563,02747177,02747177,7c90fba6cd7f73871c1ef519b9196b63,val
我检查了 split[5] 的类型,它确实是字符串。我甚至试过 print(split[5] == 'val'),结果打印的是 false。
感谢您的帮助!
答案 0 :(得分:0)
**更新 / 解决方案**
感谢 Egor Skriptunoff 在评论中给出的解决方案,问题已经解决。原因在于 Windows 和 Linux 的行尾约定不同:Windows 文件的每一行以 CR LF(\r\n)结尾,而我是在 Linux 上读取一个 Windows 格式的文件,因此每行最后一个字段的末尾都多出一个 CR(\r)字符,字符串比较自然失败。只要忽略每一行末尾的这个字符就能解决。按照 Egor 的方案,只需修改内层负责拆分的循环:
-- gsub deletes every '\r' from the line; because the call is used as the
-- receiver of a method call, only its first result (the cleaned string) is
-- kept. gmatch then yields each run of non-comma characters as `token`.
for token in line:gsub('\r', ''):gmatch('[^,]+') do
....
end
再次感谢Egor的解决方案和解释!
答案 1 :(得分:0)
有一种更简单(也更通用)的方法,可以根据最后一个字段(键)把各行分到多个桶中,而不必用一连串 if
块去匹配每一个可能的“键”。类似下面这样(本例通过输入重定向来加载文件):
-- Group the first CSV field of every input line by the line's last field.
-- Reads records shaped like  <digits>,<digits>,<digits>,<hex>,<word>  from the
-- default input (stdin, e.g. via shell redirection) and builds
--   groups[<word>] = { <first field>, <first field>, ... }
-- preserving input order inside each group. Lines that do not match the
-- pattern are skipped.
local groups = {}
for line in io.lines() do
  -- The optional '\r' before '$' tolerates Windows (CRLF) files: io.lines
  -- strips only the '\n', so without it the anchored pattern would reject
  -- every line of such a file. `local` keeps the captures out of the global
  -- table.
  local id, key = line:match('^(%d+),%d+,%d+,%x+,(%w+)\r?$')
  if id then -- both captures are nil when the line does not match
    local bucket = groups[key]
    if bucket == nil then
      bucket = {}
      groups[key] = bucket
    end
    bucket[#bucket + 1] = id
  end
end

-- And, to see table contents for verification...
-- (pairs visits the groups in unspecified order.)
for key, ids in pairs(groups) do
  print(key)
  for i, id in ipairs(ids) do print('', i, id) end
end