我正在为Lua运行Windows CE 6/7的移动设备编写一个grep实用程序,但我遇到了一些实现不区分大小写的匹配模式的问题。将所有内容转换为大写(或更低)的明显解决方案由于字符类而无法正常工作。
我唯一能想到的是将模式本身的文字转换为大写。
这是我到目前为止所拥有的:
function toUpperPattern(instr)
-- Check first character
if string.find(instr, "^%l") then
instr = string.upper(string.sub(instr, 1, 1)) .. string.sub(instr, 2)
end
-- Check the rest of the pattern
while 1 do
local a, b, str = string.find(instr, "[^%%](%l+)")
if not a then break end
if str then
instr = string.sub(instr, 1, a) .. string.upper(string.sub(instr, a+1, b)) .. string.sub(instr, b + 1)
end
end
return instr
end
我不想承认到目前为止需要多长时间,我仍然可以立即看到像逃脱百分号'%%'这样的问题
我认为这一定是一个相当普遍的问题,但我似乎无法在这个主题上找到太多。 有没有更简单(或至少完整)的方法来做到这一点?我在这里开始变得疯狂...... 希望你那里的Lua大师可以启发我!
答案 0 :(得分:9)
尝试这样的事情:
function case_insensitive_pattern(pattern)
-- find an optional '%' (group 1) followed by any character (group 2)
local p = pattern:gsub("(%%?)(.)", function(percent, letter)
if percent ~= "" or not letter:match("%a") then
-- if the '%' matched, or `letter` is not a letter, return "as is"
return percent .. letter
else
-- else, return a case-insensitive character class of the matched letter
return string.format("[%s%s]", letter:lower(), letter:upper())
end
end)
return p
end
print(case_insensitive_pattern("xyz = %d+ or %% end"))
打印:
[xX][yY][zZ] = %d+ [oO][rR] %% [eE][nN][dD]
答案 1 :(得分:2)
Lua 5.1,LPeg v0.12
do
local p = re.compile([[
pattern <- ( {b} / {escaped} / brackets / other)+
b <- "%b" . .
escaped <- "%" .
brackets <- { "[" ([^]%]+ / escaped)* "]" }
other <- [^[%]+ -> cases
]], {
cases = function(str) return (str:gsub('%a',function(a) return '['..a:lower()..a:upper()..']' end)) end
})
local pb = re.compile([[
pattern <- ( {b} / {escaped} / brackets / other)+
b <- "%b" . .
escaped <- "%" .
brackets <- {: {"["} ({escaped} / bcases)* {"]"} :}
bcases <- [^]%]+ -> bcases
other <- [^[%]+ -> cases
]], {
cases = function(str) return (str:gsub('%a',function(a) return '['..a:lower()..a:upper()..']' end)) end
, bcases = function(str) return (str:gsub('%a',function(a) return a:lower()..a:upper() end)) end
})
function iPattern(pattern,brackets)
('sanity check'):find(pattern)
return table.concat({re.match(pattern, brackets and pb or p)})
end
end
local test = '[ab%c%]d%%]+ o%%r %bnm'
print(iPattern(test)) -- [ab%c%]d%%]+ [oO]%%[rR] %bnm
print(iPattern(test,true)) -- [aAbB%c%]dD%%]+ [oO]%%[rR] %bnm
print(('qwe [%D]% O%r n---m asd'):match(iPattern(test, true))) -- %D]% O%r n---m
Pure Lua版本:
有必要分析字符串中的所有字符以将其转换为正确的模式,因为Lua模式没有像regexps(abc | something)那样的替换。
function iPattern(pattern, brackets)
('sanity check'):find(pattern)
local tmp = {}
local i=1
while i <= #pattern do -- 'for' don't let change counter
local char = pattern:sub(i,i) -- current char
if char == '%' then
tmp[#tmp+1] = char -- add to tmp table
i=i+1 -- next char position
char = pattern:sub(i,i)
tmp[#tmp+1] = char
if char == 'b' then -- '%bxy' - add next 2 chars
tmp[#tmp+1] = pattern:sub(i+1,i+2)
i=i+2
end
elseif char=='[' then -- brackets
tmp[#tmp+1] = char
i = i+1
while i <= #pattern do
char = pattern:sub(i,i)
if char == '%' then -- no '%bxy' inside brackets
tmp[#tmp+1] = char
tmp[#tmp+1] = pattern:sub(i+1,i+1)
i = i+1
elseif char:match("%a") then -- letter
tmp[#tmp+1] = not brackets and char or char:lower()..char:upper()
else -- something else
tmp[#tmp+1] = char
end
if char==']' then break end -- close bracket
i = i+1
end
elseif char:match("%a") then -- letter
tmp[#tmp+1] = '['..char:lower()..char:upper()..']'
else
tmp[#tmp+1] = char -- something else
end
i=i+1
end
return table.concat(tmp)
end
local test = '[ab%c%]d%%]+ o%%r %bnm'
print(iPattern(test)) -- [ab%c%]d%%]+ [oO]%%[rR] %bnm
print(iPattern(test,true)) -- [aAbB%c%]dD%%]+ [oO]%%[rR] %bnm
print(('qwe [%D]% O%r n---m asd'):match(iPattern(test, true))) -- %D]% O%r n---m