我想用pyparsing
解析Windows资源文件,因为Menu可以有深层嵌套结构。 使用正则表达式解析此类结构非常困难。
一切正常,但今天我发现我的代码只能找到一个实例。 为了让我清楚,这里是* .rc文件的内容(E:\ tool \ res \ my.rc,为了节省空间只显示容易出错的部分):
#include "../include/resource.h"
IDR_MENU_OPTION MENU BEGIN
POPUP "Options"
BEGIN
MENUITEM "List Layers for &All Pages", IDM_SHOW_ALL
MENUITEM "List Layers for &Visible Pages", IDM_SHOW_VISIBLE
MENUITEM SEPARATOR
MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
MENUITEM SEPARATOR
MENUITEM "E&xpand All", IDM_EXPAND_ALL
MENUITEM "C&ollapse All", IDM_COLLAPSE_ALL
END
POPUP ""
BEGIN
MENUITEM "List Layers for &All Pages", IDM_LIST_ALL
MENUITEM "List Layers for &Visible Pages", IDM_LIST_VISIBLE
MENUITEM SEPARATOR
MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
MENUITEM SEPARATOR
MENUITEM "E&xpand All", IDM_EXPAND_ALL
MENUITEM "C&ollapse All", IDM_COLLAPSE_ALL
MENUITEM SEPARATOR
MENUITEM "Layer &Properties...", IDM_LAYER_PROPERTIES
END END
IDR_MENU_PRPPERTIES MENU BEGIN // the menu block is skiped by pyparsing
POPUP ""
BEGIN
MENUITEM "&Show Layers", IDM_SHOW
MENUITEM "&Properties...", IDM_PROPERTIES
END
MENUITEM "", 65535 END
#endif // not APSTUDIO_INVOKED
我的Python代码无法找到IDR_MENU_PRPPERTIES MENU
,
现在的输出是:
IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
但预期输出应为:
IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
IDR_MENU_PRPPERTIES
IDM_SHOW
IDM_PROPERTIES
这是我的代码:
import re
import os
import codecs
import fnmatch
from bs4 import UnicodeDammit
from pyparsing import restOfLine, cStyleComment, Word, alphanums, alphas, \
Optional, SkipTo, ZeroOrMore, Group, Keyword, quotedString, delimitedList, \
nums, commaSeparatedList, Forward, Combine
class RcParser:
def __init__(self, rc_file):
self.rc_file = rc_file
handle = open(rc_file, 'rb')
binary_data = handle.read()
handle.close()
dammit = UnicodeDammit(binary_data)
self.rc_src = dammit.unicode_markup
self.encoding = dammit.original_encoding
self.string_table_id = set()
self.dialog_id = set()
self.menu_id = set()
self.img_id = set()
self.parse(self.rc_src)
def get_rc_header(self):
inx = self.rc_file.rfind('\\')
path = self.rc_file[: inx + 1]
file_lists = [path + file for file in os.listdir(path) if file.lower().endswith('resource.h')]
if not file_lists:
return None
return file_lists[0]
def id_by_parsing_rc(self):
rc_id = self.img_id | self.menu_id | self.dialog_id | self.string_table_id
return rc_id
def rc_statement(self):
""" Generate a RC statement parser that can be used to parse a RC file
:rtype: pyparsing.ParserElement
"""
one_line_comment = '//' + restOfLine
comments = cStyleComment ^ one_line_comment
precompiler = Word('#', alphanums) + restOfLine
language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
"language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage"))
block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
block_end = (Keyword('}') | Keyword("END")).setName("block_end")
reserved_words = block_start | block_end
name_id = ~reserved_words + \
Word(alphas, alphanums + '_').setName("name_id")
numbers = Word(nums)
integerconstant = numbers ^ Combine('0x' + numbers)
constant = Combine(
Optional(Keyword("NOT")) + (name_id | integerconstant), adjacent=False, joinString=' ')
combined_constants = delimitedList(constant, '|')
block_options = Optional(SkipTo(
Keyword("CAPTION"), failOn=block_start)("pre_caption") + Keyword("CAPTION") + quotedString(
"caption")) + SkipTo(
block_start)("post_caption")
undefined_control = Group(name_id.setResultsName(
"id_control") + delimitedList(quotedString ^ constant ^ numbers ^ Group(combined_constants)).setResultsName(
"values_"))
block = block_start + \
ZeroOrMore(undefined_control)("controls") + block_end
dialog = name_id(
"block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + block_options + block
string_table = Keyword("STRINGTABLE")(
"block_type") + block_options + block
menu_item = Keyword(
"MENUITEM")("block_type") + (commaSeparatedList("values_") | Keyword("SEPARATOR"))
popup_block = Forward()
popup_block <<= Group(Keyword("POPUP")("block_type") + Optional(quotedString("caption")) + block_start +
ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*")
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block) + block_end
statem = comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu
return statem
def generate_menu_pre_name(self, block_type, block_id):
"""Return the pre-name generated for elements of a menu."""
return "%s.%s" % (block_type, block_id)
def generate_popup_pre_name(self, pre_name, caption):
"""Return the pre-name generated for subelements of a popup.
:param pre_name: The pre_name that already have the popup.
:param caption: The caption (whitout quotes) of the popup.
:return: The subelements pre-name based in the pre-name of the popup and
its caption.
"""
return "%s.%s" % (pre_name, caption.replace(" ", "_"))
def add_popup_units(self, pre_name, popup):
"""Transverses the popup tree making new units as needed."""
for element in popup.elements:
if element.block_type and element.block_type == "MENUITEM":
if element.values_ and len(element.values_) >= 2:
var = element.values_[1]
if not var.isdigit():
self.menu_id.add(var)
# Else it can be a separator.
elif element.popups:
for sub_popup in element.popups:
self.add_popup_units(self.generate_popup_pre_name(pre_name, popup.caption[1:-1]), sub_popup)
def parse(self, rcsrc):
"""Read the source of a .rc file in and include them as units."""
# Parse the strings into a structure.
results = self.rc_statement().searchString(rcsrc)
for statement in results:
if not statement.block_type:
continue
if statement.block_type in ("DIALOG", "DIALOGEX"):
helper = statement.block_id[0]
self.dialog_id.add(statement.block_id[0])
control_type = [
"AUTOCHECKBOX", "AUTORADIOBUTTON", "CAPTION", "CHECKBOX"
, "CTEXT", "CONTROL", "DEFPUSHBUTTON", "GROUPBOX"
, "LTEXT", "PUSHBUTTON", "RADIOBUTTON", "RTEXT"
, "COMBOBOX"
]
for control in statement.controls:
fk = (control.id_control[0] in control_type)
flag = (control.values_[0].startswith('"') or control.values_[0].startswith("'"))
if control.id_control[0] in control_type:
if flag:
self.dialog_id.add(control.values_[1])
else:
self.dialog_id.add(control.values_[0])
continue
if statement.block_type in ("MENU"):
pre_name = self.generate_menu_pre_name(statement.block_type, statement.block_id[0])
self.menu_id.add(statement.block_id[0])
for popup in statement.popups:
self.add_popup_units(pre_name, popup)
continue
if statement.block_type in ("STRINGTABLE"):
for text in statement.controls:
self.string_table_id.add(text.id_control[0])
continue
lines = rcsrc.splitlines()
for line in lines:
line = line.rstrip()
m = re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$', line)
if not m:
continue
self.img_id.add(m.group(1))
def main():
x = RcParser(r'E:\tool\res\my.rc')
print('\n'.join(sorted(x.id_by_parsing_rc())))
if __name__ == "__main__":
main()
答案 0 :(得分:1)
您对菜单的定义是:
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block) + block_end
在你的block_start / block_end中你只允许popup_blocks。在不匹配的菜单中,菜单中有一个menu_item不属于popup_block。您可能需要以下内容:
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block | menu_item) + block_end