pyparsing只能找到一个实例

时间:2015-11-11 02:47:22

标签: python pyparsing

我想用pyparsing解析Windows资源文件,因为Menu可以有深层嵌套结构。 使用正则表达式解析此类结构非常困难。

一切正常,但今天我发现我的代码只能找到一个实例。 为了让我清楚,这里是* .rc文件的内容(E:\ tool \ res \ my.rc,为了节省空间只显示容易出错的部分):

#include "../include/resource.h"

IDR_MENU_OPTION MENU BEGIN
    POPUP "Options"
    BEGIN
        MENUITEM "List Layers for &All Pages",  IDM_SHOW_ALL
        MENUITEM "List Layers for &Visible Pages", IDM_SHOW_VISIBLE
        MENUITEM SEPARATOR
        MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
        MENUITEM SEPARATOR
        MENUITEM "E&xpand All",                 IDM_EXPAND_ALL
        MENUITEM "C&ollapse All",               IDM_COLLAPSE_ALL
    END
    POPUP ""
    BEGIN
        MENUITEM "List Layers for &All Pages",  IDM_LIST_ALL
        MENUITEM "List Layers for &Visible Pages", IDM_LIST_VISIBLE
        MENUITEM SEPARATOR
        MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
        MENUITEM SEPARATOR
        MENUITEM "E&xpand All",                 IDM_EXPAND_ALL
        MENUITEM "C&ollapse All",               IDM_COLLAPSE_ALL
        MENUITEM SEPARATOR
        MENUITEM "Layer &Properties...",        IDM_LAYER_PROPERTIES
    END END

IDR_MENU_PRPPERTIES MENU BEGIN // the menu block is skiped by pyparsing
    POPUP ""
    BEGIN
        MENUITEM "&Show Layers",                IDM_SHOW
        MENUITEM "&Properties...",              IDM_PROPERTIES
    END
    MENUITEM "",                            65535 END

#endif    // not APSTUDIO_INVOKED

我的Python代码无法找到IDR_MENU_PRPPERTIES MENU, 现在的输出是:

IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION

但预期输出应为:

IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
IDR_MENU_PRPPERTIES
IDM_SHOW
IDM_PROPERTIES

这是我的代码:

import re
import os
import codecs
import fnmatch
from bs4 import UnicodeDammit
from pyparsing import restOfLine, cStyleComment, Word, alphanums, alphas, \
    Optional, SkipTo, ZeroOrMore, Group, Keyword, quotedString, delimitedList, \
    nums, commaSeparatedList, Forward, Combine


class RcParser:
    def __init__(self, rc_file):
        self.rc_file = rc_file
        handle = open(rc_file, 'rb')
        binary_data = handle.read()
        handle.close()
        dammit = UnicodeDammit(binary_data)
        self.rc_src = dammit.unicode_markup
        self.encoding = dammit.original_encoding
        self.string_table_id = set()
        self.dialog_id = set()
        self.menu_id = set()
        self.img_id = set()

        self.parse(self.rc_src)

    def get_rc_header(self):
        inx = self.rc_file.rfind('\\')
        path = self.rc_file[: inx + 1]
        file_lists = [path + file for file in os.listdir(path) if file.lower().endswith('resource.h')]
        if not file_lists:
            return None
        return file_lists[0]

    def id_by_parsing_rc(self):
        rc_id = self.img_id | self.menu_id | self.dialog_id | self.string_table_id
        return rc_id

    def rc_statement(self):
        """ Generate a RC statement parser that can be used to parse a RC file

        :rtype: pyparsing.ParserElement
        """

        one_line_comment = '//' + restOfLine
        comments = cStyleComment ^ one_line_comment
        precompiler = Word('#', alphanums) + restOfLine
        language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
            "language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage"))
        block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
        block_end = (Keyword('}') | Keyword("END")).setName("block_end")
        reserved_words = block_start | block_end
        name_id = ~reserved_words + \
                  Word(alphas, alphanums + '_').setName("name_id")
        numbers = Word(nums)
        integerconstant = numbers ^ Combine('0x' + numbers)
        constant = Combine(
            Optional(Keyword("NOT")) + (name_id | integerconstant), adjacent=False, joinString=' ')
        combined_constants = delimitedList(constant, '|')
        block_options = Optional(SkipTo(
            Keyword("CAPTION"), failOn=block_start)("pre_caption") + Keyword("CAPTION") + quotedString(
            "caption")) + SkipTo(
            block_start)("post_caption")
        undefined_control = Group(name_id.setResultsName(
            "id_control") + delimitedList(quotedString ^ constant ^ numbers ^ Group(combined_constants)).setResultsName(
            "values_"))
        block = block_start + \
                ZeroOrMore(undefined_control)("controls") + block_end
        dialog = name_id(
            "block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + block_options + block
        string_table = Keyword("STRINGTABLE")(
            "block_type") + block_options + block
        menu_item = Keyword(
            "MENUITEM")("block_type") + (commaSeparatedList("values_") | Keyword("SEPARATOR"))
        popup_block = Forward()
        popup_block <<= Group(Keyword("POPUP")("block_type") + Optional(quotedString("caption")) + block_start +
                              ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*")
        menu = name_id("block_id") + \
               Keyword("MENU")("block_type") + block_options + \
               block_start + ZeroOrMore(popup_block) + block_end
        statem = comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu
        return statem

    def generate_menu_pre_name(self, block_type, block_id):
        """Return the pre-name generated for elements of a menu."""
        return "%s.%s" % (block_type, block_id)

    def generate_popup_pre_name(self, pre_name, caption):
        """Return the pre-name generated for subelements of a popup.

        :param pre_name: The pre_name that already have the popup.
        :param caption: The caption (whitout quotes) of the popup.

        :return: The subelements pre-name based in the pre-name of the popup and
                 its caption.
        """
        return "%s.%s" % (pre_name, caption.replace(" ", "_"))

    def add_popup_units(self, pre_name, popup):
        """Transverses the popup tree making new units as needed."""
        for element in popup.elements:
            if element.block_type and element.block_type == "MENUITEM":
                if element.values_ and len(element.values_) >= 2:
                    var = element.values_[1]
                    if not var.isdigit():
                        self.menu_id.add(var)
                        # Else it can be a separator.
            elif element.popups:
                for sub_popup in element.popups:
                    self.add_popup_units(self.generate_popup_pre_name(pre_name, popup.caption[1:-1]), sub_popup)

    def parse(self, rcsrc):
        """Read the source of a .rc file in and include them as units."""
        # Parse the strings into a structure.
        results = self.rc_statement().searchString(rcsrc)
        for statement in results:
            if not statement.block_type:
                continue
            if statement.block_type in ("DIALOG", "DIALOGEX"):
                helper = statement.block_id[0]
                self.dialog_id.add(statement.block_id[0])
                control_type = [
                    "AUTOCHECKBOX", "AUTORADIOBUTTON", "CAPTION", "CHECKBOX"
                    , "CTEXT", "CONTROL", "DEFPUSHBUTTON", "GROUPBOX"
                    , "LTEXT", "PUSHBUTTON", "RADIOBUTTON", "RTEXT"
                    , "COMBOBOX"
                ]
                for control in statement.controls:
                    fk = (control.id_control[0] in control_type)
                    flag = (control.values_[0].startswith('"') or control.values_[0].startswith("'"))
                    if control.id_control[0] in control_type:
                        if flag:
                            self.dialog_id.add(control.values_[1])
                        else:
                            self.dialog_id.add(control.values_[0])
                continue

            if statement.block_type in ("MENU"):
                pre_name = self.generate_menu_pre_name(statement.block_type, statement.block_id[0])
                self.menu_id.add(statement.block_id[0])
                for popup in statement.popups:
                    self.add_popup_units(pre_name, popup)
                continue

            if statement.block_type in ("STRINGTABLE"):
                for text in statement.controls:
                    self.string_table_id.add(text.id_control[0])
                continue

        lines = rcsrc.splitlines()
        for line in lines:
            line = line.rstrip()
            m = re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$', line)
            if not m:
                continue
            self.img_id.add(m.group(1))


def main():
    x = RcParser(r'E:\tool\res\my.rc')
    print('\n'.join(sorted(x.id_by_parsing_rc())))


if __name__ == "__main__":
    main()

1 个答案:

答案 0 :(得分:1)

您对菜单的定义是:

menu = name_id("block_id") + \
       Keyword("MENU")("block_type") + block_options + \
       block_start + ZeroOrMore(popup_block) + block_end

在你的block_start / block_end中你只允许popup_blocks。在不匹配的菜单中,菜单中有一个menu_item不属于popup_block。您可能需要以下内容:

menu = name_id("block_id") + \
       Keyword("MENU")("block_type") + block_options + \
       block_start + ZeroOrMore(popup_block | menu_item) + block_end