如何仅提取数组名称和括号内的内容,并使其成为键值对?

时间:2019-07-10 19:27:13

标签: python regex extraction data-extraction

我需要提取数组的名称作为键,并把花括号内的内容提取为字符串作为值。{} 中的每个值都以 0x 开头,我需要去掉这个前缀,然后把它们拼接成 “A00E28...” 这样的字符串。我是正则表达式的新手。

我已经提取了字典键:

# Scan the header file line by line looking for array declarations.
with open(Headerpath, "r") as fp:
    # Iterate over the file object directly so that every line, including
    # the very first one, is examined. Calling readline() once before the
    # loop and again at the top of the loop body skipped the first line.
    for line in fp:
        if "const unsigned char" in line:
            # Word tokens of the declaration line; the array name is among
            # them. `res` is overwritten on every matching line.
            res = re.findall(r'\w+', line)

现在,我卡在了如何提取每个 {} 中的内容上:
const unsigned char x[] = {

    0xA0, 0x0E, 0x28, 0x70, 0x00, 0x3E, 0x15, 0x08, 0x08, 0x08, 0x31, 0x1E, 0x1E, 0x00, 0x00, 0xA7, 0x8E, 0xFF, 0xFF, 0x0F, 0x0F, 0x23, 0x23, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x17, 0x40, 0x20, 0x07, 0x13, 0x07, 0x05, 0x07, 

    0xA0, 0xA4, 0x85, 0x14, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00, 0x05, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x0E, 0x00, 0x10, 0x00, 0x11, 0x00, 0x13, 0x00, 0x14, 0x00, 0x16, 0x00, 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1F, 0x00, 0x20, 0x00, 0x21, 0x00, 0x24, 0x00, 0x25, 0x00, 0x27, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2F, 0x00, 0x31, 0x00, 0x32, 0x00, 0x34, 0x00, 0x35, 0x00, 0x37, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3F, 0x00, 0x41, 0x00, 0x42, 0x00, 0x44, 0x00, 0x46, 0x00, 0x47, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4C, 0x00, 0x4E, 0x00, 0x4F, 0x00, 0x51, 0x00, 0x52, 0x00, 0x54, 0x00, 0x56, 0x00, 0x57, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5C, 0x00, 0x5E, 0x00, 0x5F, 0x00, 0x61, 0x00, 0x62, 0x00, 0x64, 0x00, 

    0xA0, 0x11, 0x07, 0x04, 0x2A, 0x32, 0x01, 0xC8, 0xF6, 0xF6, 

    0xA0, 0x28, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
    0xA0, 0xA5, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x124,
    0xA0, 0x0D, 0x06, 0xE0, 0x38, 0x67, 0x00, 0x00, 0x00,       
    0xA0, 0x0D, 0x06, 0xE0, 0x39, 0x00, 0x00, 0xFC, 0x00,       
};

.
.
.
.
.

所以我需要得到一个字典: {x: “A00E28...”}, {y: “A00E29...”}

1 个答案:

答案 0 :(得分:1)

如果您可以将内容复制粘贴到一个 Python 多行字符串(由三个双引号 """ 分隔)中,下面是一个简短的解决方案(如果需要解释,请随时提问):

import re

def contentMatcher(content):
    """Find every ``const unsigned char NAME[] = { ... };`` array in *content*.

    Args:
        content: Text containing C array definitions.

    Returns:
        A list of ``(variable, values)`` tuples in order of appearance, where
        ``variable`` is the array name and ``values`` is the raw text found
        between the opening ``{`` and the closing ``};``.
    """
    pattern = (
        r'\bconst\s+unsigned\s+char\s+'
        # Capture the whole identifier; a bare ``\w`` would only accept
        # one-character array names.
        r'(?P<variable>\w+)'
        # Tolerate any spacing around ``[]``, ``=`` and ``{``.
        r'\s*\[\s*\]\s*=\s*\{'
        # Non-greedy so that each array stops at its own closing ``};``.
        r'(?P<values>.*?)'
        r'\}\s*;'
    )
    # DOTALL lets ``.`` cross newlines because the initializer spans many
    # lines. MULTILINE is not needed: the pattern has no ``^`` or ``$``.
    return re.findall(pattern, content, re.DOTALL)

def valuesParser(values):
    """Extract the two-digit hex bytes from the body of a C array initializer.

    C comments (``/* ... */`` and ``// ...``) are removed first, so hex
    literals that only appear inside comments are ignored regardless of how
    the comment is worded.

    Args:
        values: Raw text found between the braces of an array definition.

    Returns:
        A list of two-character uppercase hex strings without the ``0x``
        prefix, in order of appearance.
    """
    # Strip comments up front instead of relying on a lookbehind tailored to
    # one particular comment layout.
    code_only = re.sub(r'/\*.*?\*/|//[^\n]*', '', values, flags=re.DOTALL)
    # Accept both upper- and lowercase hex digits and normalise to uppercase.
    # Exactly two digits are taken per literal, so a longer literal such as
    # 0x124 still contributes only its first two digits ("12").
    return [
        digits.upper()
        for digits in re.findall(r'\b0[xX]([0-9A-Fa-f]{2})', code_only)
    ]

def valuesMapper(hexaMatches):
    """Build a dictionary mapping each array name to its joined hex digits.

    Args:
        hexaMatches: Iterable of ``(variable, values)`` pairs.

    Returns:
        A dict whose keys are the variable names and whose values are the
        strings returned by ``valuesParser`` for that pair, concatenated
        with no separator.
    """
    mapping = {}
    for name, raw_values in hexaMatches:
        hex_bytes = valuesParser(raw_values)
        mapping[name] = ''.join(hex_bytes)
    return mapping
content = """const unsigned char x[] = {
      /* USER_PMU */
    0xA0, 0x0E, 0x28, 0x70, 0x00, 0x3E, 0x15, 0x08, 0x08, 0x08, 0x31, 0x1E, 0x1E, 0x00, 0x00, 0xA7, 0x8E, 0xFF, 0xFF, 0x0F, 0x0F, 0x23, 0x23, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x17, 0x40, 0x20, 0x07, 0x13, 0x07, 0x05, 0x07, 
      /* AGC_PHASE_COMPENSATION */
    0xA0, 0xA4, 0x85, 0x14, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00, 0x05, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x0E, 0x00, 0x10, 0x00, 0x11, 0x00, 0x13, 0x00, 0x14, 0x00, 0x16, 0x00, 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1F, 0x00, 0x20, 0x00, 0x21, 0x00, 0x24, 0x00, 0x25, 0x00, 0x27, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2F, 0x00, 0x31, 0x00, 0x32, 0x00, 0x34, 0x00, 0x35, 0x00, 0x37, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3F, 0x00, 0x41, 0x00, 0x42, 0x00, 0x44, 0x00, 0x46, 0x00, 0x47, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4C, 0x00, 0x4E, 0x00, 0x4F, 0x00, 0x51, 0x00, 0x52, 0x00, 0x54, 0x00, 0x56, 0x00, 0x57, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5C, 0x00, 0x5E, 0x00, 0x5F, 0x00, 0x61, 0x00, 0x62, 0x00, 0x64, 0x00, 
      /* RF_CLOCK_CFG */
    0xA0, 0x11, 0x07, 0x04, 0x2A, 0x32, 0x01, 0xC8, 0xF6, 0xF6, 
      /* IOT_SETTINGS */
    0xA0, 0x28, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
      /* RF_FLAGS_CONFIG */
    0xA0, 0xA5, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x124,
    0xA0, 0x0D, 0x06, 0xE0, 0x38, 0x67, 0x00, 0x00, 0x00,       // CLIF_GCM_CONFIG0_REG, 0x0E0
    0xA0, 0x0D, 0x06, 0xE0, 0x39, 0x00, 0x00, 0xFC, 0x00,       // CLIF_GCM_CONFIG1_REG, 0x0E4
};

const unsigned char y[] = {
      /* USER_PMU */
    0xA0, 0x0E, 0x29, 0x70, 0x00, 0x3E, 0x15, 0x08, 0x08, 0x08, 0x31, 0x1E, 0x1E, 0x00, 0x00, 0xA7, 0x8E, 0xFF, 0xFF, 0x0F, 0x0F, 0x23, 0x23, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x17, 0x40, 0x20, 0x07, 0x13, 0x07, 0x05, 0x07, 
      /* AGC_PHASE_COMPENSATION */
    0xA0, 0xA4, 0x85, 0x14, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00, 0x05, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x0E, 0x00, 0x10, 0x00, 0x11, 0x00, 0x13, 0x00, 0x14, 0x00, 0x16, 0x00, 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1F, 0x00, 0x20, 0x00, 0x21, 0x00, 0x24, 0x00, 0x25, 0x00, 0x27, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2F, 0x00, 0x31, 0x00, 0x32, 0x00, 0x34, 0x00, 0x35, 0x00, 0x37, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3F, 0x00, 0x41, 0x00, 0x42, 0x00, 0x44, 0x00, 0x46, 0x00, 0x47, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4C, 0x00, 0x4E, 0x00, 0x4F, 0x00, 0x51, 0x00, 0x52, 0x00, 0x54, 0x00, 0x56, 0x00, 0x57, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5C, 0x00, 0x5E, 0x00, 0x5F, 0x00, 0x61, 0x00, 0x62, 0x00, 0x64, 0x00, 
      /* RF_CLOCK_CFG */
    0xA0, 0x11, 0x07, 0x04, 0x2A, 0x32, 0x01, 0xC8, 0xF6, 0xF6, 
      /* IOT_SETTINGS */
    0xA0, 0x28, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
      /* RF_FLAGS_CONFIG */
    0xA0, 0xA5, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x124,
    0xA0, 0x0D, 0x06, 0xE0, 0x38, 0x67, 0x00, 0x00, 0x00,       // CLIF_GCM_CONFIG0_REG, 0x0E0
    0xA0, 0x0D, 0x06, 0xE0, 0x39, 0x00, 0x00, 0xFC, 0x00,       // CLIF_GCM_CONFIG1_REG, 0x0E4
};"""

# Extract every array definition from the sample text and map each array
# name to its concatenated hex digits.
valuesMapper(contentMatcher(content))
{'x': 'A00E2870003E15080808311E1E0000A78EFFFF0F0F23230A00000001001000040000001740200713070507A0A485140001000000000000010003000500070008000A000C000E0010001100130014001600180019001A001C001D001F002000210024002500270029002A002C002D002F003100320034003500370039003A003C003D003F004100420044004600470049004A004C004E004F005100520054005600570059005A005C005E005F00610062006400A01107042A3201C8F6F6A028140000000000000000000000000000000000000000A0A50D000000000000FF031F0000000012A00D06E03867000000A00D06E0390000FC00', 'y': 'A00E2970003E15080808311E1E0000A78EFFFF0F0F23230A00000001001000040000001740200713070507A0A485140001000000000000010003000500070008000A000C000E0010001100130014001600180019001A001C001D001F002000210024002500270029002A002C002D002F003100320034003500370039003A003C003D003F004100420044004600470049004A004C004E004F005100520054005600570059005A005C005E005F00610062006400A01107042A3201C8F6F6A028140000000000000000000000000000000000000000A0A50D000000000000FF031F0000000012A00D06E03867000000A00D06E0390000FC00'}