如何在python中遍历一个正则表达式对象

时间:2014-06-06 14:13:44

标签: python regex

我在python中有以下小程序:

from inspect import getmembers
from pprint import pprint
import re

def text_function():
    """Match the sample payload against the ``bundled_messages`` pattern.

    Returns the match object produced by ``match``, or ``None`` when the
    pattern does not match the payload.
    """
    payload = '{"device_ip_address": "10.2.16.221", "device_port": 55550, "server_port": 50100, "protocol": "gsm", "bundled_messages": [{"device_ip_address": "10.2.16.221", "device_port": 55550, "bundled_messages": [{"device_ip_address": "10.2.16.221", "device_port": 55550, "unit_id": 1770, "message_size": 12, "protocol_version": 2, "message_id": 18, "timestamp": "2014-05-22 21:17:16", "num_retries": 0, "message_token": 0, "command_id": 0, "number_of_blocks": 0, "unused": 0, "battery_level_external": 0, "battery_level_internal": 100, "alert_flag04": 0, "alert_flag03": 0, "alert_flag02": 0, "alert_flag01": 0, "gsm_rssi": 0, "gps_average_snr": 0, "satellite_count": 0, "status_flag06": 0, "status_flag05": 0, "status_flag04": 0, "status_flag03": 0, "status_flag02": 0, "status_flag01": 0, "battery_charging_fault": 0, "battery_charging_state": 0, "type": "ant_no_communication_failure_alert", "type_specific_data": 0, "message_name": "geoalert", "blocks": []}], "unit_id": 1770, "message_size": 24, "protocol_version": 2, "message_id": 24, "timestamp": "2014-05-22 00:00:38", "num_retries": 0, "message_token": 0, "message_name": "warehouse"}], "unit_id": 1308, "protocol_version": 2, "message_id": 24, "num_retries": 0, "message_token": 6, "timestamp": "2014-05-22 21:17:16", "message_name": "warehouse"}'

    # One capturing group: a "[{ ... }]" span that follows a
    # "bundled_messages" key. Both ".*" parts are greedy.
    pattern = r'.*"bundled_messages": ([\[][{].*[}][\]])'
    return re.compile(pattern).match(payload)

def processBundleMessage(bundleMessage):
    """Return the given ``bundleMessage`` object unchanged."""
    result = bundleMessage
    return result


# Print the match object returned by the helpers. The parenthesised call form
# runs on both Python 2 and Python 3, unlike the bare `print x` statement.
print(processBundleMessage(text_function()))

运行时打印出以下内容:<_sre.SRE_Match object at 0x2286d0>。问题是我想遍历这里的每一个匹配项,以便可以单独处理。

我可以这样做:bundleMessage.lastindex + 1 告诉我这里有两个匹配项,我可以使用group(x)来获取特定的匹配项,但有时可能会有100个甚至更多的匹配项。那么我该如何遍历它,逐个处理我所有的匹配项呢?

3 个答案:

答案 0 :(得分:0)

也许我没有理解问题,但为什么不试试这样做呢?

# Fetch the match object from the question's helpers; it is None when the
# regex does not match.
a = processBundleMessage(text_function())
if a is not None:
    # lastindex is None when no capturing group took part in the match;
    # treat that as zero groups so group(0) (the whole match) is still printed.
    for i in range((a.lastindex or 0) + 1):
        print('====')
        print(a.group(i))

答案 1 :(得分:0)

您的文本是JSON数据。为什么不使用 json 标准库,返回一个更容易处理的字典呢? ;-)

from json import loads

def load_data(data):
    """Parse the JSON document in ``data`` with ``json.loads`` and return the result."""
    parsed = loads(data)
    return parsed

# Sample JSON payload, held as a single string literal.
text = '{"device_ip_address": "10.2.16.221", "device_port": 55550, "server_port": 50100, "protocol": "gsm", "bundled_messages": [{"device_ip_address": "10.2.16.221", "device_port": 55550, "bundled_messages": [{"device_ip_address": "10.2.16.221", "device_port": 55550, "unit_id": 1770, "message_size": 12, "protocol_version": 2, "message_id": 18, "timestamp": "2014-05-22 21:17:16", "num_retries": 0, "message_token": 0, "command_id": 0, "number_of_blocks": 0, "unused": 0, "battery_level_external": 0, "battery_level_internal": 100, "alert_flag04": 0, "alert_flag03": 0, "alert_flag02": 0, "alert_flag01": 0, "gsm_rssi": 0, "gps_average_snr": 0, "satellite_count": 0, "status_flag06": 0, "status_flag05": 0, "status_flag04": 0, "status_flag03": 0, "status_flag02": 0, "status_flag01": 0, "battery_charging_fault": 0, "battery_charging_state": 0, "type": "ant_no_communication_failure_alert", "type_specific_data": 0, "message_name": "geoalert", "blocks": []}], "unit_id": 1770, "message_size": 24, "protocol_version": 2, "message_id": 24, "timestamp": "2014-05-22 00:00:38", "num_retries": 0, "message_token": 0, "message_name": "warehouse"}], "unit_id": 1308, "protocol_version": 2, "message_id": 24, "num_retries": 0, "message_token": 6, "timestamp": "2014-05-22 21:17:16", "message_name": "warehouse"}'

# Parse the payload; the parsed result is shown on the following line.
load_data(text)
>>> {u'num_retries': 0, u'protocol': u'gsm', u'message_name': u'warehouse', u'timestamp': u'2014-05-22 21:17:16', u'bundled_messages': [{u'num_retries': 0, u'message_name': u'warehouse', u'timestamp': u'2014-05-22 00:00:38', u'bundled_messages': [{u'num_retries': 0, u'battery_level_external': 0, u'number_of_blocks': 0, u'type_specific_data': 0, u'battery_level_internal': 100, u'command_id': 0, u'unit_id': 1770, u'message_token': 0, u'status_flag05': 0, u'alert_flag03': 0, u'alert_flag02': 0, u'alert_flag01': 0, u'gsm_rssi': 0, u'type': u'ant_no_communication_failure_alert', u'message_id': 18, u'alert_flag04': 0, u'blocks': [], u'message_name': u'geoalert', u'timestamp': u'2014-05-22 21:17:16', u'unused': 0, u'battery_charging_state': 0, u'satellite_count': 0, u'protocol_version': 2, u'gps_average_snr': 0, u'battery_charging_fault': 0, u'status_flag04': 0, u'message_size': 12, u'status_flag06': 0, u'device_ip_address': u'10.2.16.221', u'device_port': 55550, u'status_flag02': 0, u'status_flag03': 0, u'status_flag01': 0}], u'unit_id': 1770, u'message_size': 24, u'device_ip_address': u'10.2.16.221', u'device_port': 55550, u'message_token': 0, u'protocol_version': 2, u'message_id': 24}], u'unit_id': 1308, u'device_ip_address': u'10.2.16.221', u'device_port': 55550, u'server_port': 50100, u'message_token': 6, u'protocol_version': 2, u'message_id': 24}

答案 2 :(得分:0)

正如Mauro所提到的,你最好使用json库。不过,看一下匹配对象(match object)的文档也没有什么坏处。

匹配对象有一个名为groups()的方法,它以元组的形式返回所有捕获分组匹配到的内容。

matches = text_function()
if matches:  # a failed match is None, which is falsy
    # groups() returns a tuple with the text captured by every group.
    # The call form of print runs on both Python 2 and Python 3.
    print(matches.groups())