正则表达式拆分字符串并写入新行

时间:2017-11-09 10:22:17

标签: python regex string

我有下面这个字符串(来自 BeautifulSoup):

[[["RE  3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null],["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]]

如何用正则表达式 \[\"+[A-Z](它并不完全正确)来拆分这个字符串,并把拆分后的每一段分别写到新的一行?期望得到如下结果:

[[["RE  3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null]

["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]]

也就是说,用 re.split 配合这个正则表达式进行拆分,然后把拆分出的每一段各写成新的一行。

2 个答案:

答案 0 :(得分:0)

不要拆分任何东西,也不要写任何正则表达式。它虽然是一个字符串,但看起来就是 JSON。

请用 json.loads 来读取(解析)它:

>>> import json
>>> json.loads('[[["RE  3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null],["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]]')
[[['RE  3364', 1140509724, 714348396, '84/149614/18/19/80', '6', 8, '', 'Eberswalde Hbf', [[-35, -27, -1064, '4', '82', None, None], [711, 639, 2823, '5', '81', None, None], [1151, 1043, 5155, '5', '83', None, None], [2383, 2230, 11893, '5', '83', None, None], [4019, 3731, 20530, '5', '82', None, None], [5637, 5232, 29168, '5', '83', None, None], [7273, 6733, 37806, '', '0', None, None]], 'Berlin-Lichtenberg', '8010036', 'Bernau(b Berlin)', '8013470', '09.11.17', '-1', None, '1:37', '1:18', None, None, '4', None, None], ['RB 18642', 354496333, 422441800, '84/147727/18/19/80', '14', 8, '', 'Nauen', [[6329, -1007, -4763, '14', '66', None, None], [4962, -791, 403, '14', '66', None, None], [3686, -594, 5192, '14', '66', None, None], [3227, -522, 6914, '14', '66', None, None], [1942, -324, 11757, '14', '66', None, None], [872, -144, 15793, '14', '66', None, None], [-1932, 314, 26394, '20', '62', None, None], [-2076, 224, 27147, '13', '126', None, None], [-3425, 593, 30000, '', '0', None, None], [-3425, 593, 31389, '14', '121', None, None], [-4099, 710, 32779, '14', '121', None, None], [-6939, 1168, 38664, '', '0', None, None]], 'Berlin-Spandau', '8010404', 'Albrechtshof', '8080040', '09.11.17', '-1', None, '1:32', '1:29', None, None, '4', None, None], ['01:29:30', 2, 35000, 5000, 'guiV=4.1.3&', '20171109', '69869174432dcbb13e038c953c9a7cc9', '09.11.17', '11:06:30', 0]], []]

答案 1 :(得分:0)

如果您跳过前两个括号,json.loads()可以按如下方式使用:

import json

text = """[[["RE  3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null]"""
# Skip the two leading "[[" characters so the remaining text is one complete JSON array.
data = json.loads(text[2:])    
print data    

这样会得到以下输出:

[u'RE  3364', 1140509724, 714348396, u'84/149614/18/19/80', u'6', 8, u'', u'Eberswalde Hbf', [[-35, -27, -1064, u'4', u'82', None, None], [711, 639, 2823, u'5', u'81', None, None], [1151, 1043, 5155, u'5', u'83', None, None], [2383, 2230, 11893, u'5', u'83', None, None], [4019, 3731, 20530, u'5', u'82', None, None], [5637, 5232, 29168, u'5', u'83', None, None], [7273, 6733, 37806, u'', u'0', None, None]], u'Berlin-Lichtenberg', u'8010036', u'Bernau(b Berlin)', u'8013470', u'09.11.17', u'-1', None, u'1:37', u'1:18', None, None, u'4', None, None]

要把返回结构中的 unicode 字符串转换成普通的字节字符串(str),可以使用以下函数:

def to_strings(nested):
    """Return a copy of *nested* with unicode strings encoded as UTF-8.

    Dicts and lists are rebuilt recursively (for dicts, both keys and
    values are converted); any other value is returned unchanged.

    Python 2 only: relies on ``dict.iteritems()`` and the ``unicode``
    builtin.
    """
    if isinstance(nested, dict):
        converted = {}
        for key, value in nested.iteritems():
            converted[to_strings(key)] = to_strings(value)
        return converted
    if isinstance(nested, list):
        return list(map(to_strings, nested))
    if isinstance(nested, unicode):
        return nested.encode('utf-8')
    # Numbers, None, byte strings, etc. pass through untouched.
    return nested

# Print the parsed structure after encoding its unicode strings to UTF-8 byte strings.
print to_strings(data)

这样会得到:

['RE  3364', 1140509724, 714348396, '84/149614/18/19/80', '6', 8, '', 'Eberswalde Hbf', [[-35, -27, -1064, '4', '82', None, None], [711, 639, 2823, '5', '81', None, None], [1151, 1043, 5155, '5', '83', None, None], [2383, 2230, 11893, '5', '83', None, None], [4019, 3731, 20530, '5', '82', None, None], [5637, 5232, 29168, '5', '83', None, None], [7273, 6733, 37806, '', '0', None, None]], 'Berlin-Lichtenberg', '8010036', 'Bernau(b Berlin)', '8013470', '09.11.17', '-1', None, '1:37', '1:18', None, None, '4', None, None]

(以上函数借助了 Mark Amery 的回答。)