我需要从一个文件中提取邮政编码(post code),该文件是我从 Google Docs 下载的,内容如下:
[
{
'address_components':[
{
'long_name':'Orrs Walk',
'short_name':'Orrs Walk',
'types':[
'route'
]
},
{
'long_name':'South Wharf',
'short_name':'South Wharf',
'types':[
'locality',
'political'
]
},
{
'long_name':'Melbourne City',
'short_name':'Melbourne',
'types':[
'administrative_area_level_2',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
},
{
'long_name':'3006',
'short_name':'3006',
'types':[
'postal_code'
]
}
],
'formatted_address':'Orrs Walk, South Wharf VIC 3006, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.8236993,
'lng':144.9560253
},
'southwest':{
'lat':-37.8250865,
'lng':144.9523143
}
},
'location':{
'lat':-37.8247832,
'lng':144.9541924
},
'location_type':'GEOMETRIC_CENTER',
'viewport':{
'northeast':{
'lat':-37.8230439197085,
'lng':144.9560253
},
'southwest':{
'lat':-37.8257418802915,
'lng':144.9523143
}
}
},
'place_id':'ChIJb1iRdFdd1moReZ57m5XraDk',
'types':[
'route'
]
},
{
'address_components':[
{
'long_name':'South Wharf',
'short_name':'South Wharf',
'types':[
'locality',
'political'
]
},
{
'long_name':'Melbourne City',
'short_name':'Melbourne',
'types':[
'administrative_area_level_2',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
},
{
'long_name':'3006',
'short_name':'3006',
'types':[
'postal_code'
]
}
],
'formatted_address':'South Wharf VIC 3006, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.8230788,
'lng':144.9566558
},
'southwest':{
'lat':-37.8276482,
'lng':144.9475157
}
},
'location':{
'lat':-37.825,
'lng':144.952
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.8230788,
'lng':144.9566558
},
'southwest':{
'lat':-37.8276482,
'lng':144.9475157
}
}
},
'place_id':'ChIJ1YQ5tfdn1moRAAGNIXVWBAU',
'types':[
'locality',
'political'
]
},
{
'address_components':[
{
'long_name':'Melbourne',
'short_name':'Melbourne',
'types':[
'colloquial_area',
'locality',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'Melbourne VIC, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.5112737,
'lng':145.5125288
},
'southwest':{
'lat':-38.4338593,
'lng':144.5937418
}
},
'location':{
'lat':-37.8136276,
'lng':144.9630576
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.5112737,
'lng':145.5125288
},
'southwest':{
'lat':-38.4338593,
'lng':144.5937418
}
}
},
'place_id':'ChIJ90260rVG1moRkM2MIXVWBAQ',
'types':[
'colloquial_area',
'locality',
'political'
]
},
{
'address_components':[
{
'long_name':'3006',
'short_name':'3006',
'types':[
'postal_code'
]
},
{
'long_name':'South Wharf',
'short_name':'South Wharf',
'types':[
'locality',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'South Wharf VIC 3006, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.8192449,
'lng':144.971423
},
'southwest':{
'lat':-37.8314821,
'lng':144.946908
}
},
'location':{
'lat':-37.8245483,
'lng':144.963937
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.8192449,
'lng':144.971423
},
'southwest':{
'lat':-37.8314821,
'lng':144.946908
}
}
},
'place_id':'ChIJb3SLlEdd1moR0DkuRnhWBBw',
'postcode_localities':[
'South Wharf',
'Southbank'
],
'types':[
'postal_code'
]
},
{
'address_components':[
{
'long_name':'Melbourne City',
'short_name':'Melbourne',
'types':[
'administrative_area_level_2',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'Melbourne, VIC, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.7754505,
'lng':144.9913306
},
'southwest':{
'lat':-37.8506672,
'lng':144.8969813
}
},
'location':{
'lat':-37.8100512,
'lng':144.9281496
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.7754505,
'lng':144.9913306
},
'southwest':{
'lat':-37.8506672,
'lng':144.8969813
}
}
},
'place_id':'ChIJv_FYgkNd1moRpxLuRXZURFs',
'types':[
'administrative_area_level_2',
'political'
]
},
{
'address_components':[
{
'long_name':'CBD & South Melbourne',
'short_name':'CBD & South Melbourne',
'types':[
'political'
]
},
{
'long_name':'Melbourne',
'short_name':'Melbourne',
'types':[
'colloquial_area',
'locality',
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'CBD & South Melbourne, Melbourne VIC, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.7730008,
'lng':145.0158347
},
'southwest':{
'lat':-37.8574821,
'lng':144.8969813
}
},
'location':{
'lat':-37.8362164,
'lng':144.9501708
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.7730008,
'lng':145.0158347
},
'southwest':{
'lat':-37.8574821,
'lng':144.8969813
}
}
},
'place_id':'ChIJORuuCkxd1moRNMrml7yk-C8',
'types':[
'political'
]
},
{
'address_components':[
{
'long_name':'Melbourne Metropolitan Area',
'short_name':'Melbourne Metropolitan Area',
'types':[
'political'
]
},
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'Melbourne Metropolitan Area, VIC, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-37.4017502,
'lng':146.1925247
},
'southwest':{
'lat':-38.4999344,
'lng':144.4440773
}
},
'location':{
'lat':-37.8001063,
'lng':145.3143491
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-37.4017502,
'lng':146.1925247
},
'southwest':{
'lat':-38.4999344,
'lng':144.4440773
}
}
},
'place_id':'ChIJmYjB3BaF1moRtCmV8wIoZQU',
'types':[
'political'
]
},
{
'address_components':[
{
'long_name':'Victoria',
'short_name':'VIC',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'Victoria, Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-33.9806474,
'lng':150.0169685
},
'southwest':{
'lat':-39.18316069999999,
'lng':140.9616819
}
},
'location':{
'lat':-37.4713077,
'lng':144.7851531
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-33.9806474,
'lng':149.9764884
},
'southwest':{
'lat':-39.1590935,
'lng':140.9616819
}
}
},
'place_id':'ChIJT5UYfksx1GoRNJWCvuL8Tlo',
'types':[
'administrative_area_level_1',
'political'
]
},
{
'address_components':[
{
'long_name':'Australia',
'short_name':'AU',
'types':[
'country',
'political'
]
}
],
'formatted_address':'Australia',
'geometry':{
'bounds':{
'northeast':{
'lat':-9.187026399999999,
'lng':159.2872223
},
'southwest':{
'lat':-54.83376579999999,
'lng':110.9510339
}
},
'location':{
'lat':-25.274398,
'lng':133.775136
},
'location_type':'APPROXIMATE',
'viewport':{
'northeast':{
'lat':-0.6911343999999999,
'lng':166.7429167
},
'southwest':{
'lat':-51.66332320000001,
'lng':100.0911072
}
}
},
'place_id':'ChIJ38WHZwf9KysRUhNblaFnglM',
'types':[
'country',
'political'
]
}
]
我需要提取邮政编码,也就是 'short_name': '3006' 中的值,但并不是每个 short_name 后面跟的都是邮政编码。
我已经尝试了一些方法,但只得到了空结果。我使用的是 Python 的 re 模块,代码如下:
import re

# First attempt from the question, kept exactly as ineffective as posted.
# The pattern is anchored to the start of the line and expects bare digits
# right after the colon, while the data quotes the value
# ('short_name':'3006'), so findall has nothing to return.
pattern = '^\'short_name\':([0-9]+)'

source = open('data')
for raw_line in source:
    # Drop the trailing newline/whitespace before matching.
    found = re.findall(pattern, raw_line.rstrip())
    print(found)
我得到的结果是:
[]
Process finished with exit code 0
我已经解决了这个问题,原因是键和值之间有很多空格。修改后的代码如下:
import re

# Matches 'short_name': '<exactly four digits>' anywhere in a line.
# \s* tolerates any amount of whitespace between the colon and the value,
# and the quotes around the digits are part of the match because the data
# stores the code as a quoted string.
POSTCODE_RE = re.compile(r'\'short_name\':\s*\'([0-9]{4})\'')


def find_postcodes(line):
    """Return the four-digit short_name values found in one line of text.

    Args:
        line: A single line of the data file (trailing whitespace is ignored).

    Returns:
        A list of the captured digit strings; empty when the line has no
        four-digit short_name.
    """
    return POSTCODE_RE.findall(line.rstrip())


def main(path='data'):
    """Print the match list for every line of the file at ``path``."""
    # The with-statement closes the file even if reading or printing fails.
    with open(path) as hand:
        for line in hand:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(find_postcodes(line))


if __name__ == '__main__':
    main()
答案 0 :(得分:0)
您正在处理的数据看起来像JSON,但它不是 - 因为单引号(JSON需要双引号)。
假设 fileContent 是文件的内容(一个字符串,如问题中所示),您可以先把它转换为 Python 数据结构,然后把它当作字典列表来搜索:
import ast

# Parse the text as a Python literal; the single-quoted strings mean it is
# not valid JSON, so a JSON parser would reject it.
data = ast.literal_eval(fileContent)

# Build one inner list per result entry containing the short_name of every
# address component tagged 'postal_code'. Entries without such a component
# contribute an empty list.
codes = []
for entry in data:
    postal_names = []
    for field in entry['address_components']:
        if 'postal_code' in field['types']:
            postal_names.append(field['short_name'])
    codes.append(postal_names)
#[['3006'], ['3006'], [], ['3006'], [], [], [], [], []]
您也可以从此列表中提取任何其他信息。
答案 1 :(得分:-3)
'[a-z]++_[a-z]++':'([0-9]++)'
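如果冒号两侧有空格,请在模式中相应位置加上 \s* 来匹配这些空格。另外请注意:++ 是占有量词(possessive quantifier),Python 的 re 模块从 3.11 版本才开始支持;在更早的版本中请改用普通的 +。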