我有一个很长的字符串列表,我只想提取其中包含 "Town": "某个城市" 和 "State": "某个州" 的行,然后把这些值放入一个以 town 和 state 作为列标题的数据框中。下面是我复制的该列表的一段摘录(由于列表实在太长,摘录中省略了开头的 [ 和结尾的 ])。有什么想法吗?
' "IsPayAtLocation": null,',
' "IsMembershipRequired": null,',
' "IsAccessKeyRequired": null,',
' "ID": 1,',
' "Title": "Public"',
' },',
' "UsageCost": "Free",',
' "AddressInfo": {',
' "ID": 57105,',
' "Title": "Somerset North",',
' "AddressLine1": "2800 W. Big Beaver Rd",',
' "AddressLine2": null,',
' "Town": "Troy",',
' "StateOrProvince": "MI",',
' "Postcode": "48084",',
' "CountryID": 2,',
' "Country": {',
' "ISOCode": "US",'
答案 0 :(得分:0)
^[^,]*\b(?:Town|State).*$
您可以将上面这个正则表达式与 re.findall 一起使用。
请参阅下面的演示链接和示例代码。
https://regex101.com/r/hE4jH0/34
import re

# Multiline pattern: anchored at a line start, it consumes a run of non-comma
# characters, then requires a word beginning with "Town" or "State", and
# finally takes the rest of that line.
p = re.compile(
    r'^[^,]*\b(?:Town|State).*$',
    flags=re.MULTILINE,
)

# Sample input, one record per line; adjacent literals are concatenated by
# the parser into a single string.
test_str = (
    "\"UsageCost\"', ' \"Free\",']\n"
    "[' \"AddressInfo\"', ' {']\n"
    "[' \"ID\"', ' 57105,']\n"
    "[' \"Title\"', ' \"Somerset North\",']\n"
    "[' \"AddressLine1\"', ' \"2800 W. Big Beaver Rd\",']\n"
    "[' \"AddressLine2\"', ' null,']\n"
    "[' \"Town\"', ' \"Troy\",']\n"
    "[' \"StateOrProvince\"', ' \"MI\",']\n"
    "[' \"Postcode\"', ' \"48084\",']\n"
    "[' \"CountryID\"', ' 2,']\n"
    "[' \"Country\"', ' {']\n"
    "[' \"ISOCode\"', ' \"US\",']\n"
    "[' \"ContinentCode\"', ' \"NA\",']\n"
    "[' \"ID\"', ' 2,']\n"
    "[' \"Title\"', ' \"United States\"']"
)

# Bare expression: the list of matching lines is only echoed when this is run
# in an interactive session.
p.findall(test_str)
答案 1 :(得分:0)
import json
import sqlite3

# Excerpt of the raw list: every element is one line of pretty-printed JSON.
strings = [
    ' ...',
    ' "AddressLine2": null,',
    ' "Town": "Troy",',
    ' "StateOrProvince": "MI",',
    ' ...',
    ' "Town": "Troy",',
    ' "StateOrProvince": "MO",',
]


def _parse_pair(line):
    """Return ``(key, value)`` for a line shaped like ``"key": value,``.

    The line is wrapped in braces and handed to the JSON parser, so ``null``,
    numbers and escaped quotes are decoded properly. Returns ``None`` when the
    line is not exactly one complete key/value member (for example ``...``,
    ``},`` or a line that only opens a nested object).
    """
    try:
        parsed = json.loads('{' + line.strip().rstrip(',') + '}')
    except ValueError:
        return None
    if len(parsed) != 1:
        return None
    return next(iter(parsed.items()))


def extract_towns(lines):
    """Pair each "Town" value with the "StateOrProvince" value that follows it.

    Args:
        lines: iterable of strings, each holding one line of pretty-printed
            JSON.

    Returns:
        A list of ``(town, state)`` tuples in input order. A town that is not
        followed by a state before the next town (or the end of input) is
        paired with ``None``; a state with no preceding town gets ``None`` as
        its town. JSON ``null`` values are returned as ``None``.
    """
    pairs = []
    town = None
    have_town = False
    for line in lines:
        member = _parse_pair(line)
        if member is None:
            continue
        key, value = member
        if key == 'Town':
            if have_town:
                # The previous town never got a state; emit it alone so later
                # records do not shift out of alignment.
                pairs.append((town, None))
            town, have_town = value, True
        elif key == 'StateOrProvince':
            pairs.append((town if have_town else None, value))
            town, have_town = None, False
    if have_town:
        pairs.append((town, None))
    return pairs


# Single ordered pass: collecting towns and states separately and zipping
# them would misalign every later row as soon as one field is missing.
town_state_pairs = extract_towns(strings)
cities = [town for town, _ in town_state_pairs]
states = [state for _, state in town_state_pairs]

# In-memory database; it is intentionally left open for further queries.
data = sqlite3.connect(':memory:')
data.execute('CREATE TABLE towns (town TEXT, state CHAR(2))')
data.executemany('INSERT INTO towns VALUES (?, ?)', town_state_pairs)

print('town\tstate')
for row in data.execute('SELECT * FROM towns'):
    # NULL columns are printed as empty fields instead of raising TypeError.
    print('\t'.join('' if value is None else str(value) for value in row))