字符串citystate可以格式化为:
“Greenwich,CT”
“Greenwich, CT”
“Greenwich CT”
如何将字符串citystate拆分为两个单独的字符串,city和state(使用Python)?
(使得 city == "Greenwich" 且 state == "CT")
感谢您的帮助!
答案 0 :(得分:4)
import re
# The separator must consume at least one character.  A pattern such as
# r' ?,? ?' can match the empty string, and since Python 3.7 re.split() also
# splits on empty matches, which would chop the input into single characters.
# This alternation accepts only the non-empty separators " ", ",", " ,",
# ", ", " , " and "  ".
city, state = re.split(r' ,? ?|, ?', location)
要解决 "New York, NY" 这类城市名本身包含空格的问题,请使用切片:
# Split on a non-empty separator: an optional space, an optional comma and an
# optional space, with at least one of them present.  (A pattern that can
# match the empty string makes re.split() cut between every character on
# Python 3.7+.)
split_loc = re.split(r' ,? ?|, ?', location)
city = ' '.join(split_loc[:-1])  # everything up to the last element, joined by a space
state = split_loc[-1]  # the last element is the state
答案 1 :(得分:1)
做出一些假设:
(1)州代码由两个字符组成,并且您会通过查询州代码表来验证拆分结果
(2)您的输入数据已经转换为 unicode(Python 2.x)或 str(Python 3.x)
(3)您的数据可能存在各种各样的录入错误,甚至被人故意填入混乱的内容(例如多余的空白、重复的逗号、不间断空格等)。
**代码:**
# Input variants for the splitter functions.  Each one spells city u"Foo" and
# state u"YZ" with a different separator: single or doubled space, comma,
# doubled comma, non-breaking space (\xa0), nothing at all, plus optional
# trailing newline or surrounding whitespace.
tests = [
    # The 4th entry uses a doubled space; as a plain duplicate of the 1st it
    # would add no coverage.
    # NOTE(review): reconstructed from a whitespace-collapsed copy - confirm.
    u"Foo YZ", u"Foo YZ\n", u"Foo\xa0YZ", u"Foo  YZ",
    u"Foo, YZ", u"Foo, YZ\n", u"Foo,\xa0YZ",
    u"Foo,YZ", u"Foo,YZ\n", u"Foo,,YZ",
    u"FooYZ", u" Foo \t,\xa0 YZ \n",
]
import re
def machin(strg):
    """Split *strg* into ``(city, state)``, taking the last two characters as the state.

    Leading and trailing whitespace is stripped first.  The final two
    characters of what remains become the state; everything before them
    becomes the city, with internal whitespace runs collapsed to single
    spaces and any trailing commas/spaces removed.

    No validation is done: the state is whatever the last two characters are.
    """
    strg = strg.strip()
    state = strg[-2:]
    # split()/join collapses every whitespace run into one plain space.
    city = u" ".join(strg[:-2].split())
    # Drop the separator(s) left dangling in front of the state code.
    city = city.rstrip(", ")
    return city, state
def kettler(strg):
    """Split *strg* into ``(city, state)`` on space/comma separators.

    The string is split on every separator made of an optional space, an
    optional comma and an optional space (at least one character long).  The
    last piece is returned as the state; all earlier pieces are joined with a
    single space to form the city.  Only the plain space character counts as
    whitespace here, and nothing is stripped from the ends.
    """
    # r' ?,? ?' can match the empty string, and re.split() splits on empty
    # matches since Python 3.7, cutting the input into single characters.
    # This alternation matches exactly the non-empty strings that pattern
    # matches, so the result is the same as on interpreters whose re.split()
    # ignores empty matches.
    split_loc = re.split(r' ,? ?|, ?', strg)
    city = ' '.join(split_loc[:-1])  # everything up to the last element
    state = split_loc[-1]
    return city, state
def bothwell(strg):
    """Split *strg* into ``(city, state)`` on the last comma, else the last whitespace.

    If the string contains a comma, it is split at the last one and both
    halves are stripped.  Otherwise it is split at the last run of whitespace
    that is not trailing.  Whitespace inside the city is left untouched.

    Returns ``(None, None)`` when neither split yields two parts.
    """
    # Split on the last comma.
    res = strg.rsplit(',', 1)
    if len(res) == 2:
        return res[0].strip(), res[1].strip()
    # Otherwise split on the last non-trailing whitespace.
    res = strg.rsplit(None, 1)
    if len(res) == 2:
        return res[0].lstrip(), res[1]
    # The split failed.  Report it with a (None, None) pair rather than an
    # exception so that a caller unpacking two values keeps working.
    return None, None
def paulger_v2(strg):
    """Split *strg* into ``(city, state)`` purely by position.

    The state is the last two characters.  The city is everything except the
    last three characters, with leading/trailing commas and plain spaces
    removed.  The input is not stripped first, so trailing whitespace ends up
    in the state, and the character just before the state is always dropped
    whether or not it is a separator.
    """
    return strg[:-3].strip(", "), strg[-2:]
# Run every candidate splitter over every test input and count the cases in
# which it returns exactly the expected (city, state) pair.
funcs = (kettler, bothwell, paulger_v2, machin)
for func in funcs:
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("\n=== %s ===" % func.__name__)
    nok = 0
    for test in tests:
        # Single-word city.
        city, state = func(test)
        ok = city == u"Foo" and state == u"YZ"
        nok += ok
        print([ok, test, city, state])
        # Two-word city.  The replacement contains a doubled space while the
        # expected city has a single one, so only splitters that normalise
        # internal whitespace pass this half.
        # NOTE(review): doubled space reconstructed from a whitespace-collapsed
        # copy - confirm.
        test = test.replace(u"Foo", u"Foo  Baa")
        city, state = func(test)
        ok = city == u"Foo Baa" and state == u"YZ"
        nok += ok
        print([ok, test, city, state])
    print("\n+++ %s: %d OK +++" % (func.__name__, nok))
**输出:**
=== kettler ===
[True, u'Foo YZ', u'Foo', u'YZ']
[True, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[False, u'Foo YZ\n', u'Foo', u'YZ\n']
[False, u'Foo Baa YZ\n', u'Foo Baa', u'YZ\n']
[False, u'Foo\xa0YZ', '', u'Foo\xa0YZ']
[False, u'Foo Baa\xa0YZ', u'Foo', u'Baa\xa0YZ']
[True, u'Foo YZ', u'Foo', u'YZ']
[True, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ', u'Foo', u'YZ']
[True, u'Foo Baa, YZ', u'Foo Baa', u'YZ']
[False, u'Foo, YZ\n', u'Foo', u'YZ\n']
[False, u'Foo Baa, YZ\n', u'Foo Baa', u'YZ\n']
[False, u'Foo,\xa0YZ', u'Foo', u'\xa0YZ']
[False, u'Foo Baa,\xa0YZ', u'Foo Baa', u'\xa0YZ']
[True, u'Foo,YZ', u'Foo', u'YZ']
[True, u'Foo Baa,YZ', u'Foo Baa', u'YZ']
[False, u'Foo,YZ\n', u'Foo', u'YZ\n']
[False, u'Foo Baa,YZ\n', u'Foo Baa', u'YZ\n']
[False, u'Foo,,YZ', u'Foo ', u'YZ']
[False, u'Foo Baa,,YZ', u'Foo Baa ', u'YZ']
[False, u'FooYZ', '', u'FooYZ']
[False, u'Foo BaaYZ', u'Foo', u'BaaYZ']
[False, u' Foo \t,\xa0 YZ \n', u' Foo \t \xa0 YZ', u'\n']
[False, u' Foo Baa \t,\xa0 YZ \n', u' Foo Baa \t \xa0 YZ', u'\n']
+++ kettler: 8 OK +++
=== bothwell ===
[True, u'Foo YZ', u'Foo', u'YZ']
[False, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo YZ\n', u'Foo', u'YZ']
[False, u'Foo Baa YZ\n', u'Foo Baa', u'YZ']
[True, u'Foo\xa0YZ', u'Foo', u'YZ']
[False, u'Foo Baa\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo YZ', u'Foo', u'YZ']
[False, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ', u'Foo', u'YZ']
[False, u'Foo Baa, YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ\n', u'Foo', u'YZ']
[False, u'Foo Baa, YZ\n', u'Foo Baa', u'YZ']
[True, u'Foo,\xa0YZ', u'Foo', u'YZ']
[False, u'Foo Baa,\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo,YZ', u'Foo', u'YZ']
[False, u'Foo Baa,YZ', u'Foo Baa', u'YZ']
[True, u'Foo,YZ\n', u'Foo', u'YZ']
[False, u'Foo Baa,YZ\n', u'Foo Baa', u'YZ']
[False, u'Foo,,YZ', u'Foo,', u'YZ']
[False, u'Foo Baa,,YZ', u'Foo Baa,', u'YZ']
[False, u'FooYZ', None, None]
[False, u'Foo BaaYZ', u'Foo', u'BaaYZ']
[True, u' Foo \t,\xa0 YZ \n', u'Foo', u'YZ']
[False, u' Foo Baa \t,\xa0 YZ \n', u'Foo Baa', u'YZ']
+++ bothwell: 10 OK +++
=== paulger_v2 ===
[True, u'Foo YZ', u'Foo', u'YZ']
[False, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[False, u'Foo YZ\n', u'Foo', u'Z\n']
[False, u'Foo Baa YZ\n', u'Foo Baa', u'Z\n']
[True, u'Foo\xa0YZ', u'Foo', u'YZ']
[False, u'Foo Baa\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo YZ', u'Foo', u'YZ']
[False, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ', u'Foo', u'YZ']
[False, u'Foo Baa, YZ', u'Foo Baa', u'YZ']
[False, u'Foo, YZ\n', u'Foo', u'Z\n']
[False, u'Foo Baa, YZ\n', u'Foo Baa', u'Z\n']
[True, u'Foo,\xa0YZ', u'Foo', u'YZ']
[False, u'Foo Baa,\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo,YZ', u'Foo', u'YZ']
[False, u'Foo Baa,YZ', u'Foo Baa', u'YZ']
[False, u'Foo,YZ\n', u'Foo', u'Z\n']
[False, u'Foo Baa,YZ\n', u'Foo Baa', u'Z\n']
[True, u'Foo,,YZ', u'Foo', u'YZ']
[False, u'Foo Baa,,YZ', u'Foo Baa', u'YZ']
[False, u'FooYZ', u'Fo', u'YZ']
[False, u'Foo BaaYZ', u'Foo Ba', u'YZ']
[False, u' Foo \t,\xa0 YZ \n', u'Foo \t,\xa0 Y', u' \n']
[False, u' Foo Baa \t,\xa0 YZ \n', u'Foo Baa \t,\xa0 Y', u' \n']
+++ paulger_v2: 7 OK +++
=== machin ===
[True, u'Foo YZ', u'Foo', u'YZ']
[True, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo YZ\n', u'Foo', u'YZ']
[True, u'Foo Baa YZ\n', u'Foo Baa', u'YZ']
[True, u'Foo\xa0YZ', u'Foo', u'YZ']
[True, u'Foo Baa\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo YZ', u'Foo', u'YZ']
[True, u'Foo Baa YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ', u'Foo', u'YZ']
[True, u'Foo Baa, YZ', u'Foo Baa', u'YZ']
[True, u'Foo, YZ\n', u'Foo', u'YZ']
[True, u'Foo Baa, YZ\n', u'Foo Baa', u'YZ']
[True, u'Foo,\xa0YZ', u'Foo', u'YZ']
[True, u'Foo Baa,\xa0YZ', u'Foo Baa', u'YZ']
[True, u'Foo,YZ', u'Foo', u'YZ']
[True, u'Foo Baa,YZ', u'Foo Baa', u'YZ']
[True, u'Foo,YZ\n', u'Foo', u'YZ']
[True, u'Foo Baa,YZ\n', u'Foo Baa', u'YZ']
[True, u'Foo,,YZ', u'Foo', u'YZ']
[True, u'Foo Baa,,YZ', u'Foo Baa', u'YZ']
[True, u'FooYZ', u'Foo', u'YZ']
[True, u'Foo BaaYZ', u'Foo Baa', u'YZ']
[True, u' Foo \t,\xa0 YZ \n', u'Foo', u'YZ']
[True, u' Foo Baa \t,\xa0 YZ \n', u'Foo Baa', u'YZ']
+++ machin: 24 OK +++
答案 2 :(得分:0)
在你给出的例子中,州(state)总是最后两个字符,即 citystate[-2:],而城市(city)则是它之前的全部内容,去掉末尾的空格和逗号。
我再额外添加一个例子 "New York, NY"。
citystates = ["Greenwich, CT", "Greenwich,CT", "Greenwich CT", "New York, NY"]
for citystate in citystates:
    # The state is the last two characters.  The city is everything before
    # the last three characters, minus any comma/space left at its ends.
    city, state = citystate[:-3].strip(", "), citystate[-2:]
    print("City '%s' is in '%s'" % (city, state))
运行时给出:
City 'Greenwich' is in 'CT'
City 'Greenwich' is in 'CT'
City 'Greenwich' is in 'CT'
City 'New York' is in 'NY'
答案 3 :(得分:0)
def cityState(s):
    """Split *s* into a ``(city, state)`` tuple.

    If *s* contains a comma it is split at the last one and both parts are
    stripped.  Otherwise it is split at the last run of whitespace that is
    not trailing.

    Raises:
        ValueError: if neither split produces two parts.
    """
    # Split on the last comma.
    res = s.rsplit(',', 1)
    if len(res) == 2:
        return res[0].strip(), res[1].strip()
    # Otherwise split on the last non-trailing whitespace.
    res = s.rsplit(None, 1)
    if len(res) == 2:
        return res[0].lstrip(), res[1]
    # Otherwise the split failed.
    raise ValueError("Could not split {0} into city, state".format(s))
# Demonstrate cityState() on each supported input format.
for city_state in ["Greenwich, CT", "Greenwich,CT", "Greenwich CT", "New York, NY"]:
    # A single-argument print(...) call behaves the same on Python 2 and 3.
    print("City '{0}' is in '{1}'".format(*cityState(city_state)))
结果
City 'Greenwich' is in 'CT'
City 'Greenwich' is in 'CT'
City 'Greenwich' is in 'CT'
City 'New York' is in 'NY'