我正在做日期规范化处理:把格式不规范的日期转换为规范的格式。例如
1|1/2002
变为
01-01-2002
和
5|9-20
变为
05-09-0020
分隔符仅限于 [-/|] 中的字符,任何不符合该规则的输入都已事先从列表中剔除。
re.sub(pattern, repl, string, count=0, flags=0)
我查看了库示例,但实际上找不到任何有助于解析数据的内容。
>>> def dashrepl(matchobj):
...     if matchobj.group(0) == '-': return ' '
...     else: return '-'
>>> re.sub('-{1,2}', dashrepl, 'pro----gram-files')
'pro--gram files'
>>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)
'Baked Beans & Spam'
答案 0 :(得分:4)
使用捕获组:
>>> import re
>>>
>>> pattern = re.compile(r'\b(\d{1,2})\D(\d{1,2})\D(\d{1,4})\b')
>>> def repl(match):
...     return '{:0>2}-{:0>2}-{:0>4}'.format(*match.groups())
...
>>> pattern.sub(repl, '1|1/2002')
'01-01-2002'
>>> pattern.sub(repl, '5|9-20')
'05-09-0020'
答案 1 :(得分:1)
import re
def format_date_string(data):
    """Zero-pad a three-field date string and join its fields with ``-``.

    The input is split on any of the separators ``|``, ``/`` or ``-``. The
    first two fields are left-padded with zeros to width 2 and the third to
    width 4, e.g. ``"1|1/2002"`` becomes ``"01-01-2002"``. Fields that are
    already wider than their target width are left unchanged.

    Args:
        data: Date string made of exactly three separator-delimited fields.

    Returns:
        The zero-padded fields joined with ``-``.

    Raises:
        ValueError: If ``data`` does not split into exactly three fields.
    """
    widths = (2, 2, 4)
    fields = re.split(r"[|/-]", data)
    # Reject malformed input explicitly instead of returning a truncated
    # result (too few fields) or failing on a width lookup (too many).
    if len(fields) != len(widths):
        raise ValueError(
            "expected 3 date fields separated by '|', '/' or '-', got %d: %r"
            % (len(fields), data)
        )
    return "-".join(
        field.zfill(width) for field, width in zip(fields, widths)
    )


print(format_date_string("1|1/2002"))  # 01-01-2002
print(format_date_string("5|9-20"))  # 05-09-0020
答案 2 :(得分:0)
import re
def convert_format(d):
    """Rewrite a three-number date string as zero-padded ``NN-NN-NNNN``.

    The string must start with three runs of digits, each pair separated by
    exactly one non-digit character. Each run is converted to ``int`` (so
    existing leading zeros are dropped) and re-rendered with minimum widths
    of 2, 2 and 4 digits.

    Args:
        d: Date string such as ``'1|1/2002'``.

    Returns:
        The normalized string, or ``False`` when ``d`` does not match.
    """
    # The separator is ``\D`` rather than ``.`` so that a digit can never be
    # consumed as a separator (otherwise '12345' would parse as 1, 3, 5).
    m = re.match(r'(\d+)\D(\d+)\D(\d+)', d)
    if not m:
        return False
    first, second, third = (int(x) for x in m.groups())
    return '%0.2d-%0.2d-%0.4d' % (first, second, third)


for d in ('1|1/2002', '5|9-20'):
    print(convert_format(d))
输出:
01-01-2002
05-09-0020
OR:
import re
def convert_format(d):
    """Rewrite the first three numbers found in ``d`` as ``NN-NN-NNNN``.

    Every run of digits in ``d`` is extracted, whatever separates them. The
    first three runs are converted to ``int`` (so existing leading zeros are
    dropped) and re-rendered with minimum widths of 2, 2 and 4 digits. Any
    further runs are ignored.

    Args:
        d: Date string such as ``'1|1/2002'``.

    Returns:
        The normalized string, or ``False`` when ``d`` contains fewer than
        three runs of digits.
    """
    numbers = re.findall(r'\d+', d)
    # A merely non-empty result is not enough: indexing three fields out of
    # one or two matches would raise IndexError instead of returning False.
    if len(numbers) < 3:
        return False
    first, second, third = (int(x) for x in numbers[:3])
    return '%0.2d-%0.2d-%0.4d' % (first, second, third)


for d in ('1|1/2002', '5|9-20'):
    print(convert_format(d))