使用 re 模块(正则表达式)仅提取数字,并以逗号分隔

时间:2019-06-28 09:09:07

标签: regex beautifulsoup

我有一个如下所示的字符串,它是通过 BeautifulSoup 获取的。我想提取每个门店后面对应的数字(例如 0、1);而 36、37 之类的数字表示鞋子的尺码。我期望的输出如下所示。我该如何实现?link

from bs4 import BeautifulSoup
import re

# Sample payload: the str() of a bytes object holding a JSON document that
# maps each shoe size to the per-store stock count (kept verbatim, including
# the embedded newlines).
text = """b'{"36":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"37":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"37,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"38":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"39":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"39,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"40":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"40,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"41,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"42":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"}}'"""

# Raw string: `\-`, `\.` and `\d` are regex escapes, not string escapes, so a
# plain literal triggers an "invalid escape sequence" warning on modern
# Python. The pattern value itself is unchanged.
#
# The pattern is an alternation of three capturing groups (one per store).
# With more than one group, re.findall() returns a tuple per match and fills
# the groups that did not participate with '', which is why the output is a
# list of 3-tuples containing empty strings.
no_stock = re.findall(
    r'"aarhus":"(\-?\.?\d+)"|"pilestraede":"(\-?\.?\d+)"|"klosterstraede":"(\-?\.?\d+)',
    text,
)
print(no_stock)

print(no_stock) 的输出:

[('0', '', ''), ('', '0', ''), ('', '', '1'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '1'), ('0', '', ''), ('', '0', ''), ('', '', '1'), ('0', '', ''), ('', '0', ''), ('', '', '0'), ('0', '', ''), ('', '0', ''), ('', '', '0')]

我想要的是:

[('0','0','1','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','1','0','0','1','0','0','0','0','0','0')]

1 个答案:

答案 0 :(得分:1)

看起来您是以 bytes(字节串)形式从 Javascript / JSON 中获取了这些数据,然后又对其调用了 str(),因此需要执行两次 literal_eval() 才能将其解码:第一次把字符串还原为 bytes 对象,第二次把解码后的文本解析为字典。我建议在抓取数据后直接运行 literal_eval(),省去中间的 str() 步骤。

from itertools import chain
from ast import literal_eval

# Sample payload: the str() of a bytes object holding a JSON document that
# maps each shoe size to the per-store stock count (kept verbatim, including
# the embedded newlines).
text = """b'{"36":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"37":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"37,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"38":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"39":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"39,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"40":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"40,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"1"},
"41,5":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"},
"42":{"aarhus":"0","pilestraede":"0","klosterstraede":"0"}}'"""

# Stage 1: the text is the repr of a bytes literal (b'...'). A quoted literal
# cannot span physical lines, so the newlines are stripped before evaluating
# it back into a real bytes object.
raw_bytes = literal_eval(text.replace('\n', ''))

# Stage 2: the decoded bytes are a dict literal of string keys and values,
# which literal_eval() can parse directly.
# NOTE(review): this only works while the payload contains nothing but
# strings/numbers; JSON `true`/`false`/`null` are not Python literals.
d = literal_eval(raw_bytes.decode('utf-8'))

# Flatten the per-size {store: count} dicts into a single list of counts,
# preserving size order and, within each size, store order.
stock_counts = list(chain.from_iterable(stores.values() for stores in d.values()))
print(stock_counts)

打印:

['0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0', '0']