我使用正则表达式从 HTML 表格中抓取了 2 组数据。数据:
/// Prepares the destination of the "popoverView" segue for presentation as a popover.
///
/// Segues with any other identifier are left untouched. The destination's
/// preferred content size is always set to 200x300; the popover settings are
/// applied only when the destination has a popover presentation controller.
override func prepareForSegue(segue: UIStoryboardSegue, sender: AnyObject?) {
    guard segue.identifier == "popoverView" else { return }

    let destination = segue.destinationViewController
    destination.preferredContentSize = CGSize(width: 200, height: 300)

    guard let popover = destination.popoverPresentationController else { return }
    popover.delegate = self

    // The anchor and arrow settings below could be configured in the storyboard instead.
    popover.sourceView = self.view
    let bounds = self.view.bounds
    popover.sourceRect = CGRect(x: CGRectGetMidX(bounds), y: CGRectGetMidY(bounds), width: 315, height: 230)
    // Raw value 0 is the empty option set, i.e. no arrow direction is permitted.
    popover.permittedArrowDirections = UIPopoverArrowDirection(rawValue: 0)
}
正则表达式:
<div class = "info">
<div class="name"><td>random</td></div>
<div class="hp"><td>123456</td></div>
<div class="email"><td>random@mail.com</td></div>
</div>
<div class = "info">
<div class="name"><td>random123</td></div>
<div class="hp"><td>654321</td></div>
<div class="email"><td>random123@mail.com</td></div>
</div>
所以使用正则表达式,我可以取出第一组数据:
# Extract the name, hp and email fields of the first record found in match3.
# Raw strings are used so the patterns contain no invalid string escapes
# ("\<" is not a recognised Python escape, and "<" needs no escaping in a regex).
# NOTE: re.search returns None when a pattern does not match, so .group(1)
# raises AttributeError if a field is missing from match3.
matchname = re.search(r'<div class="name"><td>(.*?)</td>', match3).group(1)
matchhp = re.search(r'<div class="hp"><td>(.*?)</td>', match3).group(1)
matchemail = re.search(r'<div class="email"><td>(.*?)</td>', match3).group(1)
将这组数据保存到数据库之后,我想接着保存下一组——该如何获取下一组数据?我尝试过用 findall 取出数据后再插入数据库,但所有内容都被放进了同一行。我需要将数据按组分别存入数据库(每组一行)。
我是 Python 新手,如有不清楚的地方请在评论中指出,我会尽量修改。
答案 0 :(得分:2)
您不应该使用正则表达式解析 HTML,那样只会一团糟。请改用 BeautifulSoup(BS4),以正确的方式来做:
# Parse the HTML held in match3 and collect every <td> cell in document order.
soup = BeautifulSoup(match3, "html.parser")
allTds = soup.find_all("td")

# Each record contributes three consecutive cells: name, hp, email.
# Zipping the three strided slices groups the cells per record. zip stops at
# the shortest slice, so a trailing incomplete record is skipped instead of
# raising IndexError as manual index arithmetic would.
names = [
    (name.text, hp.text, email.text)
    for name, hp, email in zip(allTds[0::3], allTds[1::3], allTds[2::3])
]
不过为了直接回答问题,我还是附上一个丑陋不堪、你永远不应该使用的正则表达式。再次强调:这是 HTML,不要用正则表达式来解析它。(请不要使用下面的代码)
# Each match is a 3-tuple holding the contents of three consecutive <td> cells.
# re.DOTALL lets ".+?" span the line breaks between the cells.
row_pattern = re.compile(r"<td>(.+?)</td>.+?<td>(.+?)</td>.+?<td>(.+?)</td>", re.DOTALL)
for first, second, third in row_pattern.findall(match3):
    print(first, second, third)
答案 1 :(得分:1)
正如 @Racialz 指出的那样,你应该考虑使用 HTML 解析器而不是正则表达式。
和 @Racialz 一样,我们也采用 BeautifulSoup,但构建一个更健壮的解决方案:
找到所有 info 元素,并在每个元素中查找各个字段,最终输出一个字典列表:
from pprint import pprint
from bs4 import BeautifulSoup
# Sample document: two "info" records, each with name, hp and email fields.
data = """
<div>
<div class = "info">
<div class="name"><td>random</td></div>
<div class="hp"><td>123456</td></div>
<div class="email"><td>random@mail.com</td></div>
</div>
<div class = "info">
<div class="name"><td>random123</td></div>
<div class="hp"><td>654321</td></div>
<div class="email"><td>random123@mail.com</td></div>
</div>
</div>
"""

soup = BeautifulSoup(data, "html.parser")
fields = ["name", "hp", "email"]

# Build one dictionary per "info" element, looking each field up by its class
# name inside that element only.
result = []
for info in soup.find_all(class_="info"):
    record = {}
    for field in fields:
        record[field] = info.find(class_=field).get_text()
    result.append(record)

pprint(result)
打印:
[{'email': 'random@mail.com', 'hp': '123456', 'name': 'random'},
{'email': 'random123@mail.com', 'hp': '654321', 'name': 'random123'}]