# Question's original attempt at scraping IP addresses from the myip.ms
# blacklist page. Kept verbatim, defects included, because the question text
# that follows asks why this code fails.
# NOTE(review): the indentation of the loop body appears to have been lost.
import requests
from bs4 import BeautifulSoup
url ='https://myip.ms/browse/blacklist/Blacklist_IP_Blacklist_IP_Addresses_Live_Database_Real-time'
response = requests.get(url)
data = response.text
soup = BeautifulSoup(data, 'html.parser')
# NOTE(review): find() yields only the first <td class="row_name"> element,
# not one element per table row.
ipList = soup.find("td",{"class": "row_name"})
# Looks for <td> elements nested inside that single cell.
rows = ipList.findAll('td')
for tr in rows:
# NOTE(review): the loop variable is `tr`, but `td` is used here and is never
# assigned anywhere in this snippet.
cols = td.findAll('td')
if len(cols) > 0:
# NOTE(review): `ip` is never assigned anywhere in this snippet.
print (ip.cols.text.strip())
我正在使用BeautifulSoup进行网页抓取,但遇到了一些问题。我想知道为什么无法从数据库表中抓取IP地址,以及如何将结果输出到CSV文件?
答案 0 :(得分:0)
问题是您对ipList使用了find(),它只会获取一个ip;可以改用findAll()或select()来返回ip数组。
输出将写入ip_output.csv文件:
import requests
from bs4 import BeautifulSoup

# Scrape the myip.ms live blacklist page and write the text of the link found
# in every element with class "row_name" to ip_output.csv, one entry per line.
url = 'https://myip.ms/browse/blacklist/Blacklist_IP_Blacklist_IP_Addresses_Live_Database_Real-time'

# A timeout keeps the script from hanging indefinitely on an unresponsive
# server; raise_for_status() stops us from parsing an HTTP error page as if
# it were the blacklist table.
page = requests.get(url, timeout=30)
page.raise_for_status()
response = page.content
soup = BeautifulSoup(response, 'html.parser')

# select() returns every matching element, whereas find() returns only the
# first one.
ipList = soup.select(".row_name")

with open('ip_output.csv', 'w', encoding='utf-8') as f:
    for ips in ipList:
        link = ips.find('a')
        # Skip cells that carry the class but contain no link, instead of
        # failing with AttributeError on None.
        if link is None:
            continue
        # Strip surrounding whitespace so each output line holds only the
        # link text.
        f.write(link.get_text(strip=True) + '\n')