我有一个可以成功运行的脚本,它能抓取数据并保存下来。但问题是,我并不需要所有类型的数据,所以我想用一个"字典"(关键字列表)只提取所需的相关数据。例如,我想要电话号码、电子邮件等,因此我的字典看起来像这样:
check_detail_of_vendors = ["line","phone","email","posts","followers","following"]
我想检查抓取到的信息,并打印出包含字典中所列关键字的字符串。我的代码目前只能做到接近这个效果。谁能给我指出正确的方向?
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import csv
import requests
# Links to visit; filled from the first column of the CSV named by `filename`.
contents = []

# Keywords of interest. This is a list, not a dict. The assignment must sit
# in a single statement: a bare `name =` followed by the list on the next
# line is a SyntaxError.
check_detail_of_bad = [
    "line",
    "phone",
    "email",
    "posts",
    "followers",
    "following",
]
# The same list is also looked up under the name check_detail_of_vendors;
# bind both names so either lookup resolves to the one list.
check_detail_of_vendors = check_detail_of_bad

# NOTE(review): save_path is not read by the code visible here - confirm it
# is still needed.
save_path = 'bad_mad.txt'
# CSV file whose first column holds the links to scrape.
filename = 'link_business_filter.csv'
def check(text=None, keywords=None):
    """Return the lines of *text* that mention at least one keyword.

    The previous version walked the text one character at a time and
    compared each character to the whole keyword list, which can never be
    equal, so nothing was ever selected and the function returned None.

    Args:
        text: Scraped text to filter. Defaults to the module-level
            ``script3`` so the existing zero-argument call keeps working.
        keywords: Iterable of keywords to look for. Defaults to the
            module-level ``check_detail_of_bad`` list.

    Returns:
        A list of the matching lines, in their original order. Matching is
        a case-insensitive substring test, so "Followers" matches the
        keyword "followers". The list is empty when nothing matches.
    """
    if text is None:
        text = script3
    if keywords is None:
        keywords = check_detail_of_bad

    # Lower-case the keywords once instead of once per line.
    wanted = [str(keyword).lower() for keyword in keywords]

    matches = []
    for line in str(text).splitlines():
        lowered = line.lower()
        if any(keyword in lowered for keyword in wanted):
            print(line)
            matches.append(line)
    return matches
# Load the links to visit: the first column of every non-empty CSV row.
# newline='' is what the csv module expects of files handed to csv.reader.
with open(filename, 'rt', newline='') as f:
    for row in csv.reader(f):
        if row:  # a blank row has no first column to read
            contents.append(row[0])

# Append one line of results per link. The `with` block guarantees the
# output file is flushed and closed even if a page fails to load.
with open('new.json', 'a') as copy_html:
    x_path = '//*[@id="react-root"]/section/main/div'
    for link in contents:
        print(link)
        browser = webdriver.Chrome('chromedriver')
        try:
            browser.get(link)
            # The ("xpath", value) form of find_element works on old and new
            # Selenium alike; find_element_by_xpath was removed in 4.3.
            script2 = browser.find_element("xpath", x_path)
            # Must stay a module-level name: check() reads script3 as a
            # global when it is called without arguments.
            script3 = script2.text
            copy_html.write(str(check()) + "\n")
            print(script3)
        finally:
            # quit() also ends the driver process, and running it in
            # `finally` releases the browser when scraping raises.
            browser.quit()