我需要收集所有网络流量(“网络”标签)。我正在使用python,硒和firefox驱动程序以及browsermob-proxy。
我的代码有问题,因为我有一个域列表并且我的脚本永无止境。
#!/usr/bin/env python2.7
import requests
from time import sleep
import json
import io
import subprocess
import os
import psutil
from browsermobproxy import Server
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from datetime import datetime
process = subprocess.Popen("/usr/bin/php /var/www/html/urls.php",stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
response = process.stdout.read().strip()
data = json.loads(response)
for p in data['projects']:
url = p['url']
id = p['id']
# Start Proxy Server
browsermob_proxy_filepath = "/home/virtualenvironment/selenium-capturetraffic/browsermob-proxy-2.1.4/bin/browsermob-proxy"
server = Server(browsermob_proxy_filepath)
server.start()
proxy = server.create_proxy()
# Setup firefox profile: add the proxy
profile = webdriver.FirefoxProfile()
profile.set_preference('permissions.default.stylesheet', 2)
profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so','false')
profile.set_preference('browser.cache.disk.enable', False)
profile.set_preference('browser.cache.memory.enable', False)
profile.set_preference('browser.cache.offline.enable', False)
profile.set_preference('network.cookie.cookieBehavior', 2)
profile.set_proxy(proxy.selenium_proxy())
options = Options()
options.add_argument('--headless')
# Start the driver
sleep(10)
driver = webdriver.Firefox(options=options,firefox_profile=profile,executable_path='/home/virtualenvironment/geckodriver')
# Create HAR and get website
proxy.new_har("req",options={'trustAllServers':True,'captureContent':True})
try:
print "%s: Go %s"%(datetime.now(), url)
driver.get(url)
sleep(30)
print "%s: Finish %s"%(datetime.now(), url)
har_json = json.dumps(proxy.har, ensure_ascii=False, indent=4, separators=(',', ': '))
# Save '.HAR' file
harFile = '/var/www/html/temp/'+ id + '.har'
with io.open(harFile, mode='wt', buffering=1, encoding='utf8', errors='backslashreplace', newline=None) as output_har_f:
output_har_f.write(unicode(har_json))
os.chmod(harFile, 0o777)
finally:
#driver.quit()
parent_pid = server.process.pid
parent = psutil.Process(parent_pid)
for child in parent.children(recursive=True):
child.kill()
parent.kill()
server.stop()
driver.quit()
为什么我的代码永不结束?
当我的Firefox访问域时,我只需要捕获所有请求即可。