我在python代码中有两个方法调用,它在celery框架中执行Django。
def check_prefeteching(analysis_id, url):
parser = body_parser.Extractor()
tags = ['dns-prefetch', 'prefetch']
feed = parser.start_parser(analysis_id, url)
result = parser.check_tags(tags, feed)
return result
def check_feo_optimizations(analysis_id, url):
return FEO_processor.FEOProcessor().process_feo_debug_output(analysis_id, url)
这是我的body_parser
模块,它在check_prefeteching
方法中被调用。
"""
Module to encapsulate body parsing.
"""
from urlparse import urlparse
from bs4 import BeautifulSoup,Comment
import os
import shutil
from hct import utils
PAGE_SOURCE_CMD = 'phantomas %s --page-source'
FEO_PAGE_SOURCE_CMD = 'phantomjs RequestURL.js %s > body.html'
class Extractor(object):
"""
This file provides utility to do body parsing of an url.
"""
def __init__(self):
pass
def check_tags(self, tags, feed):
"""
Method: Method to handle the tags as encountered during parsing.
Also contains the business logic to check to prefetch and
preresolve DNS eanblement
Args: Takes the tag and its attributes as a list
Returns: A dictionary of tags and their values.
"""
result = {}
for tag in tags:
if len(feed.select('link[rel='+tag+']')) > 0:
result['link'] = tag
return result
def get_generated_html(self, url, has_headers):
"""
Method: Method to get the generated HTML content from Phantomas.
Args: Takes the url as an argument for which to get the HTML content.
hasHeaders defaulted to false for no headers.
Returns: Nothing.
"""
if not urlparse(url).scheme:
url = 'http://'+url
if has_headers == False:
command = PAGE_SOURCE_CMD % url
else:
command = FEO_PAGE_SOURCE_CMD % url
utils.execute_command(command).communicate()
def create_analysis_folder(self, analysis_id, has_headers):
"""
Method: To create a folder to fetch and analyse the HTML based on
analysis ID.
Args: Takes the Analsis ID as an argument.
Returns: The path to the created folder.
"""
analysis_id = str(analysis_id)
path = None
if not os.path.exists(analysis_id):
os.makedirs(analysis_id)
os.chdir(analysis_id)
if has_headers == False:
path = os.getcwd() + '/html'
print path
return path
else:
print "coming here"
os.makedirs('html')
os.chdir('html')
shutil.copy("../../hct/data_processors/RequestURL.js", os.getcwd())
return os.getcwd()
def start_parser(self, analysis_id, url, hasHeaders=False):
"""
Method: Method to start the parser.
Args: Analsyis ID and URL as an argument.
Returns: Nothing.
"""
feed = None
path = self.create_analysis_folder(analysis_id, hasHeaders)
self.get_generated_html(url, hasHeaders)
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith('.html'):
feed = BeautifulSoup(open(path + '/' +file).read())
if hasHeaders:
os.chdir('..')
shutil.rmtree(os.getcwd())
break
return feed
这是我的FEOProcessor
,它是在check_feo_optimizations
方法中调用的。
from body_parser import Extractor
import re
class FEOProcessor(object):
CHECKS = [
('Standard JavaScript Inlining Optimization', ('EMBED_JAVASCRIPT',), 'check_js_inlining'),
('HTML5 Advanced Cache', ('JAVASCRIPT_HTML5_CACHE', 'CSS_HTML5_CACHE'), 'check_html5_advanced_cache'),
('Cookieless Resource Domain', ('RENAME_JAVASCRIPT', 'RENAME_CSS'), 'check_cookieless_resource_domain'),
('Minificatiopn of JS', ('MINIFY_JAVASCRIPT',), 'check_js_minifaction'),
('File Versioning', ('RENAME_JAVASCRIPT', 'RENAME_IMAGE', 'RENAME_CSS'), 'check_file_versioning'),
('Small Image Embedding', ('EMBED_IMAGE',), 'check_small_image_embedding'),
('Responsive Image Loading', ('RESPONSIVE_IMAGES',), 'check_responsive_image_loading'),
('Asynchronous JS and CSS Loading', ('ASYNC_JAVASCRIPT',), 'check_async_js_and_css_loading'),
('JS Pre-Execution', ('PRE_EXECUTE_JAVASCRIPT',), 'check_js_pre_execution'),
('EDGESTART', ('EDGESTART',), 'check_edgestart'),
('Invoke Click OnTouch', ('BlzFastClick',), 'check_click'),
('Cellular Connection Keep-Alive', ('blzEnableMobileHeartbeat',), 'check_cell'),
]
def __init__(self):
self.parser = Extractor()
self.result = dict((k, 'Not Applied') for k,_,_ in self.CHECKS)
for _, keys, name in CHECKS:
locals()[name] = lambda self, result, _keys=keys: all(result.get(k, 0)>0 for k in _keys)
def process_feo_debug_output(self, analysis_id, url):
feed = self.parser.start_parser(analysis_id, url, True)
result = self.get_feo_tags(feed)
for name, _, func in self.CHECKS:
self.result[name] = ('Not Applied','Applied')[getattr(self,func)(result)]
return self.result
def get_feo_tags(self, feed):
result = {}
tag_list = re.findall(r'(?:TextTransApplied):\s*((?:(?:[A-Z]+(?:_[A-Z\d]+)+)?\(\d+\)\s*(?:,\s*|;))*)', str(feed))
for tag in tag_list:
for element in tag.split(","):
index = element.index('(')
if element[:index].strip():
result[element[:index].strip()] = (element.split("(")[1].rstrip(");"))
return result
def check_edgestart(self, result):
return 1 if 'EDGESTART' in result.keys() else 0
def check_click(self, result):
return 1 if 'BlzFastClick' in result.keys() else 0
def check_cell(self, result):
return 1 if 'blzEnableMobileHeartbeat' in result.keys() else 0
现在我的两个方法都是从一个url解析html并根据传递的标志创建一个分析文件夹。当我单独打电话时,它们都能很好地工作。但是当我用芹菜后端一个接一个地打电话给他们时如下
result[RULES.FEO_CHECKS] = check_feo_optimizations(analysis_id, url)
result[RULES.PREFETCH] = check_prefeteching(analysis_id, url)
return result
它会抛出一个错误。
[2014-12-29 14:08:20,776: ERROR/MainProcess] Task hct.tasks.analyse[d23b8b39-397a-4939-9d98-02b415e55ec1] raised unexpected: OSError(2, 'No such file or directory')
Traceback (most recent call last):
File "/Library/Python/2.7/site-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/Library/Python/2.7/site-packages/celery/app/trace.py", line 437, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/tasks.py", line 42, in analyse
rp.do_analysis()
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/rule_processor.py", line 41, in do_analysis
return self.lite_analysis()
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/rule_processor.py", line 69, in lite_analysis
result = rules.parse_har(har_json, self.rules, self.analysis.url, self.analysis.id)
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/rules.py", line 472, in parse_har
result[RULES.PREFETCH] = check_prefeteching(analysis_id, url)
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/rules.py", line 1066, in check_prefeteching
feed = parser.start_parser(analysis_id, url)
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/body_parser.py", line 93, in start_parser
path = self.create_analysis_folder(analysis_id, hasHeaders)
File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/body_parser.py", line 70, in create_analysis_folder
os.makedirs(analysis_id)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/os.py", line 157, in makedirs
mkdir(name, mode)
OSError: [Errno 2] No such file or directory: '13'
我不理解为什么我的第二个方法调用无法创建目录,最重要的是为什么makedirs()
调用throw No such file or directory: '13'
。
任何帮助?