Celery 执行任务时抛出错误

时间:2014-12-29 14:20:20

标签: python django django-celery celery-task

我的 Python 代码中有两个方法,它们在 Django 项目的 Celery 框架中执行。

def check_prefeteching(analysis_id, url):
    parser = body_parser.Extractor()
    tags = ['dns-prefetch', 'prefetch']
    feed = parser.start_parser(analysis_id, url)
    result = parser.check_tags(tags, feed)
    return result

def check_feo_optimizations(analysis_id, url):
    return FEO_processor.FEOProcessor().process_feo_debug_output(analysis_id, url)

这是我的body_parser模块,它在check_prefeteching方法中被调用。

"""
Module to encapsulate body parsing.
"""

from urlparse import urlparse
from bs4 import BeautifulSoup,Comment
import os
import shutil
from hct import utils

PAGE_SOURCE_CMD = 'phantomas %s --page-source'
FEO_PAGE_SOURCE_CMD = 'phantomjs  RequestURL.js %s > body.html'


class Extractor(object):
    """
    This file provides utility to do body parsing of an url.
    """

    def __init__(self):
        pass

    def check_tags(self, tags, feed):
        """
        Method: Method to handle the tags as encountered during parsing.
                Also contains the business logic to check to prefetch and
                preresolve DNS eanblement

        Args: Takes the tag and its attributes as a list

        Returns: A dictionary of tags and their values.
        """
        result = {}
        for tag in tags:
            if len(feed.select('link[rel='+tag+']')) > 0:
                result['link'] = tag
        return result

    def get_generated_html(self, url, has_headers):
        """
        Method: Method to get the generated HTML content from Phantomas.

        Args: Takes the url as an argument for which to get the HTML content.
              hasHeaders defaulted to false for no headers.

        Returns: Nothing.
        """
        if not urlparse(url).scheme:
            url = 'http://'+url
        if has_headers == False:
            command = PAGE_SOURCE_CMD % url
        else:
            command = FEO_PAGE_SOURCE_CMD % url
        utils.execute_command(command).communicate()


    def create_analysis_folder(self, analysis_id, has_headers):
        """
        Method: To create a folder to fetch and analyse the HTML based on
                analysis ID.

        Args: Takes the Analsis ID as an argument.

        Returns: The path to the created folder.
        """

        analysis_id = str(analysis_id)
        path = None
        if not os.path.exists(analysis_id):
            os.makedirs(analysis_id)
        os.chdir(analysis_id)
        if has_headers == False:
            path = os.getcwd() + '/html'
            print path
            return path
        else:
            print "coming here"
            os.makedirs('html')
            os.chdir('html')
            shutil.copy("../../hct/data_processors/RequestURL.js", os.getcwd()) 
            return os.getcwd()

    def start_parser(self, analysis_id, url, hasHeaders=False):
        """
        Method: Method to start the parser.

        Args: Analsyis ID and URL as an argument.

        Returns: Nothing.
        """

        feed = None
        path = self.create_analysis_folder(analysis_id, hasHeaders)
        self.get_generated_html(url, hasHeaders)
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith('.html'):
                    feed = BeautifulSoup(open(path + '/' +file).read())
                    if hasHeaders:
                        os.chdir('..')
                    shutil.rmtree(os.getcwd())

            break
        return feed

这是我的FEOProcessor,它是在check_feo_optimizations方法中调用的。

from body_parser import Extractor
import re

class FEOProcessor(object):

    CHECKS = [
        ('Standard JavaScript Inlining Optimization', ('EMBED_JAVASCRIPT',), 'check_js_inlining'),
        ('HTML5 Advanced Cache', ('JAVASCRIPT_HTML5_CACHE', 'CSS_HTML5_CACHE'), 'check_html5_advanced_cache'),
        ('Cookieless Resource Domain', ('RENAME_JAVASCRIPT', 'RENAME_CSS'), 'check_cookieless_resource_domain'),
        ('Minificatiopn of JS', ('MINIFY_JAVASCRIPT',), 'check_js_minifaction'),
        ('File Versioning', ('RENAME_JAVASCRIPT', 'RENAME_IMAGE', 'RENAME_CSS'), 'check_file_versioning'),
        ('Small Image Embedding', ('EMBED_IMAGE',), 'check_small_image_embedding'),
        ('Responsive Image Loading', ('RESPONSIVE_IMAGES',), 'check_responsive_image_loading'),
        ('Asynchronous JS and CSS Loading', ('ASYNC_JAVASCRIPT',), 'check_async_js_and_css_loading'),
        ('JS Pre-Execution', ('PRE_EXECUTE_JAVASCRIPT',), 'check_js_pre_execution'),
        ('EDGESTART', ('EDGESTART',), 'check_edgestart'),
        ('Invoke Click OnTouch', ('BlzFastClick',), 'check_click'),
        ('Cellular Connection Keep-Alive', ('blzEnableMobileHeartbeat',), 'check_cell'),
    ]

    def __init__(self):
        self.parser = Extractor()
        self.result = dict((k, 'Not Applied') for k,_,_ in self.CHECKS)

    for _, keys, name in CHECKS:
        locals()[name] = lambda self, result, _keys=keys: all(result.get(k, 0)>0 for k in _keys)


    def process_feo_debug_output(self, analysis_id, url):
        feed = self.parser.start_parser(analysis_id, url, True)
        result = self.get_feo_tags(feed)
        for name, _, func in self.CHECKS:
            self.result[name] = ('Not Applied','Applied')[getattr(self,func)(result)]
        return self.result

    def get_feo_tags(self, feed):
        result = {}
        tag_list = re.findall(r'(?:TextTransApplied):\s*((?:(?:[A-Z]+(?:_[A-Z\d]+)+)?\(\d+\)\s*(?:,\s*|;))*)', str(feed))
        for tag in tag_list:
            for element in tag.split(","):
                index = element.index('(')
                if element[:index].strip():
                    result[element[:index].strip()] = (element.split("(")[1].rstrip(");"))
        return result

    def check_edgestart(self, result):
        return 1 if 'EDGESTART' in result.keys() else 0

    def check_click(self, result):
        return 1 if 'BlzFastClick' in result.keys() else 0

    def check_cell(self, result):
        return 1 if 'blzEnableMobileHeartbeat' in result.keys() else 0

现在,我的这两个方法都会从一个 URL 解析 HTML,并根据传入的标志创建一个分析文件夹。当我单独调用它们时,它们都工作正常。但是当我在 Celery 后端按如下方式依次调用它们时:

result[RULES.FEO_CHECKS] = check_feo_optimizations(analysis_id, url)
result[RULES.PREFETCH] = check_prefeteching(analysis_id, url)
return result

它会抛出一个错误。

 [2014-12-29 14:08:20,776: ERROR/MainProcess] Task hct.tasks.analyse[d23b8b39-397a-4939-9d98-02b415e55ec1] raised unexpected: OSError(2, 'No such file or directory')
Traceback (most recent call last):
  File "/Library/Python/2.7/site-packages/celery/app/trace.py", line 240, in trace_task
    R = retval = fun(*args, **kwargs)
  File "/Library/Python/2.7/site-packages/celery/app/trace.py", line 437, in __protected_call__
    return self.run(*args, **kwargs)
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/tasks.py", line 42, in analyse
    rp.do_analysis()
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/rule_processor.py", line 41, in do_analysis
    return self.lite_analysis()
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/rule_processor.py", line 69, in lite_analysis
    result = rules.parse_har(har_json, self.rules, self.analysis.url, self.analysis.id)
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/rules.py", line 472, in parse_har
    result[RULES.PREFETCH] = check_prefeteching(analysis_id, url)
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/rules.py", line 1066, in check_prefeteching
    feed = parser.start_parser(analysis_id, url)
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/body_parser.py", line 93, in start_parser
    path = self.create_analysis_folder(analysis_id, hasHeaders)
  File "/Users/rokumar/SiteAnalysisGit/Src/hct/hct/data_processors/body_parser.py", line 70, in create_analysis_folder
    os.makedirs(analysis_id)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/os.py", line 157, in makedirs
    mkdir(name, mode)
OSError: [Errno 2] No such file or directory: '13'

我不理解为什么我的第二个方法调用无法创建目录,更重要的是,为什么 makedirs() 调用会抛出 No such file or directory: '13' 错误。

任何帮助?

0 个答案:

没有答案