Question

我有以下用于抓取网络表单的脚本：

#!/usr/bin/env python

"""
Python script for searching firms on http://www.adviserinfo.sec.gov/IAPD/Content/Search/iapd_Search.aspx

Assumes that the input Excel file is in the same directory as this script.
"""

import os
import re
import sys
import django
import mechanize

from bs4 import BeautifulSoup
from xlrd import open_workbook

# Workbook that holds the firm names to look up (a relative path as written).
XLSX_FILE = 'Legal Names.xlsx'
# Search page that the scraper opens for every lookup.
IAPD_URL = 'http://www.adviserinfo.sec.gov/IAPD/Content/Search/iapd_Search.aspx'

#------------------------------------------------------------------------------------------
# Django bootstrap. Statement order matters here: extend sys.path, name the
# settings module, call django.setup(), and only then import the models.
#
# Both directories are resolved relative to the location of this file.
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), 'scraper/')))
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), 'scraper/scraper/')))

# 'settings' is a bare module name, so a settings module must be importable
# from some sys.path entry (presumably one of the two added above -- confirm).
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
django.setup()

# These imports stay below django.setup(); model modules need configured settings.
from django.core.exceptions import ObjectDoesNotExist
# NOTE(review): the star import is presumably what provides IapdFirm -- confirm.
from custom_scraper.models import *
#------------------------------------------------------------------------------------------

def legal_names(file=XLSX_FILE):
    '''
    Yield the firm names to search for from the first sheet of a workbook.

    Names are read from the column at zero-based index 1, starting at the
    second row so that the header row is skipped.

    Args:
        file: Path of the workbook handed to xlrd's ``open_workbook``.

    Yields:
        The value of every non-numeric cell in that column, top to bottom.
    '''
    wb = open_workbook(file)
    sheet = wb.sheets()[0]

    col = 1
    for row in range(1, sheet.nrows):
        name = sheet.cell(row, col).value
        # xlrd reports number and date cells as float and boolean/error cells
        # as int. None of those is a searchable name, and a float would later
        # fail when the scraper calls .encode() on it.
        if isinstance(name, (int, float)):
            continue

        yield name

class IapdScraper(object):
    '''
    Search the IAPD site for firms by name and record what comes back.

    Results are written to ``IapdFirm`` records keyed by the queried name. A
    record whose ``checked`` flag is already set is not looked up again.
    '''

    # The results table id is "ctl" plus digits followed by a fixed suffix.
    # Compiled once here instead of on every scrape() call.
    _RESULTS_TABLE_ID = re.compile(r'^ctl\d+_cphMainContent_grOrgResults$')

    def __init__(self):
        '''Create the mechanize browser and send a desktop Chrome user agent.'''
        self.br = mechanize.Browser()
        self.br.addheaders = [('User-agent',
                               'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7')]

    @staticmethod
    def _is_search_form(form):
        '''Return True for the form whose id attribute is "aspnetForm".'''
        return form.attrs.get('id', None) == 'aspnetForm'

    def scrape(self, q):
        '''
        Search for a Firm in the IAPD database by name

        Args:
            q: Firm name to search for; it is UTF-8 encoded before being
               placed in the search form.

        Returns:
            None. The outcome is stored on the ``IapdFirm`` record for ``q``,
            which is marked ``checked`` whether or not a result was found.
        '''
        # Fetch the record for this name, creating it on first sight, and
        # skip the network round trip when it has already been checked.
        firm, created = IapdFirm.objects.get_or_create(query_name=q)
        if not created and firm.checked:
            return

        # Two submits: the first posts the 'rdoOrg' search-by choice
        # (presumably switching the page to firm search), the second posts
        # the firm name itself.
        self.br.open(IAPD_URL)
        self.br.select_form(predicate=self._is_search_form)
        self.br.form['ctl00$cphMainContent$ucUnifiedSearch$rdoSearchBy'] = ['rdoOrg']
        self.br.submit()

        self.br.select_form(predicate=self._is_search_form)
        self.br.form['ctl00$cphMainContent$ucUnifiedSearch$txtFirm'] = q.encode('utf8')
        self.br.submit()

        # NOTE(review): no parser is named, so bs4 uses whichever one it
        # finds installed -- consider pinning one for reproducible parsing.
        soup = BeautifulSoup(self.br.response().read())
        table = soup.find('table', id=self._RESULTS_TABLE_ID)

        if not table:  # Not found
            print('Not Found')
            firm.checked = True
            firm.save()
            return

        rows = table.findAll('tr', recursive=False)
        rows = rows[2:-1]  # Skip records-per-page header/footer and title header

        # NOTE(review): every data row is written onto the same record, so
        # after the loop it holds the values of the last row only.
        for row in rows:
            cells = row.findAll('td', recursive=False)

            firm.legal_name = cells[0].b.text.strip()
            firm.other_name = cells[1].text.strip()
            firm.sec_number = cells[2].text.strip()
            firm.address = cells[3].text.strip()
            firm.checked = True
            firm.save()

if __name__ == '__main__':
    # Script entry point: reuse a single scraper instance for every name
    # produced by legal_names(), echoing each query before it is looked up.
    iapd = IapdScraper()
    for firm_name in legal_names():
        print('\nq=%s' % firm_name)
        iapd.scrape(q=firm_name)

但是，当我在venv中运行脚本时，出现以下错误：

c:\Users\bal2155\venv\Scripts>scraper.py
Traceback (most recent call last):
  File "C:\Users\bal2155\venv\Scripts\scraper.py", line 26, in <module>
    django.setup()
  File "C:\Python27\lib\site-packages\django\__init__.py", line 20, in setup
    configure_logging(settings.LOGGING_CONFIG, settings.LOGGING)
  File "C:\Python27\lib\site-packages\django\conf\__init__.py", line 46, in __ge
tattr__
    self._setup(name)
  File "C:\Python27\lib\site-packages\django\conf\__init__.py", line 42, in _set
up
    self._wrapped = Settings(settings_module)
  File "C:\Python27\lib\site-packages\django\conf\__init__.py", line 98, in __in
it__
    % (self.SETTINGS_MODULE, e)
ImportError: Could not import settings 'settings' (Is it on sys.path? Is there a
n import error in the settings file?): No module named settings

我很清楚，这一定与我设置 venv 的方式或安装 Django 的方式有关。我查阅了 Django 的文档，但不明白其中 myproject.settings 这种写法的含义。如能得到任何帮助，我将不胜感激。

Django导入设置错误

0 个答案: