数据库连接问题

时间:2018-03-19 20:55:56

标签: python selenium peewee

我正在搭建一个站点:从多个来源提取数据,将其合并到一个数据库中,然后用这些数据为各个团队分别计算相关数字。我刚接触 Python、peewee 和 Selenium,都还是新手。

以下是我的所有代码:

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup 
from models import *

import os # File I/O
import time
import shutil
import glob
import configparser
# Parser pointed at config.ini in the current working directory.
config_parser = configparser.ConfigParser()
config_parser.read("config.ini")

# --- Module-level settings ---

# Abbreviated month names, January through December.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

# Absolute path of the ./downloads folder, resolved against the current
# working directory.
download_dir = os.path.abspath('./downloads/')

# Identifiers the script iterates over (placeholder values).
ids = [
    'XXXXXX',
    'DDDDDD',
    'WWWWWW',
    'AAAAAAA',
    'VVVVVV',
    'FFFFFFF',
]

# *.html files present in download_dir at the moment this line runs.
filelist = glob.glob(download_dir + '/*.html')

# Relative path of the database folder.
dbpath = './db'

def store_data(download_dir):
    """Load every report file found under *download_dir* into ``Charge``.

    Creates the ``Charge`` table, then walks *download_dir*, parses each file
    as HTML and inserts one ``Charge`` row for every ``<tr>`` that follows the
    header row of the first ``<table>`` in the file.  The part of the file
    name before its first ``.`` is stored as the row's ``owner``.

    Files that are empty, contain no ``<table>``, or have no header row are
    reported on stdout and skipped.

    Args:
        download_dir: Directory to search (recursively) for report files.

    Raises:
        IndexError: A data row has fewer cells than the columns read below.
        ValueError: An hours/rate cell cannot be converted to a number.
        Exception: Anything raised by ``database`` or ``Charge`` propagates;
            the connection is closed first.
    """
    # 0-based index of the header <tr>; data rows start right after it.
    # NOTE(review): the original comments disagreed on the layout ("first 56
    # tr's are headings" vs. header at row 18); the coded indexes are kept.
    header_row = 18
    first_data_row = header_row + 1

    database.connect()
    try:
        # safe=False: presumably no IF NOT EXISTS guard, so this is expected
        # to fail when the table already exists -- confirm against the
        # installed peewee version.
        database.create_tables([Charge], safe=False)
    finally:
        database.close()

    for root, _dirs, files in os.walk(download_dir):
        for filename in files:
            print(filename)

            # Join with the directory os.walk is currently in, so files in
            # sub-folders resolve to a real path.
            path = os.path.abspath(os.path.join(root, filename))
            with open(path, 'r') as html:
                file_markup = html.read()
            if file_markup == '':
                print('ERROR: File was not read')
                continue  # nothing to parse

            print('Reading {0} into BS4'.format(filename))
            soup = BeautifulSoup(file_markup, 'html.parser')
            print('File parsed')

            table = soup.find('table')
            if table is None:
                print('ERROR: No table found in {0}'.format(filename))
                continue

            rows = table.find_all('tr')
            if len(rows) <= header_row:
                print('ERROR: No header row found in {0}'.format(filename))
                continue

            owner = filename.split('.')[0]
            database.connect()
            try:
                for row in rows[first_data_row:]:
                    cells = [td.text.strip() for td in row.find_all('td')]
                    Charge.create(pmt_id=cells[1],
                                  prism_id=cells[2],
                                  owner=owner,
                                  date=cells[11],
                                  reg_hours=float(cells[17]),
                                  ot_hours=float(cells[18]),
                                  rate=int(cells[42]),
                                  resource=cells[14],
                                  pmt_status=cells[24],
                                  resource_status=cells[15])
            finally:
                # Release the connection even when an insert fails.
                database.close()

def load_home_page(driver):
    """Open the CMPM main page and click its three start-up inputs in order.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
    """
    driver.get('https://intra.att.com/cmpm/main.cfm')
    # Each input is looked up only after the previous one has been clicked.
    for selector in (
        'input[value="Show Options"]',
        'input[value="Enable Link"]',
        'input[name="successOK"]',
    ):
        driver.find_element_by_css_selector(selector).click()

def type_supervisor_id(driver, supervisor):
    """Replace the contents of the ``sattuid`` input with *supervisor*.

    Args:
        driver: Selenium WebDriver showing the form.
        supervisor: Text typed into the input after it has been cleared.
    """
    id_field = driver.find_element_by_css_selector('input[name="sattuid"]')
    id_field.clear()
    id_field.send_keys(supervisor)

def select_date(driver, date, start_date='Jan 2018'):
    """Pick *start_date* in the ``startdate`` drop-down.

    The selection is made once; the earlier version repeated the identical
    selection once per month name and overwrote its own ``date`` argument
    while doing so.

    Args:
        driver: Selenium WebDriver showing the form.
        date: Unused.  Kept so existing positional callers keep working.
        start_date: Visible text of the option to select.  Defaults to
            ``'Jan 2018'``, the value that used to be hard-coded.
    """
    # Author's note: needs to select STARTDATE and change it to Jan 2018;
    # this worked as of Feb 7th.
    select = Select(driver.find_element_by_name('startdate'))
    select.select_by_visible_text(start_date)

def results_display(driver,results):
    """Click the ``DontDisplay`` list, press Ctrl+A, then click the right arrow.

    NOTE(review): presumably this moves every entry out of the "don't display"
    list so that all columns are shown -- confirm against the page.

    Args:
        driver: Selenium WebDriver showing the form.
        results: Not used by this function.
    """
    # Focus the list box first so the key chord below is sent to it.
    driver.find_element_by_css_selector('select[name="DontDisplay"]').click()

    # Ctrl+A, built step by step and then executed.
    select_all = ActionChains(driver)
    select_all.key_down(Keys.CONTROL)
    select_all.send_keys('a')
    select_all.key_up(Keys.CONTROL)
    select_all.perform()

    right_arrow = driver.find_element_by_css_selector('img[src="/cmpmrptstatic/images/right.jpg"]')
    right_arrow.click()

def _rename_downloaded_reports(report_id):
    """Rename every file under ``download_dir`` starting with 'XL' to '<report_id>.html'."""
    target = os.path.abspath(os.path.join(download_dir, report_id + '.html'))
    for root, _dirs, files in os.walk(download_dir):
        for name in files:
            if name.startswith('XL'):
                print('Renaming {0} to {1}'.format(name, report_id))
                # Join with root so a file found in a sub-folder resolves;
                # os.replace overwrites an existing target on every platform
                # (os.rename fails on Windows when the target exists).
                os.replace(os.path.abspath(os.path.join(root, name)), target)


def fetch_data():
    """Download one report per entry of ``ids`` into ``download_dir``.

    Starts Chrome with ``download_dir`` as its download folder, loads the home
    page, opens the report form in the window that pops up, and then, for each
    id, fills in and submits the form and renames the downloaded ``XL*`` file
    to ``<id>.html``.

    Raises:
        Exception: Any error raised while driving the browser is reported on
            stdout and re-raised.  Chrome is always closed before returning
            or raising.
    """
    opts = webdriver.ChromeOptions()
    print('Download Directory: {0}'.format(download_dir))
    prefs = {'download.default_directory' : download_dir}
    opts.add_experimental_option('prefs', prefs)
    print('Opening Chrome')
    driver = webdriver.Chrome(chrome_options=opts)
    try:
        print('Authenicating')
        load_home_page(driver)
        time.sleep(2)
        print('Load CMPM home')
        print('Opening CMPM Datamart reports')
        print('elem clicked')
        print('Attemting to switch to frame 0')
        driver.switch_to.frame('main')
        driver.find_element_by_css_selector('button[name="btndm"]').click()

        print('New window should be opening')
        wait_time = 60
        try:
            # Walk through every handle; the driver ends up on the last one,
            # which is expected to be the window that just opened.
            for handle in driver.window_handles:
                driver.switch_to.window(handle)
            print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
            # until() returns the located element, so no second lookup is needed.
            elem = WebDriverWait(driver, wait_time).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnCDH006"]')))
            elem.click()
            print('Found elem: {0}'.format(elem))
        except Exception as exc:
            # The form below cannot be filled in without this step, so report
            # the real cause and stop instead of failing later on.
            print('Something went wrong: {0}'.format(exc))
            raise

        for supervisor_id in ids:
            print('Fililng out form for: ' + supervisor_id)
            type_supervisor_id(driver, supervisor_id)
            select_date(driver, months)
            # results_display ignores its second argument.
            results_display(driver, None)
            driver.find_element_by_css_selector('button[name="btnSubmit"]').click()
            print('Sleeping for 5s')
            # Fixed pause to give the download time to finish.
            time.sleep(5)
            _rename_downloaded_reports(supervisor_id)
            print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
            WebDriverWait(driver, wait_time).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnSubmit"]')))
        print('All Data from CMPM has been downloaded')
    finally:
        # Always shut Chrome down, even when a step above raised.
        driver.quit()


    # TODO: After we execute all our loading code, be sure to delete downloads/
def clr_dir(directory=None):
    """Delete every ``*.html`` file located directly inside the download folder.

    The files are listed when the function is called, so files created after
    the module was imported are removed as well.

    Args:
        directory: Folder to clean.  Defaults to the module-level
            ``download_dir``.
    """
    target = download_dir if directory is None else directory
    html_files = glob.glob(target + '/*.html')
    if not html_files:
        print(target + ' contains no .html files')
        return

    print('download_dir is not empty! Will now attempt to delete all files')
    for path in html_files:
        os.remove(path)
    print('All files have been removed from ' + target)
# Script steps.  The first two are switched off; the author marked both as
# working.
#clr_dir()          --- WORKS ---
#fetch_data()       --- WORKS ---
# Read from download_dir, the folder Chrome is configured to download into.
# The literal '/downloads' resolves to the root of the drive, not ./downloads.
store_data(download_dir)

现在这是我从控制台得到的错误:

C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome>python cmpm.py
Traceback (most recent call last):
  File "cmpm.py", line 165, in <module>
    store_data('/downloads')
  File "cmpm.py", line 36, in store_data
    database.connect()
  File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2439, in connect
    self._state.set_connection(self._connect())
  File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2666, in _connect
    **self.connect_params)
TypeError: 'threadlocals' is an invalid keyword argument for this function

C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome

有什么想法吗?

1 个答案:

答案 0 :(得分:0)

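请在创建数据库对象的地方(应当是 models.py 中定义 `database` 的那一行)去掉 `threadlocals` 参数。Peewee 3.x 已不再支持该参数;从回溯可以看到,多余的关键字参数会通过 `**self.connect_params` 原样传给底层的连接函数,于是抛出 `TypeError: 'threadlocals' is an invalid keyword argument`。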