无法使用python更新MYSQL数据库

时间:2019-07-04 15:19:48

标签: python mysql multithreading python-2.7

问题:脚本成功运行,没有任何错误,但是Mysql数据库不会使用脚本结果进行更新

我添加了db.autocommit(True)行来进行每次提交,但仍然失败。

环境:python2.7,MySQL

我也尝试过在每个带有锁的语句执行后手动输入db.commit(),但也会失败。

'''
Specifications:
A multi-Threaded Web Spider that:
Takes website and depth of spidering as input
Downloads the HTML files only
Inserts the HTML into an MYSQL database
It also parses the forms on each page and inserts into db with form details

''' 

import mechanize
import sys
import threading
import MySQLdb

lock  = threading.Lock()

def Parse_Forms(target,curr,br):
    lock.acquire()
    br.open(target)
    curr.execute("use web;");
    response = []
    for forms in br.forms():
        i= 0
        action =  forms.action
        method = forms.method
        d = dict()
        d['method'] = method
        d['name'] = action
        br.select_form(nr=i)
        for control in forms.controls:

            if control.value == '':
                d[control.name] = "NULL"
            elif type(control.value) is list:
                    d[control.name] = control.value[0]
            else:
                d[control.name] = control.value

        for j in d: 
            if str(j) == 'login' or str(j) == 'name' or str(j) == 'password' or str(j) == 'method':     #These are only the valid names that has to be inserted in MYSQL db
                query = "INSERT INTO `forms` ("+str(j)+") values (\""+str(d[j])+"\");"
                curr.execute(query)

        print "Query Executed!" 
        i=i+1
        response.append(br.submit())
        lock.release()

def getHTMLfiles(target,curr):

    br = mechanize.Browser()
    headers = [('User-Agent','Firefoxy'),]
    br.addheaders = headers
    br.open(target)
    for i in range(0,depth):
        for link in br.links():
            if ".hmtl" in link.url:
                print "Downloading File: "+link.url
                os.system("wget "+link.url+" -P Files/")
                curr.execute("INSERT INTO `pages` (name) values ("+ "\"link.url\");")
            if link.url[0] == '/' and not '.' in link.url:     #Indicates that file belongs to server not some external link and is a directory
                Parse_Forms(target+link.url,curr,br,db)

if __name__ == "__main__":

    db = MySQLdb.connect(host="localhost",user="****",password="*****",db="web") 
    #There are 2 db one called pages that saves HTML file url and one forms that saves form parameteres
    db.autocommit(True)
    curr = db.cursor()
    target = sys.argv[1]
    depth = int(sys.argv[2])
    threads = []

    for workers in range(10):
        t = threading.Thread(target = getHTMLfiles,args = (target,curr,))
        t.daemon = True
        t.start()
        threads.append(t)

    for thread in threads:
        thread.join()

该脚本可以正常运行,但应该可以更新MySQL数据库。

注意:似乎一切正常,没有MYSQL错误(我的意思是没有错误)

0 个答案:

没有答案