在比较两个游标之间的数据时无法更新表

时间:2015-11-01 17:16:53

标签: python-2.7 mysql-python

我想比较表中的行,看它们是否相等。我的做法是创建两个游标(查询):1. 从表中选出 visited = yes 的链接;2. 从表中选出 visited = no 的链接。然后使用 for 循环和 if 语句,比较已访问的链接与未访问的链接是否相等;如果相等,就把该链接的 visited 更新为 "YES"(代码中写作 'Ye')。程序还没有完成(我的目的是:当所有链接都已被访问并全部标记为 YES,或者 "where visited = no" 的游标返回空结果时,退出程序)。以下是我的部分代码:

import sys
import MySQLdb
import urllib
import urlparse
import re
import HTMLParser
from HTMLParser import HTMLParseError
from bs4 import BeautifulSoup

    # Module-level MySQL connection and cursor; process() and process2()
    # use these two names directly.
    mydb = MySQLdb.connect(
        host='localhost',
        user='root',
        passwd='shailang',
        db='mydb',
    )
    cursor = mydb.cursor()

    def process2(url):
        """Mark unvisited links as visited when an identical visited link exists.

        Selects the ``links`` column of every ``DATA_urls`` row with
        ``visited = 'Ye'`` and of every row with ``visited = 'No'``.  For each
        'No' row that compares equal to a 'Ye' row, issues an UPDATE setting
        ``visited = 'Ye'`` on the rows with that link, then commits once.

        ``url`` is accepted so existing callers keep working, but it is not
        used.  The function relies on the module-level ``cursor`` and ``mydb``.
        """
        cursor.execute("SELECT links FROM DATA_urls where visited = 'Ye'")
        yes_rows = cursor.fetchall()
        cursor.execute("SELECT links FROM DATA_urls where visited = 'No'")
        no_rows = cursor.fetchall()

        # Rows are compared as fetched; with a single selected column each row
        # is expected to be a 1-tuple, which also serves as the parameter
        # sequence for the UPDATE below.
        for no_link in no_rows:
            print('NOOOOOOOOOO')
            print(no_link)
            for yes_link in yes_rows:
                print("YESSSSSSSSSSSSSS")
                print(yes_link)
                if yes_link == no_link:
                    print('EQUALS')
                    cursor.execute(
                        "UPDATE DATA_urls SET visited = 'Ye' where links = %s",
                        no_link)

        # Commit once after all updates so the changes are persisted.
        mydb.commit()
def process(url):
    proxies = {"http":"http://proxy4.nehu.ac.in:3128",
        "https":"https://proxy4.nehu.ac.in:3128"}
    page = urllib.urlopen(url,proxies=None)
    text = page.read()
    page.close()
    soup = BeautifulSoup(text)
    file=open('s.txt','w') 
    cursor.execute("INSERT INTO DATA_urls(links,parent,visited) VALUES(%s,'NULL','Ye')",url)
    for tag in soup.findAll('a', href=True):
    tag['href'] = urlparse.urljoin(url, tag['href'])
        print tag['href']
        if re.match(ur'(?i)\b((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))',tag['href']):
        cursor.execute("INSERT INTO DATA_urls(links,parent,visited) VALUES(%s,%s,'No')", (tag['href'],url))
        file.write('\n')
        file.write(tag['href'])
            #file.close()
   # cursor.execute("SELECT * FROM url")
   # rows = cursor.fetchall()
    mydb.commit()
    process2(1) 


def main():
    """Process every URL given on the command line.

    Prints a message and exits with status 1 when no URL argument was
    supplied; otherwise calls ``process`` on each argument in order.
    """
    urls = sys.argv[1:]
    if not urls:
        print('No url !!')
        sys.exit(1)
    for url in urls:
        process(url)

# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

我没有收到任何错误,但我的数据库中没有更新任何内容。我的表结构(DESC)如下:

+---------+---------------+------+-----+---------+-------+
| Field   | Type          | Null | Key | Default | Extra |
+---------+---------------+------+-----+---------+-------+
| links   | varchar(1000) | YES  |     | NULL    |       |
| parent  | varchar(1000) | YES  |     | NULL    |       |
| visited | varchar(2)    | YES  |     | NULL    |       |
+---------+---------------+------+-----+---------+-------+

1 个答案:

答案 0 :(得分:0)

将其更改为:

    mydb = MySQLdb.connect(host='localhost',
                           user='root',
                           passwd='shailang',
                           db='mydb')
    cursor = mydb.cursor()

  def process2(url):
      """Mark unvisited links as visited when an identical visited link exists.

      Selects the ``links`` of rows with ``visited = 'Ye'`` and of rows with
      ``visited = 'No'`` from ``DATA_urls``, and for every 'No' row equal to
      a 'Ye' row sets ``visited = 'Ye'`` on the rows with that link.  The
      changes are committed at the end so they are persisted.

      ``url`` is not used.  Relies on the module-level ``cursor`` and ``mydb``.
      """
      # The string values must be quoted inside the SQL text.
      cursor.execute("SELECT links FROM DATA_urls where visited = 'Ye'")
      yes_rows = cursor.fetchall()
      cursor.execute("SELECT links FROM DATA_urls where visited = 'No'")
      no_rows = cursor.fetchall()
      for k in no_rows:
          print('NOOOOOOOOOO')
          print(k)
          for k1 in yes_rows:
              print("YESSSSSSSSSSSSSS")
              print(k1)
              if k1 == k:
                  print('EQUALS')
                  cursor.execute(
                      "UPDATE DATA_urls SET visited = 'Ye' where links = %s",
                      k)
      mydb.commit()

help