从Queue获取后的Python线程块

时间:2017-12-03 23:58:02

标签: python queue python-multithreading

我有一个我想从同一个域验证的电子邮件列表。首先,我检查域是否允许验证或是否是一个catchall(接受所有电子邮件用户有效)。 E.g假设if ( ($currentpage == $homeurl ) || ($currentpage == $homepage) ) [a@domain.com, b@domain.com],我将检查list是否有效,如果它是从函数返回的话。

如果不使用多个线程来验证列表中的所有电子邮件 - 如果所有失败都返回最后一个项目。如果一个有效且其他线程正在运行,则停止它们并返回有效线程。

foo-fake@domain.com

我的期望是下一个列表项运行。

以上代码的结果是:

from Queue import Queue
import threading
import smtplib

class MailWorker(threading.Thread):
    kill = False
    def __init__(self, in_que, out_que):
        super(MailWorker, self).__init__()
        self.in_que = in_que
        self.out_que = out_que

    def run(self):
        while True:
            email, host = self.in_que.get()
            self.test_email(email, host)
            self.in_que.task_done()


    def test_email(self, email, host, retry=0):
        status = "Unknown"
        code = None
        rand = "info@example.com"
        try:
            server = smtplib.SMTP(timeout=20)
            server.connect(host, 25)
            server.ehlo_or_helo_if_needed()
            code, response = server.docmd('mail from:', "<{}>".format(rand))
            code, response = server.docmd('rcpt to:', "<{}>".format(email))
            if code == 421:
                while retry < 3:
                    if retry >= 3:
                        server.quit()
                        self.out_que.put((email, "Service not available", code))
                    server.quit()
                    return self.test_email(email, host, retry=retry)
            if code == 250:
                status = 'valid'
                self.out_que.put((email, status, code,))
        except smtplib.SMTPServerDisconnected:
            while retry < 3:
                retry += 1
                status = "(SMTP Disconnected Unexpectedly) Retry # {}".format(retry)
                code = -2
                time.sleep(2)
                if retry >= 3:
                    self.out_que.put((email, "SMTP Disconnected", code))
                else:
                    return self.test_email(email, host, retry=retry)
            self.out_que.put((email, status, code,))

def check_email(emails, domain, index=0, is_main=False):
    email = status = code = None
    in_que = Queue(maxsize=10)
    out_que = Queue(maxsize=10)

    if 'brighthouse' in domain:
        host = 'brighthouse-co-uk.mail.protection.outlook.com'
    else:
        host = 'eu-smtp-inbound-2.mimecast.com'

    # is it a catchall? if it is i want to return from the function ---  
    # If the email is valid then it is not a catchall all so execute line 91
    # till return checking multipe emails in threads but exit when one valid is found else do all email in list
    if not is_main: # check if the email is main
        in_que.put(('JUNK_EMAIL_CANT_BE_REAL_fewfwewefew@' + domain, host)) # put rubbish email in thread if valid it is a catchall, then exit
        for i in range(1): 
            mw = MailWorker(in_que, out_que)
            print mw
            print "*" * 20
            mw.daemon = True
            mw.start()
            print mw.getName()
        print in_que.qsize(), " in queue size"
        print out_que.qsize(), " out queue size"

        print "*" * 20
        email, status, code = out_que.get()
        print "Contet = ", email, status, code


        if code == 250:
            print 'Domain is a Catch-All.  email: %s host: %s' % (emails[0], host)
            MailWorker.kill = True
            return emails, "catchall", -99, index
        elif code == -1:
            return email, status, code, index
    # in_que.join()
    # out_que.join()

    for index, email_address in enumerate(emails):
        in_que.put((email_address, host,))

    for i in range(10):
        mw = MailWorker(in_que, out_que)
        mw.daemon = True
        mw.start()
    while not out_que.empty():
        email, status, code, index = out_que.get()
        if code == 250:
            MailWorker.kill = True
            return email, status, code, index
    in_que.join()
    out_que.join()
    return email, status, code, index


emails_list = [
    ['fred@brighthouse.co.uk', 'joe.fred@brighthouse.co.uk', 'joe@brighthouse.co.uk'],

    ['fred@cqs.com', 'joe.fred@cqs.com', 'joe@cqs.com']
]

for emails in emails_list:
    domain = emails[0].split('@')[1]

    print(check_email(emails, domain))

0 个答案:

没有答案