如何在多线程或多处理中运行脚本

时间:2015-10-07 13:48:58

标签: python multithreading multiprocessing

此脚本需要2秒才能完成,但如何在多个线程中运行并在50毫秒内完成

import urllib2                                                                                                                              
from threading import  Thread                                                                                       
def btl_test(url):                                                                                                                                                                                                          
    page = urllib2.urlopen(url) 
    print page                                                                                                            


# Sites to fetch; one thread per url, all started back-to-back.
url = ["http://google.com","http://example.com","http://yahoo.com","http://linkedin.com","http://orkut.com","http://quora.com","http://facebook.com","http://myspace.com","http://gmail.com","http://nltk.org","http://cyber.com"]

workers = [Thread(target=btl_test, args=(site,)) for site in url]
for worker in workers:
    worker.start()

如何按顺序排列结果?

3 个答案:

答案 0 :(得分:2)

urls = ["http://google.com","http://example.com","http://yahoo.com","http://linkedin.com","http://orkut.com","http://quora.com","http://facebook.com","http://myspace.com","http://gmail.com","http://nltk.org","http://cyber.com"]

def btl_test(url):
    import urllib2
    return url, urllib2.urlopen(url).read()

from contextlib import closing # http://stackoverflow.com/a/25968716/968442
from multiprocessing.pool import Pool

with closing(Pool(len(urls))) as pool:
    result = pool.map(btl_test, urls)

print result

应该是方便的代码段。关于顺序,您可以使用元组分配映射并相应地打印它们。

**更新**

根据此blog,pool.map将返回保留输入顺序的输出。下面是在不更改顺序的情况下,以(url, html_content)元组列表的格式打印结果的代码:

{{1}}

答案 1 :(得分:1)

尝试使用Queue()和enumerate来记录顺序。

import threading
import requests
import Queue

class UrlReader(threading.Thread):
    def __init__(self, queue, output):
        super(UrlReader, self).__init__()
        self.setDaemon = True
        self.queue = queue
        self.output = output

    def run(self):
        while True:
            try:
                target = self.queue.get(block=False)
                data = requests.get(target[1])
                print data.status_code
                if data.status_code == 200:
                    self.queue.task_done()
                    self.output.put((data.url, target[0]), block=False)
                else:
                    self.queue.task_done()
                    self.queue.put(target)
            except Queue.Empty:
                break
            except requests.exceptions.ConnectionError:
                self.queue.task_done()
                self.queue.put(target)


def load(urlrange, num_threads):
    """Fetch every (index, url) pair in *urlrange* using *num_threads*
    UrlReader workers.

    Returns a list of (url, index) tuples in completion order; the caller
    can sort by index to recover the original order.
    """
    mainqueue = Queue.Queue()
    outq = Queue.Queue()
    mythreads = []

    for url in urlrange:
        mainqueue.put(url)

    for _ in xrange(num_threads):
        worker = UrlReader(mainqueue, outq)
        mythreads.append(worker)
        worker.start()

    # Block until every queued item has been task_done()'d.
    mainqueue.join()

    # BUG FIX: the original created a *second* batch of num_threads
    # UrlReader objects here (never started, never joined) before joining
    # the first batch. Just join the workers that were actually started.
    for worker in mythreads:
        worker.join()

    # Queue.queue is the underlying deque; snapshot it as a list
    # (cleaner than the original ``outq.__dict__['queue']`` hack).
    return list(outq.queue)

urls = ["http://google.com","http://example.com","http://yahoo.com","http://linkedin.com","http://orkut.com","http://quora.com","http://facebook.com","http://myspace.com","http://gmail.com","http://nltk.org","http://cyber.com"]

print load(enumerate(urls), 10)

>>> [(6, 'http://facebook.com'), (9, 'http://nltk.org'), (0, 'http://google.com'), (1, 'http://example.com'), (2, 'http://yahoo.com'), (3, 'http://linkedin.com'), (4, 'http://orkut.com'), (5, 'http://quora.com'), (7, 'http://myspace.com'), (8, 'http://gmail.com'), (10, 'http://cyber.com')]

答案 2 :(得分:1)

这有效

from urlparse import urlparse
from multiprocessing.pool import Pool 
import re
import urllib2 

def btl_test(url):                                                                                                                                                                                                          
    page = urllib2.urlopen(url).read()
    if (re.findall(r'<title>(.*?)<\/title>',page)):
        page1 =  (re.findall(r'<title>(.*?)<\/title>',page)[0])
        print page1

# Target sites (duplicates and dead domains included, exactly as posted).
url = [
    "http://google.com",
    "http://example.com",
    "http://yahoo.com",
    "http://linkedin.com",
    "http://facebook.com",
    "http://orkut.com",
    "http://oosing.com",
    "http://pinterets.com",
    "http://orkut.com",
    "http://quora.com",
    "http://facebook.com",
    "http://myspace.com",
    "http://gmail.com",
    "http://nltk.org",
    "http://cyber.com",
]

# Fan the fetches out over a small process pool; each btl_test call
# prints the page title as a side effect.
nprocs = 2  # number of worker processes to run
ParsePool = Pool(nprocs)
ParsePool.map(btl_test, url)

希望这能有所帮助。