伙计我正在尝试创建一个蜘蛛但是我一直得到错误:TypeError:file_thread()需要1个位置参数但是39个被给出 这是如此奇怪,因为它需要一个列表,并给出一个列表
import sys
sys.path.insert(0, "/media/user/Data/Programming/Projects")
import re , threading , bs4 , async_lib , urllib.request
file_lock=threading.Lock()
spider_lock=threading.Lock()
def file_thread(data):
file_lock.accquire()
print('IO Operation')
async_lib.WriteAsync('spider.txt',data,).start()
file_lock.release()
def Get_Links(webpage):
byte_html=urllib.request.urlopen(webpage).read()
bad_html= byte_html.decode()
html = str(bs4.BeautifulSoup(bad_html))
all_links = re.findall('href="(http.*?)"',html)
return all_links
def spider_logic(raw_links):
for link in raw_links:
try:
links = Get_Links(link)
t1 = threading.Thread(target=file_thread,args=link).start()
t1 = threading.Thread(target=file_thread,args=links)
spider_lock.accquire()
print(links)
t2 = threading.Thread(target=spider_logic, args=links).start()
spider_lock.release()
except:
pass
def main():
startup='http://www.myegy.to'
links = Get_Links(startup)
spider_logic(links)
if __name__=='__main__':
main()
答案 0 :(得分:0)
您在args
中传递给Thread()
的内容应该是可迭代的。当您传递一个字符串时,它会遍历每个字符并将其视为一个单独的arg。您应该改为将其作为元组传递。
t1 = threading.Thread(target=file_thread,args=(links,))