我编写了一个python脚本,该脚本使用多线程python使用python从SFTP服务器下载文件,以便它可以一次连接到多个服务器并从其中并行下载文件。
它最多可用于10个连接,但是如果有25个连接,则会显示此错误
假设要在每个服务器上下载5000个大小为130mb(几乎)的文件
代码通常可以在以后的尝试中成功运行,或者可以成功地在日期范围内的前几个文件中运行,然后在下载我需要检索的所有文件的过程中出错(请参见下面的错误)
错误的原因是什么,请帮助我解决它。预先感谢
我的代码:
import sys, os, string, threading
import paramiko
import os
import pysftp
import csv
import socket
from stat import S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import lock
threading.Lock()
privatekeyfile = os.path.expanduser("C:\\Users\\Rohan\\.ssh\\cool.prv") # public key authentication
mykey = paramiko.RSAKey.from_private_key_file(privatekeyfile)
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
def workon(serverad,user, textfile, serverpath, local_path_temp):
with pysftp.Connection(host=serverad, username=user,private_key=mykey, cnopts=cnopts) as sftp:
r = (socket.gethostbyaddr(serverad))
print("connection successful with ",r) # connect to SFTP server
def get_file2(sftp ,remotedir): # make the list of all the files on server path of last 2 days
result = []
for entry in sftp.listdir_attr(remotedir):
remotepath = remotedir + "/" + entry.filename
mode = entry.st_mode
if S_ISDIR(mode):
result += get_file2(sftp, remotepath)
elif S_ISREG(mode):
if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
result.append(entry.filename)
return result
remote_path = serverpath
d = get_file2(sftp, remote_path)
def process():
myname = [] # make list of files already in log file
for name in j.readlines():
while '\n' in name:
name = name.replace('\n', '')
myname.append(name)
import os
filtered_list = [string for string in d if string not in myname] # make filtered list of files preent on server but not in local path
print("filtered list:", filtered_list)
print(len(filtered_list))
local_path = local_path_temp
def compare_files(sftp, remotedir, remotefile, localdir, preserve_mtime=True):
remotepath = remotedir + "/" + remotefile # download the files inside filtered list
localpath = os.path.join(localdir, remotefile)
mode = sftp.stat(remotepath).st_mode
if S_ISDIR(mode):
try:
os.mkdir(localpath, mode=777)
except OSError:
pass
compare_files(sftp, remotepath, localpath, preserve_mtime)
elif S_ISREG(mode):
sftp.get(remotepath, localpath, preserve_mtime=True)
for files in filtered_list:
compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
with open(filename, 'a') as f: # append the downloaded file name in the log file
for item in filtered_list:
f.write("%s\n" % item)
with open(filename, 'r') as f:
unique_lines = set(f.readlines())
h = sorted(unique_lines)
with open(filename, 'w') as f:
f.writelines(h)
filename=textfile # if the log file does not exist it can create a new log file
try:
with open(filename, 'r+') as j:
process()
except IOError:
with open(filename, 'w+') as j:
process()
def main():
threads=[]
config_file_path = "config15.txt" # read config file and assign IP,username,logfile,serverpath,localpath
file = open(config_file_path, 'r')
reader = csv.reader(file)
all_rows = [row for row in reader]
for line in all_rows:
if len(line) != 5:
continue
server_ip = line[0]
username = line[1]
txt_file = line[2]
server_path = line[3]
local_path = line[4]
t = threading.Thread(target=workon, args=(server_ip, username, txt_file, server_path, local_path)) # use of multithreading
t.start()
threads.append(t)
with open(config_file_path) as f: # adjust sleep time according to connections in config file
if(sum(1 for _ in f)) >= 5:
sleep(5)
else:
sleep(1)
for t in threads:
t.join()
if __name__ == "__main__": # execute main function
main()
错误:
Traceback (most recent call last):
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 98, in workon
process()
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 80, in process
compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 77, in compare_files
sftp.get(remotepath, localpath, preserve_mtime=True)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\pysftp\__init__.py", line 249, in get
self._sftp.get(remotepath, localpath, callback=callback)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\paramiko\sftp_client.py", line 806, in get
"size mismatch in get! {} != {}".format(s.st_size, size)
OSError: size mismatch in get! 0 != 275856