Question

我在自己的 Web 服务器上设置了一个受保护的区域，其中有两个文件：一个 txt 文件和一个 rar 文件，rar 文件里包含一个 pdf 文件。下面是一个可以运行的程序。首先，我成功通过身份验证并读取了文本文件的内容，从中取得下载路径（ download_path = self.url.text ）。接着我尝试下载该文件，但下载不成功。有人能帮助我吗？

FILE_NAME = "downloader2.py"

import os
import requests
import sys
from requests.auth import HTTPBasicAuth

from PyQt4.QtCore import QThread, pyqtSignal, Qt, QSemaphore
from PyQt4.QtGui import QVBoxLayout, QPushButton, QDialog, QProgressBar, QApplication, QMessageBox   

class Download_Thread(QThread):
    finished_thread = pyqtSignal()
    error_http = pyqtSignal()
    finished_download = pyqtSignal()
    notify_progress = pyqtSignal(int)

    def __init__(self, location, link, parent=None):
        QThread.__init__(self, parent)

        self.link = link

        self.location = location

        self._run_semaphore = QSemaphore(1)



    def run(self):
        try:
            self.url= requests.get(self.link, auth=HTTPBasicAuth('user_name', 'user_password'))
            download_path = self.url.text
            print "URL PATH ", download_path
            file = requests.get(download_path, stream=True)
            status = self.url.status_code
            print "STATUS ", status

            if not status == 200:
                self.error_http.emit()

        except (requests.exceptions.URLRequired,
                requests.exceptions.ConnectionError,
                requests.exceptions.HTTPError,
                requests.exceptions.Timeout,
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout), g:
            print 'Could not download ', g
            self.error_http.emit()
        else:
            file_size = int(requests.head(download_path).headers.get('content-length', [0]))
            print "file_size", file_size
            r = requests.head(download_path)
            print "heanders", r.headers

            print "%s Byte" %file_size
            result = 2000 / (1024*5)
            print "result", result
            chunk_size = int(result)
            print "chunk_size", chunk_size
            downloaded_bytes = 0

            with open(self.location, 'wb') as fd:
                for chunk in file.iter_content(chunk_size):
                    fd.write(chunk)
                    downloaded_bytes = fd.tell()
                    #print (float(downloaded_bytes)/file_size*100)
                    self.notify_progress.emit(float(downloaded_bytes)/file_size*100)

                    if self._run_semaphore.available() == 0:
                        self._run_semaphore.release(1)
                        break

                print "Finish"
                self.finished_download.emit()
                self.finished_thread.emit()

    def stop(self):
        print "stop"
        self._run_semaphore.acquire(1)

class MyCustomDialog(QDialog):

    def __init__(self):
        super(MyCustomDialog, self).__init__()
        layout = QVBoxLayout(self)

        #self.url = get_access_data_and_link('Sophus','danny5658')
        #print "CALLING DOWNLOAD", self.url

        self.url = 'http://xarphus.de/schutz/'

        # Create a progress bar and a button and add them to the main layout
        self.progressBarUpdate = QProgressBar(self)
        self.progressBarUpdate.setAlignment(Qt.AlignCenter)
        layout.addWidget(self.progressBarUpdate)

        pushButtonUpdate = QPushButton("Start", self)
        layout.addWidget(pushButtonUpdate)
        pushButtonCancel = QPushButton("Cancel", self)
        layout.addWidget(pushButtonCancel)

        pushButtonUpdate.clicked.connect(self.check_folder_exists)

        # Set data for download and saving in path
        self.location = os.path.abspath(os.path.join('temp', 'example-app-0.3.win32.zip'))
        #self.url = 'http://sophus.bplaced.net/download/example-app-0.3.win32.zip'

        self.download_task = Download_Thread(self.location, self.url)
        self.download_task.notify_progress.connect(self.on_progress)
        self.download_task.finished_thread.connect(self.on_finished)
        self.download_task.error_http.connect(self.on_HTTPError)
        self.download_task.finished_download.connect(self.on_finish_download)

        pushButtonCancel.clicked.connect(self.on_finished)

    def on_start(self):
        self.progressBarUpdate.setRange(0, 0)
        self.download_task.start()

    def on_finish_download(self):
        msg_box = QMessageBox()

        QMessageBox.question(msg_box, ' Message ',
                                           "The file has been fully downloaded.", msg_box.Ok)

    def on_HTTPError(self):
        reply = QMessageBox.question(self, ' Error ',
                                           "The file could not be downloaded. Will they do it again?", QMessageBox.Yes | 
            QMessageBox.No, QMessageBox.No)

        if reply == QMessageBox.Yes:
            self.on_start()
        else:
            print "Close button pressed"
            #event.ignore()

    def on_progress(self, i):
        self.progressBarUpdate.setRange(0, 100)
        self.progressBarUpdate.setValue(i)

    def check_folder_exists(self):
        location = os.path.abspath(os.path.join('temp'))
        if not os.path.exists(location):
            os.makedirs(location)
            print "Folder was created"
            self.on_start()
        else:
            print "Folder already exists"
            self.on_start()

    def on_finished(self):
        self.progressBarUpdate.setValue(0)
        self.close()

    def closeEvent(self, event):
        self.download_task.stop()

def main():
    """Create the Qt application, show the download dialog and run the event loop."""
    application = QApplication(sys.argv)

    dialog = MyCustomDialog()
    dialog.resize(600, 400)
    dialog.show()

    # The process exit status is whatever the Qt event loop returns.
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

如果我运行这个程序，我会在控制台上看到这行：

Folder already exists
URL PATH  http://xarphus.de/schutz/datei.rar
STATUS  200
file_size 290
heanders {'content-length': '290', 'x-varnish': '150708046 150707392', 'content-encoding': 'gzip', 'accept-ranges': 'bytes', 'vary': 'Accept-Encoding', 'server': 'Apache', 'age': '0', 'connection': 'keep-alive', 'via': '1.1 varnish', 'date': 'Wed, 22 Jul 2015 23:20:06 GMT', 'content-type': 'text/html; charset=iso-8859-1', 'www-authenticate': 'Basic realm="Service-Bereich"'}
290 Byte
result 0
chunk_size 0
Finish
stop

Answer 1

我还没有找到解决方案，但我精简了我的源代码。希望这有助于我们找到问题所在。

import requests
import shutil
from requests.auth import HTTPBasicAuth

def log_in(user, pwd):
    s = requests.session()
    resp = s.get('http://xarphus.de/protect_folder/', auth=HTTPBasicAuth(user, pwd))
    print "Status: ", resp.status_code
    content_txt_file = resp.text
    print "Cookies: ", requests.utils.dict_from_cookiejar(s.cookies)
    print "Content of txt file: ", content_txt_file
    print "start downloading"
    response = s.get(content_txt_file, stream=True)
    with open('test_rar.rar', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    del response

if __name__ == '__main__':
    user_name = 'test_account'
    user_password = 'test_user'
    log_in(user_name, user_password)

运行这个程序确实会下载到一个文件，但该文件只有 209 个字节，太小了——原始文件大约有 36 兆字节。

Answer 2

所以，虽然没有人帮我（无论出于何种原因），但我找到了一个解决方案。这是正确的代码：

def get_logged_in_session(user, pwd):

    url = 'http://xarphus.de/protect_folder'
    s = requests.session()

    auth = HTTPBasicAuth(user, pwd)
    resp = s.get(url=url, auth=auth)

    print "Status: ", resp.status_code
    content_txt_file = resp.text
    print "Cookies: ", s.cookies
    print "Content of txt file: ", content_txt_file
    print "init the download"
    response = s.get(url=content_txt_file, auth=auth, stream=True)
    print "start downloading"
    with open('test_rar.rar', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    print "Closing response"
    response.close
    print "response is closed"
    print "Closing resp"
    resp.close
    print "resp is closed"

我已更改此行

response = s.get(content_txt_file, stream=True)

改成了这一行

response = s.get(url=content_txt_file, auth=auth, stream=True)

为什么呢？因为要访问该文件，我必须再次进行身份验证。

Python：从受保护的文件夹下载（.htaccess）

2 个答案: