Question

我正在尝试使用python从网站下载文件，只能在您登录后下载，这似乎工作正常！但是当我真正尝试下载文件时，我只收到一个文本文件，说我必须登录。我相信我需要获取PHPSESSID cookie并使用它，但无法弄清楚如何做到这一点。这是我的代码：

from BeautifulSoup import BeautifulSoup
import re
import requests
import sys

class LegendasTV(object):
    URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'

    URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'

    URL_LOGIN = 'http://legendas.tv/login'

    def __init__(self, usuario, senha):
        self.usuario = usuario
        self.senha = senha
        self.cookie = None

        self._login()

    def _login(self):
        s = requests.Session()
        url = self.URL_LOGIN
        payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
        r = s.post(url, payload)
        html = r.content

        if "<title>Login - Legendas TV</title>" in html:
            return 0
        else:
            print 'Success login!'
            return 1

    def _request(self, url, method='GET', data=None):
        if method == 'GET':
            r = requests.get(url, stream=True)
        if method == 'POST' and data:
            r = requests.post(url, data=data)

        return r

    def search(self, q, lang='pt-br', tipo='release'):
        if not q:
           pass # raise exception

        if not lang or not self.LEGENDA_LANG.get(lang):
           pass # raise exception

        if not tipo or not self.LEGENDA_TIPO.get(tipo):
           pass # raise exception

        busca = { 'txtLegenda': q,
                  'int_idioma': self.LEGENDA_LANG[lang],
                  'selTipo':    self.LEGENDA_TIPO[tipo] }

        r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
        if r:
            legendas = self._parser(r.text)
        else: 
            pass # raise exception

        return legendas

    def _parser(self, data):
        legendas = []

        html = BeautifulSoup(data)
        results = html.findAll("a")
        for result in results:
            if result.get("href") is not None and "S09E16" in result.get("href"):
                path_href = result.get("href").split("/")
                unique_id_download = path_href[2]
                url = self.URL_DOWNLOAD % unique_id_download

    def download(self, url_da_legenda):
        r = self._request(url_da_legenda)
        if r:
            with open("teste.rar", 'wb') as handle:
                print u'Baixando legenda:', url_da_legenda
                handle.write(r.content)

以下是我尝试使用代码下载一个文件的方法：

$ python
Python 2.7.6 (default, Jun 22 2015, 17:58:13) 
[GCC 4.8.2] on linux2
Type "help", "copyright", "credits" or "license" for more information. 
>>> 
>>> from download_legenda import *
>>> legendas_tv = LegendasTV("Login", "Pass")
Success login!
>>> 
>>> legendas_tv.download("http://legendas.tv/downloadarquivo/56c76ce239291")
Baixando legenda: http://legendas.tv/downloadarquivo/56c76ce239291
>>>

我将不胜感激。

Answer 1

在这个答案的帮助下，我终于明白了！

https://stackoverflow.com/a/12737874/1718174

我试图直接使用cookies，但似乎会话已经完成了繁重的部分，并为我们处理。以下是我的代码需要更新的部分：

def _login(self):
    s = requests.Session()
    url = self.URL_LOGIN
    payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
    r = s.post(url, payload)
    html = r.content

    if "<title>Login - Legendas TV</title>" in html:
        return 0
    else:
        print 'Success on login!'
        self.session = s

        return 1

def _request(self, url, method='GET', data=None):
    if self.session:
        if method == 'GET':
            r = self.session.get(url, cookies=self.cookie, stream=True)
        if method == 'POST' and data:
            r = self.session.post(url, data=data, cookies=self.cookie)

        return r

如何在python中获取会话cookie

1 个答案: