我正在尝试使用python从网站下载文件,只能在您登录后下载,这似乎工作正常!但是当我真正尝试下载文件时,我只收到一个文本文件,说我必须登录。我相信我需要获取PHPSESSID cookie并使用它,但无法弄清楚如何做到这一点。这是我的代码:
from BeautifulSoup import BeautifulSoup
import re
import requests
import sys
class LegendasTV(object):
URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'
URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'
URL_LOGIN = 'http://legendas.tv/login'
def __init__(self, usuario, senha):
self.usuario = usuario
self.senha = senha
self.cookie = None
self._login()
def _login(self):
s = requests.Session()
url = self.URL_LOGIN
payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
r = s.post(url, payload)
html = r.content
if "<title>Login - Legendas TV</title>" in html:
return 0
else:
print 'Success login!'
return 1
def _request(self, url, method='GET', data=None):
if method == 'GET':
r = requests.get(url, stream=True)
if method == 'POST' and data:
r = requests.post(url, data=data)
return r
def search(self, q, lang='pt-br', tipo='release'):
if not q:
pass # raise exception
if not lang or not self.LEGENDA_LANG.get(lang):
pass # raise exception
if not tipo or not self.LEGENDA_TIPO.get(tipo):
pass # raise exception
busca = { 'txtLegenda': q,
'int_idioma': self.LEGENDA_LANG[lang],
'selTipo': self.LEGENDA_TIPO[tipo] }
r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
if r:
legendas = self._parser(r.text)
else:
pass # raise exception
return legendas
def _parser(self, data):
legendas = []
html = BeautifulSoup(data)
results = html.findAll("a")
for result in results:
if result.get("href") is not None and "S09E16" in result.get("href"):
path_href = result.get("href").split("/")
unique_id_download = path_href[2]
url = self.URL_DOWNLOAD % unique_id_download
def download(self, url_da_legenda):
r = self._request(url_da_legenda)
if r:
with open("teste.rar", 'wb') as handle:
print u'Baixando legenda:', url_da_legenda
handle.write(r.content)
以下是我尝试使用代码下载一个文件的方法:
$ python
Python 2.7.6 (default, Jun 22 2015, 17:58:13)
[GCC 4.8.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>>
>>> from download_legenda import *
>>> legendas_tv = LegendasTV("Login", "Pass")
Success login!
>>>
>>> legendas_tv.download("http://legendas.tv/downloadarquivo/56c76ce239291")
Baixando legenda: http://legendas.tv/downloadarquivo/56c76ce239291
>>>
我将不胜感激。
答案 0 :(得分:0)
在这个答案的帮助下,我终于明白了!
https://stackoverflow.com/a/12737874/1718174
我试图直接使用cookies,但似乎会话已经完成了繁重的部分,并为我们处理。以下是我的代码需要更新的部分:
def _login(self):
s = requests.Session()
url = self.URL_LOGIN
payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
r = s.post(url, payload)
html = r.content
if "<title>Login - Legendas TV</title>" in html:
return 0
else:
print 'Success on login!'
self.session = s
return 1
def _request(self, url, method='GET', data=None):
if self.session:
if method == 'GET':
r = self.session.get(url, cookies=self.cookie, stream=True)
if method == 'POST' and data:
r = self.session.post(url, data=data, cookies=self.cookie)
return r