我正在尝试抓取(scrape)这个网站:
http://www.finanzen.net/historische-kurse/Daimler
并使用以下源代码:
from cookies import cookies
import datetime
import requests
import time
import webbrowser
def _build_payload(start, end, boerse="XETRA"):
    """Return the query parameters for the historical-prices form.

    start  -- first day of the requested period (datetime.date)
    end    -- last day of the requested period (datetime.date)
    boerse -- value sent as "strBoerse"
    """
    return {
        "inTag1": str(start.day),
        "inMonat1": str(start.month),
        "inJahr1": str(start.year),
        "inTag2": str(end.day),
        "inMonat2": str(end.month),
        "inJahr2": str(end.year),
        "strBoerse": boerse,
        # Current Unix timestamp, sent exactly as the original script did.
        "pkBHTs": int(time.time()),
    }


def download(number,
             isin,
             start=datetime.date(1998, 1, 1),
             end=None,
             dst="raw"):
    """Request the historical-prices page and save the response to output.html.

    number -- currently unused
    isin   -- currently unused
    start  -- first day of the requested period (default: 1998-01-01)
    end    -- last day of the requested period; None means "today",
              resolved at call time rather than once at import time
    dst    -- currently unused

    Prints "OK" when the response text contains the date "08.11.2017" and
    "FAIL!" otherwise, then writes the response text to output.html.
    """
    # Function-scope import: io.open accepts an explicit encoding.
    import io

    if end is None:
        end = datetime.date.today()

    # ToDo -- I want to give a tag and not an url!
    url = "http://www.finanzen.net/historische-kurse/daimler"

    # `cookies` is the module-level name imported at the top of the file.
    r = requests.get(url, params=_build_payload(start, end), cookies=cookies)

    # Sanity check kept from the original script: look for one known date.
    if "08.11.2017" in r.text:
        print("OK")
    else:
        print("FAIL!")

    # Explicit UTF-8 so non-ASCII characters in the page (e.g. umlauts)
    # do not depend on the platform's default encoding.
    with io.open("output.html", "w", encoding="utf-8") as f:
        f.write(r.text)
if __name__ == "__main__":
    # Manual smoke test: fetch the page, then show the saved file in a browser.
    print("Test: download()")
    first_day = datetime.date(1998, 1, 1)
    last_day = datetime.date.today()
    download(1510210323, "DE0007100000", start=first_day, end=last_day, dst="raw")
    print("Done.")
    webbrowser.open("output.html")
我有第二个脚本,它从我的浏览器提供cookie数据:
# Cookie values copied from a browser session and passed to requests.
#
# The original literal listed many keys twice. Python keeps only the last
# value for a repeated key, so each key now appears exactly once, carrying
# the value that was effective before (for 'NID' that is the '110=...' one).
#
# NOTE(review): entries such as SID / HSID / SSID / APISID / SAPISID / NID /
# CONSENT / 1P_JAR look like they belong to a different domain than
# finanzen.net and may be account session tokens -- confirm, and remove or
# invalidate them before sharing or sending this dict.
cookies = {
    'CAP' : 'data=44a1e1f46fef0411bf06d9bfc501913f',
    'CUID' : 'N,1510569302850:ALHGLuQAAAAPTiwxNTEwNTY5MzAyODUwVdj35/i8kfuLw5RmnsCECh6uWduJEJHPHe44+gmS5k1OeVohiY2UE0s8Toc6Z1KsPkSIOyvb0rHFvfBB5GtZD0BeUVeUq8xKkDIkDqq2RsE7AvdO9c+GoqElRytvxjPuoExKFUZ7sMl3+ugTDvQsjM0q6iEkcfYTCjZcqRhGJ2JicnT0yZI8NIINqvt1OUufo4jtHTgznYHCgSG8lxydqzv+Cax90XRsvKoUEzTfJCxzqryt3rkXiy4IMEOrTMxZOZCoT0HO3hgghkd3XyzOhhr70tLnPbY4GxPkWrcXy4y+7xHwwoX+jmJGiNvEJod8mQF3QkkDSN+uwmTlAgy7Yg==',
    'MI' : '1',
    'OPTOUTMULTI' : '0:0%7Cc2:0',
    '__utma' : '99761801.1635127051.1447939999.1510562258.1510569245.10',
    '__utmb' : '99761801.1.10.1510569245',
    '__utmc' : '99761801',
    '__utmt_UA-1858090-1' : '1',
    '__utmz' : '99761801.1503872185.2.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)',
    '_ga' : 'GA1.2.1635127051.1447939999',
    '_gid' : 'GA1.2.216894857.1510518215',
    'finpopup2' : 'n=3&d=08%2E09%2E2017',
    'fintargeting' : 'v=1&h=0x000000&hd=131117091117091117091117091117091117',
    'finvisit' : 'v=5&p=16&d=13%2E11%2E2017',
    'mjdkyj' : 'AKsRol8bjClrOgYBI9F53uwo0572ZvzG_ifsQhL0W6CGIPDli067aLH682nhCzAvZJLwgmI_hfnp1G_cO6_R7La4pHyjXqGz7w',
    'utag_main' : 'v_id:015e25c45b49001bad6f158e01b004066002a05e00838$_sn:10$_ss:0$_st:1510571043449$dc_visit:10$dc_visit_dip-main:10$dip_times_empty_enrichment:26$ses_id:1510569240676%3Bexp-session$_pn:1%3Bexp-session$collectCookieMode:3rdParty%3Bexp-session$dc_event:1%3Bexp-session$dip_events_this_session:1%3Bexp-session$dc_event_dip-main:1%3Bexp-session$dc_region_dip-main:eu-central-1%3Bexp-session$dc_region:eu-central-1%3Bexp-session',
    'xdefcc' : 'G18e8ffb3a46fc000807955c49556bf4cc',
    '1P_JAR' : '2017-11-13-10',
    'AID' : 'AJHaeXJrF9XjJOxZm8l4doMQZS4yMOPes3h5NKKcWJZxIyGSOAjtzw',
    'APISID' : 'VILFXotMggFxtb9h/AcPvkxMR5pWR0AzUo',
    'ASPSESSIONIDQSRTTSCC' : 'GLLJJMHAIDCBGEEODOLMADEN',
    'ASPSESSIONIDSSRSRSBD' : 'PGHHNMHANNCJJKIPNALFCCMO',
    'CONSENT' : 'YES+DE.de+20150726-13-0',
    'HSID' : 'AkD2AyVb5Z9wR9QT-',
    'NID' : '110=Chz_C4sXWBfkLwySlpc-od0DauOGbWPjjZ_UYJgVYSeOXLWaQayrm_PHSMfnI4bkHwFQjWq0atuDmXEGq39-uJHLpanLu7kQDs9WTm004KBx7nWacN1-_x_p05gbw8wx',
    'POPUPCHECK' : '1510604610799',
    'SAPISID' : 'e-07IvwMgsqnc1KL/AWrA0YaffjZEXetqg',
    'SID' : 'FwUbfPob8j7z4QAbMUsPD7HI4FwPoOM7wPo9cyTwyAveBe0fy86idhBKLMz8mn93l-pyPA.',
    'SIDCC' : 'AE4kn7_ynLaaOiVILBfgv5-_j3I-18GWRw0_rgIiJGSRP0YLV8zwv3Me80u9dqJI_a58y8xXxMCyLlu7qBw2rQ',
    'SSID' : 'ADQhejhWBta7RIl3q',
    'anj' : """dTM7k!M4/C@-S3@:5]qk`_#I2PQ7s]@!7PT-Q!$Rkf%Tam0H]QFpc6s(!H!k>jP5fr<Pn!Sse@DnFc!m#J[!_?'td[$Y+EO30JW48u`al*p]+pQ%.IaIz(eLt1a=RpN+IIuSI6>2pVG*(l/YR]55R_8*)f_>Bgu2u'nh]2pe>#n88><bUjXaW]vuR5bchvb:ei<F/=Ow02i-18D:GO_nsX`cEj282$Vr6Zz2%R'kHi(@n0veK7e2'oM:Nd`*Ju!l/-nAbGM6F3YP-^KnNXwhV1.yggNVGk!<5]]1?:av7hOQ/L#d9Q``r=MjWhED9SGjg%*!X1%w5kp?T<s?7j=KIM8Y$p1^aigtynxfq/5MBw3X2Bc8E5EILM(^k=p=B+%ra1D^CUB]=ExTXvZx7go4f/B(8iGBZRtY(q7_H=>?rY$IpGd).%<kME<mKK:vp7SPIctG0J8vv3y6MW*)3'5N9/Ju_Oiw98wU4016EdBD9et%n*bF_uZv<jF>'E9@VDsp3i^mu'h2R4Y8:2]ko2YdA.(6d3^+<+4Dyo=Z-Mi./o1lsVmD61*JVd'oKHFeNRD*7d:wbm(7!4vs*Ahw+.XI2d?c4#.oV1%5pnBbLG^!1N1qfcZMzhBp?xs_#Ch3o$L$hlWShx4HF@u5e9Agc*S[s.gz6MTIX6:[Rm`v-pH_<(#8YlG`P_(PHOI*v*N^mA!>5Nmh8U?<WbeUp48=ux1N@quBj7U2(Kz1W7/>7`Q2A-`C7X6N_.bl/V/43$YC8N6zsH:M$KCuLYMOsnBP<3v3h)rg2aA2V?PAMJ71Lc*oW?!I^KQ<CPZ#RByeri1*v-M93nj?6#tLa?5o-[E*.ys9IO/2KA)?C^vETDT%wlq#n=7kK9_U`Q=HTXf]UwV(h8H2]_x^UElV<-Jd.hF$pC#O#+2f1I6i44C<NiDE$C2Lhv=z#pWxmO?o4I*aMM2t])V=p>>i[n-caqAWXA`#7(h2Ka8_dV7bgcPn_h./y#T[!AyHDHC-4!t3+wnhevgtid(Y=^JnIDS6@J$jP$wO`bUt5^neF+TZMJNpbwFfE=<dL.627>80_)>!ViHO9vIz7]ebKf-WPz3-%7p]w_1nPBQvyJ_r*^cg['SV3T/t<?UK_ZqgOio<1W])V^'PW^Ev7^_`McptJ3`gig?Hh$DFMgh[yImN`wByK71`iKGSxl_8Fq!?p7WW)]R:8XVy+fJZ6zYJc@ro-!w@'_WN`A7'O]Slqu3gh/RPX!Ps[gJp9IA439Ki3Ak!II7Lgy997x07G][')<2wjcG9H3f'SwP/wKC>o=F7#3mMF9R#1U[5^SQCb:VD*7$G9pj9i7hJq:DMe)rg`lO-kf#2>mv3gzo_Jf?`*]*afAw3#21o/#YC<A9@/!41/zOjJ'<W2Z@?6EN2's]ZiejVf$y!mpqR7b2[ulW:gR.5Bcv[6>C9'Gl7uJHY1%@j!sP>>!bCYTvKIL>wV*`Hq.2q/FBSY?F%_iT7I%AZ<.`7@*)oo+DWqHvHvnK?4OQEzdX+4g.Mrhj(H<WjfG<4gIa([228)?u)*nhKni6oaXD!1YJEEVg07mn1_Ad:6cO37jyJ3?2L?+)$PaLfTKocjN55L@E+cMm4)k3jv.$z_rTgYiga4/""",
    'i00' : '002963d5e4108c32058a5679f0001%3B5a097588%3B5a3b7d3c',
    'icu' : 'ChgI95w7EAoYAiACKAIwuPaX0AU4AkACSAIQuPaX0AUYAQ..',
    'sess' : '1',
    'uuid2' : '869611879133359501',
}
如果我在cookie中提供任何个人信息,请通知我。感谢。
我运行脚本,期望得到与在浏览器中手动提交表单时相同的数据,但这不起作用。
在我的控制台中,我得到:
Test: download()
FAIL!
Done.
由我的脚本生成的 output.html 文件包含“Bitte wählen Sie das Start- und Enddatum des Zeitraumes, für den Sie historische Kursdaten anzeigen möchten.”(即“请选择您要显示历史价格的时间段的开始和结束日期”)而不是数据。这是一条错误消息,提示我应向表单提交数据——但正如你所见,我已经把数据发送给网站了!
目前我不明白我可以更改什么来获取数据而不是此消息。拜托,请你帮个忙吗?感谢您的努力,请原谅我的英语!
答案 0 :(得分:2)
您可以选择selenium来获得相同的结果。所以我修改了你的代码以实现你想要获得的值。
# -*- coding: utf-8 -*-
import datetime
import requests
import time
import webbrowser
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import io
# Exchange names in the order used to compute the dropdown option position.
_BOERSEN = (
    'Budapest',
    'Berlin',
    'Baader Bank',
    'BX Swiss',
    'Bats',
    'Chi-X',
    'Düsseldorf',
    'Frankfurt',
    'Hamburg',
    'Hannover',
    'Lang und Schwarz',
    'München',
    'Mexiko',
    'Nasdaq OTC',
    'XETRA',
    'Prag',
    'Stuttgart',
    'Swiss Exchange',
    'Tradegate',
    'Wien',
    'Quotrix',
)


def getBoersenIndex(boerse):
    """Get the 1-based index of the given boerse.

    To set the value of the boersen dropdown, you have to know the index of
    the chosen boerse.

    Keyword arguments:
    boerse -- name of boerse as you see in dropdown at the website

    Returns the position of `boerse` in the known list, counting from 1.

    Raises ValueError if the name is not in the list. Previously the function
    fell through and returned None, which the caller turned into the string
    "None" inside an XPath expression.
    """
    try:
        return _BOERSEN.index(boerse) + 1
    except ValueError:
        raise ValueError(
            "Unknown boerse %r; expected one of: %s"
            % (boerse, ", ".join(_BOERSEN))
        )
def download(startDay, startMonth, startYear, boerse, url,
             driver_path="chromedriver"):
    """Fill in the historical-prices form with Selenium and save the result.

    startDay    -- start day; used as the option position in the day dropdown
    startMonth  -- start month; used as the option position in the month dropdown
    startYear   -- start year; converted to an option position (1998 -> 1)
    boerse      -- exchange name, resolved to a position by getBoersenIndex
    url         -- page that contains the form
    driver_path -- path to the chromedriver executable; the default relies on
                   "chromedriver" being found on PATH

    The end date is always today. Prints "OK" when the resulting page source
    contains "08.11.2017" and "FAIL!" otherwise, then writes the page source
    to output.html.
    """
    today = datetime.date.today()

    # (select name, 1-based option position) for every dropdown of the form.
    # The year dropdowns use the index as a value starting with 1998 = 1.
    selections = [
        ("inTag1", str(startDay)),
        ("inMonat1", str(startMonth)),
        ("inJahr1", str(startYear + 1 - 1998)),
        ("inTag2", str(today.day)),
        ("inMonat2", str(today.month)),
        ("inJahr2", str(today.year + 1 - 1998)),
        ("strBoerse", str(getBoersenIndex(boerse))),
    ]

    # this is my webdriver implementation, you may use another one
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    browser = webdriver.Chrome(driver_path, chrome_options=options)
    try:
        browser.get(url)
        time.sleep(1)  # fixed pause after loading the page, before using the form
        for name, position in selections:
            xpath = "//select[@name='" + name + "']/option[" + position + "]"
            browser.find_element_by_xpath(xpath).click()
        browser.find_element_by_css_selector("span.button").click()
        page = browser.page_source
    finally:
        # Always shut the browser down so no headless Chrome process is left
        # behind, even when an element lookup raises.
        browser.quit()

    if "08.11.2017" in page:
        print("OK")
    else:
        print("FAIL!")
    with io.open("output.html", "w", encoding='utf8') as f:
        f.write(page)
if __name__ == "__main__":
    # Manual smoke test: run the form for Daimler on XETRA from 1 Jan 1998,
    # then open the saved page in the default browser.
    print("Test: download()")
    target_url = 'http://www.finanzen.net/historische-kurse/daimler'
    download(1, 1, 1998, 'XETRA', target_url)
    print("Done.")
    webbrowser.open("output.html")