我是一名 Python 新手,编写了下面这段简单的代码。
import requests
from bs4 import BeautifulSoup
def ColorRequest():
    """Download the roulette state page and print every tag found in it.

    The body of the hard-coded state URL is fetched with ``requests``,
    parsed as HTML with BeautifulSoup, and each element of the parsed
    tree is printed on its own line.

    NOTE(review): the sample response shown for this URL looks like JSON
    rather than HTML, so an HTML parser may find no tags to print --
    confirm the content type before relying on this approach.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    # A page suffix (e.g. ``+ str(page)``) could be appended to vary the URL.
    url = 'http://csgoroll.com/v1/roulette/state?token=xxx'
    # The timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, timeout=10)
    # 'html.parser' is the parser bundled with Python; the previous value
    # 'lxml.parser' is not a parser name BeautifulSoup recognises.
    soup = BeautifulSoup(response.text, 'html.parser')
    # find_all() with no arguments yields every tag in the parsed document.
    for tag in soup.find_all():
        print(tag)
# Run the request only when this file is executed as a script, so that
# importing it from another module does not trigger a network call.
if __name__ == '__main__':
    ColorRequest()
该 URL 对应的页面返回了如下信息:
{
"gameState": "2",
"currentGame": "9965904",
"startDate": "Mon Dec 05 2016 19:57:25 GMT+0000 (UTC)",
"rolls": [
{
"id": 9965905,
"hash": "f73d96099b1d0e56c1499c81a3d6d595315109b0616f68964186b515944f9005",
"roll": 1,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965906,
"hash": "b17c139731e2404ac3c2c791b1063885e639886ada49a3c51ea05381fcc774e2",
"roll": 6,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965907,
"hash": "8affa37e1b3ecc1a3201c36dcfc6e670725b209565a345b4df71242d68a043f2",
"roll": 14,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965908,
"hash": "755f7895e1f39835679753c881529ea88d37cec730a1d73fcd97155797fdf7d4",
"roll": 13,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965909,
"hash": "949b3e0141fce775b5f08debac3ee83cfe8135a1f4fa5ebfa7e110c21e2d8330",
"roll": 12,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965910,
"hash": "258c9a9dc3f46dd2de4746ce810f751fd1e175b00233268558cf70ad9ed750bb",
"roll": 13,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965911,
"hash": "61d20e3ff8da82bf0717505272ec41ea13801fd6f72d65d2562208664e47171e",
"roll": 1,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965912,
"hash": "ed922d803ef9d2d182bc68caad725c3b95722a9223ded6b544b0c715a58f7544",
"roll": 9,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965913,
"hash": "480f5cfb5fcd2483d08591f68021bdbc530696e7bad366414fb6fb1704cc45e5",
"roll": 14,
"state": 3,
"created_at": null,
"updated_at": null
},
{
"id": 9965914,
"hash": "31b9299695e4090e840ad2b1afb2f6d5840cb46cb2f028df8e5f539ecb3e8027",
"roll": 2,
"state": 3,
"created_at": null,
"updated_at": null
}
我刚接触网页抓取,想请教一下:为什么我的 HTML 抓取程序无法获取页面提供的数据?这些数据是用什么语言(格式)写的?如果它不是一种语言,我该如何修改代码,让我的程序以后能够读取它?
答案 0 :(得分:1)
数据是 JSON 格式,而不是 HTML。可以正常工作的代码片段大致如下:
import requests
import json
def ColorRequest():
    """Fetch the roulette state as JSON and print each entry of ``rolls``.

    The body of the hard-coded state URL is downloaded, decoded as JSON,
    and every element of the top-level ``rolls`` list is printed on its
    own line.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the decoded object has no ``rolls`` key.
    """
    # A page suffix (e.g. ``+ str(page)``) could be appended to vary the URL.
    url = 'http://csgoroll.com/v1/roulette/state?token=xxx'
    # The timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, timeout=10)
    # Fail early on 4xx/5xx so an error page is not fed to the JSON decoder.
    response.raise_for_status()
    state = json.loads(response.text)
    for roll in state['rolls']:
        print(roll)
# Run the request only when this file is executed as a script, not on import.
if __name__ == '__main__':
    ColorRequest()