我正在使用 BeautifulSoup 从网站抓取数据。页面上的部分数据我已经可以获取,但无法获取由 JavaScript 弹出窗口显示的数据。
网址为:http://www.afl.com.au/afl/stats/player-ratings/overall-standings#club/CD_T20
我附加了一个屏幕截图并指出了具有JS元素的链接。
当我点击链接(用红色箭头标记)时,会出现一个弹出窗口。我需要在 Python 中使用 beautifulsoup4 获取该弹出窗口中的数据。
屏幕截图如下:
请帮帮我......
答案 0 :(得分:2)
该页面使用 AJAX 来获取球员的详细信息。响应实际上是 JSON,因此您可以在 Python 中复现这一行为。
表格单元格中的链接带有 data-playerid 属性:
<a data-playerid="CD_I271072" href="javascript:void(0);">Daniel Rich</a></td>
页面使用该球员 ID,通过 AJAX 从以下地址加载数据:
http://www.afl.com.au/api/cfs/afl/playerProfile/CD_I271072
和
http://www.afl.com.au/api/cfs/afl/playerRatings?playerId=CD_I271072&pageSize=100
回复包含JSON数据:
{
"playerProfile" : {
"id" : "CD_I271072",
"position" : "Left Half Back",
"surname" : "Rich",
"jumperNumber" : 10,
"milestones" : null,
"careerAverages" : {
"goals" : 0.7,
"behinds" : 0.7,
"superGoals" : null,
"kicks" : 11.3,
"handballs" : 7.8,
"disposals" : 19.1,
"marks" : 2.8,
"bounces" : 0.1,
"tackles" : 4.0,
"contestedPossessions" : 8.6,
"uncontestedPossessions" : 10.5,
"totalPossessions" : 19.1,
"inside50s" : 4.5,
"marksInside50" : 0.1,
"contestedMarks" : 0.2,
"hitouts" : 0.1,
"onePercenters" : 1.5,
"disposalEfficiency" : null,
"clangers" : 2.3,
"freesFor" : 0.8,
"freesAgainst" : 1.0,
"dreamTeamPoints" : 76.4,
"clearances" : {
"centreClearances" : 1.4,
"stoppageClearances" : 2.3,
"totalClearances" : 3.7
},
"rebound50s" : 1.6,
"goalAssists" : 0.6,
"goalAccuracy" : null,
"ratingPoints" : null,
"ranking" : null,
"interchangeCounts" : null
},
"firstName" : "Daniel",
"bio" : "<p>Daniel Rich is a high possession-winning in-and-under midfielder with a penetrating left foot and quality skills. The high-profile West Australian recruit received the AFL Rising Star Award in his debut season with the Lions and is now widely regarded as one of the most damaging midfielders in the AFL competition.</p>",
"photoUrl" : "http://m.afl.com.au/staticfile/AFL Tenant/BrisbaneLions/Player Profiles/2014 - Profiles/RICH Daniel.png",
"aflAwards" : null,
"clubAwards" : null,
"qa" : null,
"sponsor" : null,
"basicStats" : {
"dateOfBirth" : "1990-06-07T02:00:00.000+0000",
"draftYear" : "2008",
"heightInCm" : 183,
"weightInKg" : 84,
"recruitedFrom" : "Subiaco (WA)",
"debutYear" : "2009"
},
"careerStats" : {
"goals" : 67.0,
"behinds" : 66.0,
"superGoals" : null,
"kicks" : 1139.0,
"handballs" : 787.0,
"disposals" : 1926.0,
"marks" : 285.0,
"bounces" : 8.0,
"tackles" : 403.0,
"contestedPossessions" : 867.0,
"uncontestedPossessions" : 1060.0,
"totalPossessions" : 1927.0,
"inside50s" : 452.0,
"marksInside50" : 14.0,
"contestedMarks" : 24.0,
"hitouts" : 8.0,
"onePercenters" : 156.0,
"disposalEfficiency" : 69.2,
"clangers" : 237.0,
"freesFor" : 85.0,
"freesAgainst" : 101.0,
"dreamTeamPoints" : 7716.0,
"clearances" : {
"centreClearances" : 141.0,
"stoppageClearances" : 233.0,
"totalClearances" : 374.0
},
"rebound50s" : 166.0,
"goalAssists" : 59.0,
"goalAccuracy" : 44.4,
"ratingPoints" : null,
"ranking" : null,
"interchangeCounts" : null
},
"yearlySeasonStats" : [ {
"year" : "2014",
"seasonId" : "CD_S2014014",
"totalsAndAverages" : {
"averages" : {
"stats" : {
"goals" : 0.0,
"behinds" : 0.3,
"superGoals" : null,
"kicks" : 8.0,
"handballs" : 7.7,
"disposals" : 15.7,
"marks" : 3.7,
"bounces" : 0.0,
"tackles" : 2.7,
"contestedPossessions" : 9.7,
"uncontestedPossessions" : 6.0,
"totalPossessions" : 15.7,
"inside50s" : 0.7,
"marksInside50" : 0.0,
"contestedMarks" : 0.3,
"hitouts" : 0.0,
"onePercenters" : 2.3,
"disposalEfficiency" : null,
"clangers" : 1.7,
"freesFor" : 0.7,
"freesAgainst" : 0.7,
"dreamTeamPoints" : 60.0,
"clearances" : {
"centreClearances" : 0.7,
"stoppageClearances" : 1.7,
"totalClearances" : 2.3
},
"rebound50s" : 3.0,
"goalAssists" : 0.0,
"goalAccuracy" : null,
"ratingPoints" : null,
"ranking" : null,
"interchangeCounts" : null
},
"player" : {
"playerId" : "CD_I271072",
"playerName" : {
"givenName" : "Daniel",
"surname" : "Rich"
},
"captain" : false,
"playerJumperNumber" : null
},
"teamId" : "CD_T20",
"gamesPlayed" : 3.0,
"timeOnGroundPercentage" : null
},
"totals" : {
"stats" : {
"goals" : 0.0,
"behinds" : 1.0,
"superGoals" : null,
"kicks" : 24.0,
"handballs" : 23.0,
"disposals" : 47.0,
"marks" : 11.0,
"bounces" : 0.0,
"tackles" : 8.0,
"contestedPossessions" : 29.0,
"uncontestedPossessions" : 18.0,
"totalPossessions" : 47.0,
"inside50s" : 2.0,
"marksInside50" : 0.0,
"contestedMarks" : 1.0,
"hitouts" : 0.0,
"onePercenters" : 7.0,
"disposalEfficiency" : 72.3,
"clangers" : 5.0,
"freesFor" : 2.0,
"freesAgainst" : 2.0,
"dreamTeamPoints" : 180.0,
"clearances" : {
"centreClearances" : 2.0,
"stoppageClearances" : 5.0,
"totalClearances" : 7.0
},
"rebound50s" : 9.0,
"goalAssists" : 0.0,
"goalAccuracy" : 0.0,
"ratingPoints" : 495.3,
"ranking" : 22.0,
"interchangeCounts" : null
},
"player" : {
"playerId" : "CD_I271072",
"playerName" : {
"givenName" : "Daniel",
"surname" : "Rich"
},
"captain" : false,
"playerJumperNumber" : null
},
"teamId" : "CD_T20",
"gamesPlayed" : 3.0,
"timeOnGroundPercentage" : 63.3
}
}
}, // etc.
],
"seasonStats" : {
"goals" : 0.0,
"behinds" : 1.0,
"superGoals" : null,
"kicks" : 24.0,
"handballs" : 23.0,
"disposals" : 47.0,
"marks" : 11.0,
"bounces" : 0.0,
"tackles" : 8.0,
"contestedPossessions" : 29.0,
"uncontestedPossessions" : 18.0,
"totalPossessions" : 47.0,
"inside50s" : 2.0,
"marksInside50" : 0.0,
"contestedMarks" : 1.0,
"hitouts" : 0.0,
"onePercenters" : 7.0,
"disposalEfficiency" : 72.3,
"clangers" : 5.0,
"freesFor" : 2.0,
"freesAgainst" : 2.0,
"dreamTeamPoints" : 180.0,
"clearances" : {
"centreClearances" : 2.0,
"stoppageClearances" : 5.0,
"totalClearances" : 7.0
},
"rebound50s" : 9.0,
"goalAssists" : 0.0,
"goalAccuracy" : 0.0,
"ratingPoints" : 495.3,
"ranking" : 22.0,
"interchangeCounts" : null
},
"latestPlayerRating" : {
"position" : "MIDFIELDER",
"roundId" : "CD_R201401407",
"player" : {
"playerId" : "CD_I271072",
"playerName" : {
"givenName" : "Daniel",
"surname" : "Rich"
},
"captain" : false,
"playerJumperNumber" : null
},
"team" : {
"teamId" : "CD_T20",
"teamAbbr" : "BL",
"teamName" : "Brisbane Lions",
"teamNickname" : "Lions"
},
"detailedRatings" : [ {
"ratingPoints" : 478,
"ranking" : 28,
"ratingType" : "OVERALL",
"trend" : "FALLING_FAST"
}, {
"ratingPoints" : 478,
"ranking" : 1,
"ratingType" : "TEAM",
"trend" : "NO_CHANGE"
}, {
"ratingPoints" : 478,
"ranking" : 24,
"ratingType" : "POSITION",
"trend" : "FALLING_FAST"
} ]
},
"careerGamesPlayed" : 101
}
}
和
{
"playerRatings" : [
{
"position": "MIDFIELDER",
"roundId": "CD_R201401407",
"player": {
"playerId": "CD_I271072",
"playerName": {
"givenName": "Daniel",
"surname": "Rich"
},
"captain": false,
"playerJumperNumber": null
},
"team": {
"teamId": "CD_T20",
"teamAbbr": "BL",
"teamName": "Brisbane Lions",
"teamNickname": "Lions"
},
"detailedRatings": [
{
"ratingPoints": 478,
"ranking": 28,
"ratingType": "OVERALL",
"trend": "FALLING_FAST"
},
{
"ratingPoints": 478,
"ranking": 1,
"ratingType": "TEAM",
"trend": "NO_CHANGE"
},
{
"ratingPoints": 478,
"ranking": 24,
"ratingType": "POSITION",
"trend": "FALLING_FAST"
}
]
},
// etc.
],
"pageNum" : 1,
"pageSize" : 100,
"pagesTotal" : 1,
"ratingsTotal" : 61
}
您可以利用这一点。不过,AJAX 请求需要在请求头中设置 X-media-mis-token 令牌;该令牌可以通过使用会话(用于跟踪 cookie)并向 API URL 发送 POST 请求来获得。
下面是一个结合使用 requests 库和 BeautifulSoup 的示例脚本:
import requests
from bs4 import BeautifulSoup
# Print first name, surname and position for every player linked from the
# player-ratings page, using the same JSON API the page itself calls via AJAX.

# Page that lists the players, the endpoint that issues the API token, and the
# base URL of the per-player profile endpoint (the player id is appended).
page_url = 'http://www.afl.com.au/afl/stats/player-ratings/overall-standings'
token_url = 'http://www.afl.com.au/api/cfs/afl/WMCTok'
player_url = 'http://www.afl.com.au/api/cfs/afl/playerProfile/'

# A single session is reused for every request so that cookies are tracked
# across the page load, the token request and the profile requests.
session = requests.Session()

r = session.get(page_url)
r.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, which can change the parse tree between machines.
soup = BeautifulSoup(r.content, 'html.parser')

# The API rejects requests without an X-media-mis-token header; the token is
# obtained by POSTing to the token endpoint within the same session.
token_r = session.post(token_url)
token_r.raise_for_status()
headers = {'X-media-mis-token': token_r.json()['token']}

# Every player link carries its id in a data-playerid attribute.
for player in soup.find_all('a', {'data-playerid': True}):
    playerid = player['data-playerid']
    data_r = session.get(player_url + playerid, headers=headers)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError below.
    data_r.raise_for_status()
    profile = data_r.json()['playerProfile']
    # %-formatting inside print(...) gives the same output on Python 2 and 3.
    print('%s %s %s' % (
        profile['firstName'], profile['surname'], profile['position']))
最后同样重要的一点:请注意,获取令牌的 POST 响应中包含一条免责声明:
>>> print session.post(token_url).json()['disclaimer']
All content and material contained within this site is protected by copyright owned by or licensed to Telstra. Unauthorised reproduction, publishing, transmission, distribution, copying or other use is prohibited.
开始使用此数据时请考虑到这一点。
答案 1 :(得分:-2)