我想从某个 SoundCloud 用户的个人主页中获取其所有喜欢(点赞)的曲目。为此,我必须一直向下滚动到页面底部,才能让 ul 中的所有 li 元素动态加载出来。列表中共有 2589 项。
我的问题是:它只从列表中获取到了 1978 首曲目。结果中包含了列表的第一首和最后一首,这说明缺失的歌曲位于列表中间。
项目数量是否过多?
这是我的代码:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from time import sleep
# Seconds to sleep after each scroll-to-bottom so the page can load more items.
SCROLL_PAUSE_TIME = 1
def _scroll_to_bottom(driver) -> int:
    """Scroll to the bottom of the page, pause, and return the page height."""
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Give the page time to request and render the next batch of items.
    sleep(SCROLL_PAUSE_TIME)
    return driver.execute_script("return document.body.scrollHeight")


def browse_liked_tracks() -> list:
    """Collect the liked-track titles from a SoundCloud likes page.

    Starts a headless Firefox session, opens the hard-coded likes URL and
    keeps scrolling to the bottom until the document height stays the same
    for two consecutive attempts. It then reads the ``aria-label`` of every
    matching ``div`` and keeps the text that follows ``'Track:'``.

    Returns:
        A list with one string per matched element, in document order.

    Raises:
        IndexError: If a matched element's label does not contain 'Track:'.

    The browser is always shut down, even when an error occurs part-way
    through, so no headless Firefox process is left behind.
    """
    options = Options()
    options.headless = True
    firefox_profile = webdriver.FirefoxProfile()
    # Presumably blocks image loading to speed up the page -- TODO confirm.
    firefox_profile.set_preference('permissions.default.image', 2)
    # Presumably disables the Flash plugin -- TODO confirm.
    firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)
    driver = webdriver.Firefox(options=options, firefox_profile=firefox_profile)
    try:
        driver.get('https://soundcloud.com/officialdekon/likes')
        driver.implicitly_wait(5)
        while True:
            last_height = driver.execute_script("return document.body.scrollHeight")
            if _scroll_to_bottom(driver) != last_height:
                # The page grew: more items were loaded, keep scrolling.
                continue
            # Height unchanged: retry once before concluding we hit the end,
            # in case the next batch was merely slow to arrive.
            if _scroll_to_bottom(driver) == last_height:
                break
        # NOTE(review): the XPath compares the whole class attribute, so
        # elements carrying any additional class are not matched. This may
        # explain items missing from the result -- verify against the page.
        sounds = driver.find_elements_by_xpath('//div[@class="sound streamContext"]')
        return [
            sound.get_attribute('aria-label').split('Track:')[1]
            for sound in sounds
        ]
    finally:
        # Runs on success and on failure alike.
        driver.quit()
def file_writer(tracks: list):
    """Write each track on its own line to ``dekon_liked_tracks.txt``.

    The file is created in the current working directory and overwritten
    if it already exists.

    Args:
        tracks: The items to write; each is formatted with ``str.format``
            and followed by a newline.
    """
    # Explicit UTF-8: titles are scraped web text and may contain characters
    # that the platform's default encoding cannot represent.
    with open('dekon_liked_tracks.txt', 'w', encoding='utf-8') as f:
        f.writelines('{}\n'.format(track) for track in tracks)
def main():
    """Scrape the liked tracks and save them to the output file."""
    liked_tracks = browse_liked_tracks()
    file_writer(liked_tracks)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()