python selenium返回的列表不完整

时间:2019-08-02 08:04:41

标签: python selenium scroll

我想从某个SoundCloud用户的个人主页获取其所有喜欢(点赞)的曲目,为此必须向下滚动到页面底部,以便动态加载ul中的所有li元素。列表项总数为2589。

我的问题是,它只从列表中获取到1978条音轨,其中包含第一条和最后一条音轨,这意味着缺少的是中间的歌曲。

项目数量是否过多?

这是我的代码:

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from time import sleep

# Seconds passed to sleep() after each scroll-to-bottom, giving the page time
# to load more list items before the document height is measured again.
SCROLL_PAUSE_TIME = 1


def browse_liked_tracks() -> list:
    """Collect the liked-track labels from a SoundCloud likes page.

    Starts a headless Firefox session, opens the hard-coded likes URL, and
    keeps scrolling to the bottom of the document until its scroll height
    stops growing (checked twice in a row), then reads the ``aria-label``
    of every matching track element.

    Returns:
        A list of strings: for each matched element, the text following the
        first ``'Track:'`` marker in its ``aria-label`` attribute (leading
        whitespace is kept as-is).

    Raises:
        AttributeError: if a matched element has no ``aria-label`` value to
            split (``get_attribute`` yields ``None`` in that case).
        IndexError: if an ``aria-label`` does not contain ``'Track:'``.
    """
    options = Options()
    options.headless = True

    firefox_profile = webdriver.FirefoxProfile()
    # Presumably disables image loading to speed up the page -- confirm
    # against the Firefox preference documentation.
    firefox_profile.set_preference('permissions.default.image', 2)
    # Turn off the Flash plugin for the same reason.
    firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)

    driver = webdriver.Firefox(options=options, firefox_profile=firefox_profile)
    try:
        driver.get('https://soundcloud.com/officialdekon/likes')
        driver.implicitly_wait(5)

        scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
        read_height = "return document.body.scrollHeight"

        while True:
            last_height = driver.execute_script(read_height)

            # Scroll down and give the page time to append more items.
            driver.execute_script(scroll_to_bottom)
            sleep(SCROLL_PAUSE_TIME)

            if driver.execute_script(read_height) != last_height:
                # The page grew, so there may be more to load.
                continue

            # Height unchanged: retry once in case the load was merely slow.
            driver.execute_script(scroll_to_bottom)
            sleep(SCROLL_PAUSE_TIME)

            if driver.execute_script(read_height) == last_height:
                # Still unchanged after the retry: treat the list as complete.
                break

        # NOTE(review): @class="sound streamContext" is an exact string match,
        # so elements carrying any additional class are not selected. This may
        # be why fewer items are returned than the page lists -- verify.
        elements = driver.find_elements_by_xpath('//div[@class="sound streamContext"]')

        # maxsplit=1 keeps the whole remainder, so a title that itself
        # contains 'Track:' is not truncated.
        tracks = [
            element.get_attribute('aria-label').split('Track:', 1)[1]
            for element in elements
        ]
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()

    return tracks


def file_writer(tracks: list):
    """Write each track on its own line to ``dekon_liked_tracks.txt``.

    The file is created in the current working directory and overwritten if
    it already exists. It is written as UTF-8 so that titles containing
    non-ASCII characters do not depend on the platform's default encoding.

    Args:
        tracks: Items to write; each is formatted with ``str.format`` and
            followed by a newline.
    """
    with open('dekon_liked_tracks.txt', 'w', encoding='utf-8') as f:
        f.writelines('{}\n'.format(track) for track in tracks)


def main():
    """Scrape the liked tracks and save them to the output file."""
    liked_tracks = browse_liked_tracks()
    file_writer(liked_tracks)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

0 个答案:

没有答案