Question

此循环使用了大量的ram。对于20kb的文本文件，有人可以帮助我将其格式化为迭代而不是递归吗？当我进入3-4gb的ram使用时，我不断遇到递归错误。我尝试与open一起使用以关闭流并使其更具pythonic性。这个方法循环只能读取大约10分钟的数据，然后才对我退出。

def getgameticks():
    gameticksurl = 'https://pro.stubhub.com/simweb/sim/services/priceanalysis?eventId=' + variable + '&sectionId=0'
    print(gameticksurl)
    # options = Options()
    # options.add_argument("--headless")
    # browser = webdriver.Firefox()#firefox_options=options)
    browser.get(gameticksurl)
    global wait
    wait = WebDriverWait(browser, 30)
    sleep(3)
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(3)
    wait.until(expected_conditions.presence_of_element_located((By.ID, 'listingsPerPage')))
    browser.find_element_by_id('listingsPerPage').click
    sleep(2)
    select = Select(browser.find_element_by_id('listingsPerPage'))
    select.select_by_visible_text('150')
    gameinfo()
global trip
trip = False
def gameinfo():
    wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    html_doc = browser.page_source
    soup = BeautifulSoup(html_doc, 'html.parser')
    wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="listingPageNumber"]')))
    try:
        select = Select(browser.find_element_by_xpath('//*[@id="listingPageNumber"]'))
        current = select.all_selected_options[0].text
        last = [option.text for option in select.options][-1]
        pronto = False
    except:
        print('Something broke...Getting around it though...')
        gameinfo()

    if current == last:
        global trip
        trip = True
        browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
        wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
        browser.find_element_by_xpath('//*[@id="filterBtn"]').click()
        wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
        gameinfo()
    else:
        wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="listingNextBtn"]')))
        browser.find_element_by_xpath('//*[@id="listingNextBtn"]').click()
        pass
    dir_path = os.path.dirname(os.path.realpath(__file__))
    file_path = (dir_path+'\Sheets')
    try:
        os.makedirs(file_path)
    except:
        pass
    #######################
    for mytable in soup.find_all('table'):
        for trs in mytable.find_all('tr'):
            tds = trs.find_all('td')
            row1 = [elem.text.strip() for elem in tds]
            row = str(row1)
            cool = row.replace("[", "")
            coolp = cool.replace("]", "")
            cool2 = coolp.replace("'", "")
            cool3 = cool2.replace(" , ", "")
            row = cool3
            rowtest = (row.split(','))
            if len(rowtest) != 5:
                rowtest = ['NULL', 'NULL', 'NULL', 'NULL', 'NULL']
            row = (','.join(rowtest))
            rowtest0 = rowtest[:4] # LISTING WITHOUT DAYS LISTED
            rowtest1 = rowtest[0:1] # SECTION LOCATION
            rowtest2 = rowtest[1:2] # TICKET PRICE
            rowtest3 = rowtest[2:3] # ROW
            rowtest4 = rowtest[3:4] # TICKET QTY  
            rowtest5 = rowtest[4:5] # DAYS LISTED
            ###TABLE STUFF#

            row0 = (','.join(rowtest0)) #ROW STRING WITHOUT DAYS LISTED
            with open(file_path+'\\'+variable+'.txt', "a+") as openit:
                pass

            #TABLE STUFF
            with open(file_path+'\\'+variable+'.txt', "r+") as file:
                for line in file:
                    linez = (line.split(',')) #LINE AS LIST
                    linezprice = (linez[-3]) #LINE PRICE
                    if row0+"\n" in line:
                        break

                else:
                    file.write(row0+"\n") 
                    print(row)
                    if trip == False:
                        pass
                    else:
                        slack_token1 = 'xoxb-420561995540-420693438947-JAZmP1pdfg6FkqnTTziPdggr'
                        sc1 = SlackClient(slack_token1)

                        sc1.api_call(
                            "chat.postMessage",
                            channel=channame,
                            text=row
                        )




    while True:
        gameinfo()

Answer 1

您似乎想不断抓取某些网站- 只需删除除无穷循环之外对gameinfo的所有调用-没有理由将其作为递归进行

使函数迭代而不是递归

1 个答案: