此循环使用了大量的ram。对于20kb的文本文件,有人可以帮助我将其格式化为迭代而不是递归吗?当我进入3-4gb的ram使用时,我不断遇到递归错误。我尝试与open一起使用以关闭流并使其更具pythonic性。这个方法循环只能读取大约10分钟的数据,然后才对我退出。
def getgameticks():
gameticksurl = 'https://pro.stubhub.com/simweb/sim/services/priceanalysis?eventId=' + variable + '§ionId=0'
print(gameticksurl)
# options = Options()
# options.add_argument("--headless")
# browser = webdriver.Firefox()#firefox_options=options)
browser.get(gameticksurl)
global wait
wait = WebDriverWait(browser, 30)
sleep(3)
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
sleep(3)
wait.until(expected_conditions.presence_of_element_located((By.ID, 'listingsPerPage')))
browser.find_element_by_id('listingsPerPage').click
sleep(2)
select = Select(browser.find_element_by_id('listingsPerPage'))
select.select_by_visible_text('150')
gameinfo()
global trip
trip = False
def gameinfo():
wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
html_doc = browser.page_source
soup = BeautifulSoup(html_doc, 'html.parser')
wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="listingPageNumber"]')))
try:
select = Select(browser.find_element_by_xpath('//*[@id="listingPageNumber"]'))
current = select.all_selected_options[0].text
last = [option.text for option in select.options][-1]
pronto = False
except:
print('Something broke...Getting around it though...')
gameinfo()
if current == last:
global trip
trip = True
browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
browser.find_element_by_xpath('//*[@id="filterBtn"]').click()
wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="filterBtn"]')))
gameinfo()
else:
wait.until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="listingNextBtn"]')))
browser.find_element_by_xpath('//*[@id="listingNextBtn"]').click()
pass
dir_path = os.path.dirname(os.path.realpath(__file__))
file_path = (dir_path+'\Sheets')
try:
os.makedirs(file_path)
except:
pass
#######################
for mytable in soup.find_all('table'):
for trs in mytable.find_all('tr'):
tds = trs.find_all('td')
row1 = [elem.text.strip() for elem in tds]
row = str(row1)
cool = row.replace("[", "")
coolp = cool.replace("]", "")
cool2 = coolp.replace("'", "")
cool3 = cool2.replace(" , ", "")
row = cool3
rowtest = (row.split(','))
if len(rowtest) != 5:
rowtest = ['NULL', 'NULL', 'NULL', 'NULL', 'NULL']
row = (','.join(rowtest))
rowtest0 = rowtest[:4] # LISTING WITHOUT DAYS LISTED
rowtest1 = rowtest[0:1] # SECTION LOCATION
rowtest2 = rowtest[1:2] # TICKET PRICE
rowtest3 = rowtest[2:3] # ROW
rowtest4 = rowtest[3:4] # TICKET QTY
rowtest5 = rowtest[4:5] # DAYS LISTED
###TABLE STUFF#
row0 = (','.join(rowtest0)) #ROW STRING WITHOUT DAYS LISTED
with open(file_path+'\\'+variable+'.txt', "a+") as openit:
pass
#TABLE STUFF
with open(file_path+'\\'+variable+'.txt', "r+") as file:
for line in file:
linez = (line.split(',')) #LINE AS LIST
linezprice = (linez[-3]) #LINE PRICE
if row0+"\n" in line:
break
else:
file.write(row0+"\n")
print(row)
if trip == False:
pass
else:
slack_token1 = 'xoxb-420561995540-420693438947-JAZmP1pdfg6FkqnTTziPdggr'
sc1 = SlackClient(slack_token1)
sc1.api_call(
"chat.postMessage",
channel=channame,
text=row
)
while True:
gameinfo()
答案 0 :(得分:1)
您似乎想不断抓取某些网站- 只需删除除无穷循环之外对gameinfo的所有调用-没有理由将其作为递归进行