Question

我有一个 MongoDB 集合，其中保存了若干英超联赛的比赛记录，文档格式如下：

{"HomeTeam": "Chelsea", "HTScore": 2, "FTR": 'D', "AwayTeam": "Everton", "ATScore":2},
{"HomeTeam": "Chelsea", "HTScore": 3, "FTR": 'H', "AwayTeam": "Wolves", "ATScore":1},
{"HomeTeam": "Chelsea", "HTScore": 2, "FTR": 'D', "AwayTeam": "Everton", "ATScore":2},
.....
{"HomeTeam": "Liverpool", "HTScore": 1, "FTR": 'A', "AwayTeam": "Aston Villa", "ATScore":3}
{"HomeTeam": "Liverpool", "HTScore": 6, "FTR": 'H', "AwayTeam": "Chelsea", "ATScore":3}
etc.

其中 HomeTeam 和 AwayTeam 代表比赛的两支球队，HTScore 代表 HomeTeam 的得分，ATScore 代表 AwayTeam 的得分，FTR 代表全场比赛结果（'A'：客队获胜，'D'：平局，'H'：主队获胜）

我想创建一个这种类型的表，在上面的例子中是这样的

HomeTeam  | Number of Victories | Number of Draws | Number of Defeats | Max Goals scored in a Match
---------------------------------------------------------------------------------------
Chelsea   |         1           |        2        |         0         |          3
---------------------------------------------------------------------------------------
Liverpool |         1           |        0        |         1         |          6

有没有办法通过迭代 FTR 的值来自动创建这些字段？提前非常感谢！

Answer 1

您可以通过这种方式使用聚合管道和 import codecs import csv import time from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from check_file import check_file project_path = 'C:\\Users\\Shyam\\PycharmProjects\\TNregi_Scrape\\' logs_path = f'{project_path}logs\\' def myscraper(mar_type, mar_place, mar_year): options = Options() options.headless = True options.add_argument("--window-size=1920,1200") print('loading...\n') print(f'--------------> {mar_place}\n') print("running myScraper...") start_no = check_file(mar_type, mar_place, mar_year) location = mar_place year = mar_year # < file_path = (f'{project_path}{mar_type}\\{mar_year}\\') file_name = f'{file_path}RECORDS_{mar_type}_{location}_{year}.csv' # FILENAME HERE <---- print(f'data will be saved to > {file_name}\n') driver = webdriver.Chrome(options=options, executable_path='/chromedriver') driver.get('https://tnreginet.gov.in/portal/') delay = 60 # seconds time.sleep(3) print('navigating to form...') en = driver.find_element_by_xpath('//*[@id="fontSelection"]').click() # change site to english # navigating to form more = driver.find_element_by_xpath('//*[@id="1195002"]/a') search1 = driver.find_element_by_xpath('//*[@id="8500020"]/a') hov_mar = driver.find_element_by_xpath('//*[@id="90000403"]/a') hover = ActionChains(driver).move_to_element(more).move_to_element(search1).move_to_element(hov_mar) hover.click().perform() time.sleep(0.5) # wait till load try: myElem = WebDriverWait(driver, delay).until(EC.invisibility_of_element_located((By.ID, 'statusbar'))) print("Page is ready!") except: print("navigating took too much time!") driver.quit() try: for x in range(start_no, 2000): # looping for each reg number print('__________START LOOP__________') # filling form m_type = 
driver.find_element_by_xpath('//*[@id="cmb_marrType"]').click() if mar_type == 'TMR1': sel = driver.find_element_by_xpath('//*[@id="cmb_marrType"]/option[3]').click() # TN MAR FORM I elif mar_type == 'HMR': sel = driver.find_element_by_xpath('//*[@id="cmb_marrType"]/option[2]').click() # HINDU MARRIAGE search_by = driver.find_element_by_xpath('//*[@id="Search_Criteria_Two"]') hover = ActionChains(driver).move_to_element(search_by) hover.click().perform() office = driver.find_element_by_xpath('//*[@id="cmb_sub_registrar_office"]').send_keys(location) in_reg_no = driver.find_element_by_xpath('//*[@id="RegNO1"]').send_keys(x) in_year = driver.find_element_by_xpath('//*[@id="Year"]').send_keys(year) submit = driver.find_element_by_xpath('//*[@id="CopyOfMarriageSearch"]/div[2]/div/div[18]/input') hover = ActionChains(driver).move_to_element(submit) hover.click().perform() # click submit print(f'Loading reg no: {x} in {location}') ###### WAIT till page load ###### time.sleep(0.5) try: myElem = WebDriverWait(driver, delay).until(EC.invisibility_of_element_located((By.ID, 'statusbar'))) #wait till loading gif disappear time.sleep(0.5) new_reg_no = driver.find_element_by_xpath('//*[@id="MarriageMstListDisp"]/tbody/tr/td[1]').text if res_reg_no == new_reg_no: #additional wait in case of duplicate old data due to javascript rendering time.sleep(3) print("table is ready!") except: print("Loading table took too much time!") ###### EXTRACT DATA FROM TABLE #####p print('Saerching for table to Extract data...') res_reg_no = driver.find_element_by_xpath('//*[@id="MarriageMstListDisp"]/tbody/tr/td[1]').text res_hus = driver.find_element_by_xpath('//*[@id="MarriageMstListDisp"]/tbody/tr/td[2]').text res_wife = driver.find_element_by_xpath('//*[@id="MarriageMstListDisp"]/tbody/tr/td[3]').text res_w_par = driver.find_element_by_xpath('//*[@id="MarriageMstListDisp"]/tbody/tr/td[8]').text print('Table Found') print('-----------------------') print(f'| {location} | {res_wife}') 
print('-----------------------') # mType.send_keys('- Select -') print('start csv write...') ##### write to CSV FILE ##### with codecs.open(file_name, mode='a', encoding='utf-8') as RECORDS_file: employee_writer = csv.writer(RECORDS_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) employee_writer.writerow([x, res_reg_no, res_hus, res_wife, res_w_par]) print('Write to CSV success !') print('**********END**********') driver.quit() except: # driver.quit() # if error caused by invalid reg number (reg number not present, max value reached) close the driver print('\n \n \n') print('+++++++++++++ REQUIRES ATTENTION +++++++++++++\n') print('\n \n \n') print('error in ---= >', x) with codecs.open(f"{logs_path}Completed_{mar_type}_{year}.txt", mode='a+', encoding='utf-8') as completed_file: print('error in --->', x) completed_file.write(f' Ended at ----->>>>> Place: {mar_place} | loop no: {x} | RegNo: {res_reg_no}| year: {year} \n') print(f" Ended at ----->>>>> Place: {mar_place} | loop no: {x} | RegNo: {res_reg_no}| year: {year} \n") print('error in --->', x) time.sleep(0.5) driver.quit() print('error in --->', x) print('\n') print('error in --->', x) driver.quit() driver.quit() # myscraper('TMR1', 1, 'ADAYAR', 2015) #REMOVE AT END for test run：

使用 $group 获取主队数据。在这里，您为每支主队添加一个名为 result 的新字段，用于存储全场比赛结果，另一个名为 goals 的字段用于存储进球数。
然后使用 $filter 统计 result 中有多少个“D”、“H”或“A”，即有多少场平局、胜利或失败。
最后使用 $project 来获取您想要的值。在这种情况下，即各数组的 $size 和进球数 goals 的 $max 值。

示例here

MongoDB - 为数组的每个元素创建一个字段

1 个答案: