使用Selenium从csv自动填充表格

时间:2018-03-14 09:17:21

标签: python python-3.x selenium selenium-webdriver web-scraping

我在抓这个网站:

https://login.aviva.com.sg/directinsurance/homeinsurance.htm

我想填写csv文件中的所有表单元素(我们称之为“profil”。当我尝试用独特的“profil”填充表单时,一切都运行正常。但是当我在循环上执行不同的配置文件我有多个问题:

  • 有时我无法从邮政编码中“获取”完整地址(见下文),因此我无法获得最终报价

  • 司机很爽,但我没有得到profil中不同人的引用。 这是我的一种形式:

              profil = [["MRS     ","Corinne","SIMON","F","M","600     ","No, for myself and my family","72603190","2017-H1","CO      ","Ridout Road","10","91 - 124","27 - 38","099197","S4553141D","1958","5","1"],
    
              ["MS      ","Corinne","MOREAU","F","D","610     ","Yes, for myself","63856280","2017-H1","CO      ","Stevens","10","38 - 208","24 - 40","099198","S9186686B","1999","10","1"],
    
              ["MDM     ","Corinne","DUBOIS","F","W","620     ","Yes,for my family","71852991","2017-H1","CO      ","Stevens","10","38 - 208","24 - 40","099200","S2243858A","1974","2","1"]
    
        ]`
    

这是我做过的python代码:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import pandas as pd
import csv



driver = webdriver.Firefox()

driver.get("https://login.aviva.com.sg/directinsurance/homeinsurance.htm")



for people in profil: 

  dropdown_salutation = 
  Select(driver.find_element_by_name("person.salutationRef"))
  dropdown_occupation = 
  Select(driver.find_element_by_name("person.occupationRef"))



  dropdown_maritalstatus = 
  Select(driver.find_element_by_name("person.maritalstat")) 
  dropdown_gender=Select(driver.find_element_by_name("person.gender"))
  dropdown_dobDay= Select(driver.find_element_by_name("dobDay"))
  dropdown_dobMonth=Select(driver.find_element_by_name("dobMonth")) 
  dropdown_dobYear=Select(driver.find_element_by_name("dobYear"))
  dropdown_declaration1=Select(driver.find_element_by_name("declaration1"))
  dropdown_declaration2=Select(driver.find_element_by_name("declaration2"))

#now we look for all the other element that we can fill (we select by id first)
  FamilyName_input = driver.find_element_by_id("surname")
  GivenName_input  = driver.find_element_by_id("givname")
  NRIC_input       = driver.find_element_by_id("nric")
  PostalCode       = driver.find_element_by_id("postalCode")
  MobileNo         = driver.find_element_by_id("textfield5")
  Email            = driver.find_element_by_id("email")


# Then we fill everything
  dropdown_salutation.select_by_value(people[0])
  GivenName_input.send_keys(people[1])
  FamilyName_input.send_keys(people[2])
  dropdown_gender.select_by_value(people[3])
  dropdown_maritalstatus.select_by_value(people[4])
  dropdown_occupation.select_by_value(people[5])
  MobileNo.send_keys(people[7])
  NRIC_input.send_keys(people[15])
  dropdown_dobYear.select_by_value("people[16]")
  dropdown_dobMonth.select_by_value(people[17])
  dropdown_dobDay.select_by_value(people[18])
  Email.send_keys("ada@hotmail.com")
  dropdown_declaration1.select_by_value("Y")
  dropdown_declaration2.select_by_value("Y")


  PostalCode.send_keys(people[14])
  wait = WebDriverWait(driver, 30)

# Now we can get the full address based on the postal code we provide
#here I have a first problem
  driver.find_element_by_id("btnAddress").click()

  wait = WebDriverWait(driver, 30)
  element = wait.until(EC.element_to_be_clickable((By.ID, 'immediateFamilySaf')))




  dropdown_declaration3= 
  Select(driver.find_element_by_name("policy.immediateFamilySaf"))
  dropdown_declaration3.select_by_value("N")

# Now we click on next to move forward on the second page of the form

  Next = driver.find_element_by_css_selector("a[onclick*=checkFirstTab]")
  Next.click()


  UnitNo = 
  driver.find_element_by_css_selector("a[onclick*=proceedNoUnitNo]")
  UnitNo.click()

  #Now we can fill the "cover needed" form 
  dropdown_plan=Select(driver.find_element_by_name("homeProd.planTypeRef"))
  dropdown_dwelling = 
  Select(driver.find_element_by_name("homeProd.dwellingTypeRef"))

  dropdown_insureadr= 
  Select(driver.find_element_by_name("homeProd.addressType"))
  dropdown_coverday=Select(driver.find_element_by_name("coverStartDay"))
  dropdown_covermonth=Select(driver.find_element_by_name("coverStartMonth"))
  dropdown_coveryear=Select(driver.find_element_by_name("coverStartYear"))
  dropdown_plan.select_by_value("HI      ")
  dropdown_dwelling.select_by_value(people[9])
  dropdown_insureadr.select_by_value("S")
  dropdown_coverday.select_by_value("1")
  dropdown_covermonth.select_by_value("4")
  dropdown_coveryear.select_by_value("2018")

# Now we can grab the next button and pass to the third tab
  SecondTab = driver.find_element_by_name("_target0")
  SecondTab.click()

#now we can grab the quote 
  ThirdTab = driver.find_element_by_name("_target1")
  ThirdTab.click()

  time.sleep(3)
  driver.save_screenshot('img' + people[2] + '.png')
  html= driver.page_source
  doc=

# We can feed that into Beautiful Soup
doc = BeautifulSoup(html, "html.parser")


rows = doc.find('table', id='table-termsofplan').find_all('td', attrs={'class': None})

premiums = []
for row in rows:

# Find the ones that don't have 'style' as an attribute
if 'style' in row.attrs:
    # Skip it! It's a header or footer row
    pass
else:
   premium={
           'type of plan': rows[1].text,
           '12 Months premium':rows[2].text,
           '24 Months premium':rows[3].text,
           '36 Months premium':rows[4].text,
           'Total Premium 12 Months':rows[10].text,
           'Total Premium 24 Months':rows[11].text,
           'Total Premium 36 Months':rows[12].text,
           'Goods and services Tax 12 Months':rows[14].text,
           'Goods and services Tax 24 Months':rows[15].text,
           'Goods and services Tax 36 Months':rows[16].text,
           'Single Payment 12 Months':rows[19].text,
           'Single Payment 24 Months':rows[20].text,
           'Single Payment 36 Months':rows[21].text,
                          }


premiums.append(premium)
driver.get("https://login.aviva.com.sg/directinsurance/homeinsurance.htm")

driver.close()

import pandas as pd

premium_df = pd.DataFrame(premiums)


premium_df.to_csv("premium.csv", index=False)

0 个答案:

没有答案