Python / Beautiful Soup with PhantomJS: no CSV output

Posted: 2016-07-04 21:06:46

Tags: python phantomjs

We inherited a Python script from a former colleague.

My knowledge of Python, Beautiful Soup and PhantomJS is very limited.

The code below used to work, but it no longer does.

Could someone tell me why the results are no longer being written to the CSV file?

import re
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import requests
from bs4 import BeautifulSoup
import io
import datetime

i = datetime.datetime.now()  # timestamp used for the Date column in the CSV
root = "https://www.realtor.ca/Residential/Map.aspx#CultureId=1&ApplicationId=1&RecordsPerPage=9&MaximumResults=9&PropertySearchTypeId=3&TransactionTypeId=3&StoreyRange=0-0&BedRange=0-0&BathRange=0-0&OwnershipTypeGroupId=2&LongitudeMin=-79.41555261611944&LongitudeMax=-79.38158512115484&LatitudeMin=43.638402570084274&LatitudeMax=43.6513524609693&SortOrder=A&SortBy=1&PolygonPoints=-79.40135836601263 43.64075673866837,-79.4046843051911 43.648784443139725,-79.39432024955755 43.65064758671808,-79.39120888710028 43.64339652776325,-79.40127253532415 43.64080332418719,-79.40135836601263 43.64075673866837&PolyZoomLevel=16&viewState=l&Longitude=-79.3954467773438&Latitude=43.6476898193359&ZoomLevel=12&CurrentPage=1&PropertyTypeGroupID="

# Path to the PhantomJS executable (machine-specific)
browser = webdriver.PhantomJS('C:\\Users\\Documents\\Python Scripts\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')

with io.open('//baxter/shared/Revenue Management/Competition/Data Scrape Files/Fashion16-07-04.csv', 'a', encoding='utf8') as logfile:
        logfile.write("Company")
        logfile.write(",")
        logfile.write("Property")
        logfile.write(",")
        logfile.write("Suite Type")
        logfile.write(",")
        logfile.write("Size")
        logfile.write(",")
        logfile.write("Bathrooms")
        logfile.write(",")
        logfile.write("Rent")
        logfile.write(",")
        logfile.write("Date")
        logfile.write("\n")

        # Values 1-90 are appended to the end of the root URL (the trailing PropertyTypeGroupID parameter)
        for prop in [str(n) for n in range(1, 91)]:

                url = root + prop
                browser.get(url)
                time.sleep(2)

                html = browser.page_source
                soup = BeautifulSoup(html, "html.parser")  # specify a parser explicitly to avoid the bs4 warning

                # Listing fields are located by realtor.ca's CSS class names;
                # if the site changes its markup these return empty lists.
                Price = soup.find_all("div", {"class": "m_property_lst_cnt_property_price"})
                unit_type = soup.find_all("div", {"class": "m_property_lst_cnt_details_bed_bath"})

                address = soup.find_all("div", {"class": "m_property_lst_hdr_rgt"})
                description = soup.find_all("div", {"class": "m_property_lst_cnt_realtor_property_description"})

                link = soup.find_all("a", {"class": "property_lst_cnt_property_lnktop"})
                for x in range(0, len(Price)):
                        Price2 = Price[x]
                        unit = unit_type[x].contents[0]
                        location = address[x].contents[0]
                        descript = description[x]
                        link2 = link[x]

                        logfile.write("Condo")
                        logfile.write(",")
                        logfile.write(location.text.replace(',',' ').strip())
                        logfile.write(",")
                        logfile.write(link2.get("href"))
                        logfile.write(",")
                        logfile.write(unit.text.strip())
                        logfile.write(",")
                        logfile.write(",")
                        logfile.write(Price2.text.replace('/Monthly','').replace(',','').strip())
                        logfile.write(",")
                        logfile.write("%s-%s-%s" % (i.year, i.month, i.day))
                        logfile.write("\n")

browser.quit()
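
One way to narrow this down is to check whether the find_all() calls still return any matches: if they come back empty (for example because realtor.ca changed its class names, or because the JavaScript map page has not finished rendering after the 2-second sleep), the inner loop never runs and nothing is written below the header row. A rough debugging sketch along those lines, reusing the PhantomJS path and class names from the script above and assuming the full search URL is substituted for root:

from selenium import webdriver
from bs4 import BeautifulSoup
import time
import io

# Same driver path as in the script above (machine-specific)
browser = webdriver.PhantomJS('C:\\Users\\Documents\\Python Scripts\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')

root = "https://www.realtor.ca/Residential/Map.aspx"  # substitute the full search URL used above
browser.get(root + "1")
time.sleep(10)  # give the page more time to render than the original 2 seconds

html = browser.page_source
soup = BeautifulSoup(html, "html.parser")

# If any of these counts are 0, the selectors no longer match and the CSV loop never executes.
for cls in ["m_property_lst_cnt_property_price",
            "m_property_lst_cnt_details_bed_bath",
            "m_property_lst_hdr_rgt",
            "m_property_lst_cnt_realtor_property_description"]:
    print(cls, len(soup.find_all("div", {"class": cls})))

# Save the rendered page so the current markup can be inspected by hand.
with io.open("rendered_page.html", "w", encoding="utf8") as f:
    f.write(html)

browser.quit()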

Please note that I get a warning triangle next to the imports below. It says '... imported but unused.', where '...' is 're', 'selenium.webdriver.common...', and so on.

import re    
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import requests
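
Those warnings are harmless on their own: they only mean the flagged names are never referenced, so they are not the reason the CSV stays empty. If you want to silence them, the script only needs the imports it actually uses; assuming nothing else in the file references the flagged names, the import block at the top can be trimmed to:

from selenium import webdriver
from bs4 import BeautifulSoup
import time
import io
import datetime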

0 Answers:

No answers yet