Python忽略字符并从for循环列表中打印下一个字符

时间:2016-10-01 10:16:51

标签: python beautifulsoup export-to-csv

我正在使用 BeautifulSoup4 和 requests 从网站上抓取信息。

然后我将所需信息存储在列表中,我从页面中抓取了两种不同类型信息的列表。

    # Collect locations and depths from both scraped result sets, stepping
    # through them with a stride of 9 (offset -7 = location, offset -6 = depth).
    try:
        for i in range(0,1000):
            # NOTE(review): for i == 0 the indices are -7 and -6, which Python
            # resolves from the END of the list rather than the start.
            location = dive_data1[((9*i)-7)].text
            locations.append(location)
            # NOTE(review): unlike the other three lookups, this one appends the
            # element itself instead of its .text.
            location = dive_data2[((9*i)-7)]
            locations.append(location)
            depth = dive_data1[((9*i)-6)].text
            depths.append(depth)
            depth = dive_data2[((9*i)-6)].text
            depths.append(depth)

    # The bare except silently ends the loop on the first exception of any kind
    # (presumably the IndexError raised once the index passes the end of a
    # list - confirm); any other error is hidden the same way.
    except:
        pass

之后,我尝试将这些列表传递给另一个for循环,以将内容写入CSV文件。

    # Write the header rows, then one CSV row per index of `locations`.
    # (Excerpt: the handler that closes this try is not shown here.)
    try:
        writer = csv.writer(dive_log)
        # NOTE(review): without a trailing comma the parentheses do not build a
        # tuple, so writerow receives a plain string and iterates it character
        # by character.
        writer.writerow( ("Locations and depths") )
        writer.writerow( ("Sourced from:", str(url_page)) )
        writer.writerow( ("Location", "Depth") )
        for i in range(len(locations)):
            # This is the call that raises the UnicodeEncodeError quoted below.
            writer.writerow( (locations[i], depths[i]) )

当我运行脚本时,我收到了这个错误:

writer.writerow( (locations[i], depths[i]) )
UnicodeEncodeError: 'ascii' codec can't encode characters in position 65-66:      ordinal not in range(128)

我尝试用下面的方法跳过无法编码的字符:

    # Same CSV output as before, but each data row is written inside its own
    # try/except so that a failing row does not abort the whole run.
    writer = csv.writer(dive_log)
    # NOTE(review): without a trailing comma this argument is a plain string,
    # not a one-element tuple, so writerow iterates it character by character.
    writer.writerow( ("Locations and depths") )
    writer.writerow( ("Sourced from:", str(url_page)) )
    writer.writerow( ("Location", "Depth") )
    for i in range(len(locations)):
        try:
            writer.writerow( (locations[i], depths[i]) )

        # The bare except discards every exception raised by writerow, so a row
        # that cannot be written is dropped without any message.
        except:
            pass

当我运行它时,只有 for 循环之前的那几行被写入文件,for 循环中的每一次迭代都被完全跳过了。

我的脚本中的全部代码都会在下面复制,以防它与我在其他地方没有看到的内容相关。

import csv
from bs4 import BeautifulSoup
import requests

# Scrape a divelogs.de log page and dump the collected locations and depths
# into divelog.csv, then echo the file and the raw locations list.
# The print statements at the bottom use Python 2 syntax.
dive_log = open("divelog.csv", "wt")
url_page = "https://en.divelogs.de/log/Mark_Gosling"
r = requests.get(url_page)
# No parser is named here, so Beautiful Soup chooses one itself.
soup = BeautifulSoup(r.content)

# Two result sets: <tr class="td2"> elements and <td class="td"> elements.
dive_data1 = soup.find_all("tr", {"class": "td2"})
dive_data2 = soup.find_all("td", {"class": "td"})
locations = []
depths = []

# Step through both result sets with a stride of 9
# (offset -7 = location, offset -6 = depth).
try:
    for i in range(0,1000):
        # NOTE(review): for i == 0 the indices are -7 and -6, which Python
        # resolves from the END of the list rather than the start.
        location = dive_data1[((9*i)-7)].text
        locations.append(location)
        # NOTE(review): unlike the other three lookups, this one appends the
        # element itself instead of its .text.
        location = dive_data2[((9*i)-7)]
        locations.append(location)
        depth = dive_data1[((9*i)-6)].text
        depths.append(depth)
        depth = dive_data2[((9*i)-6)].text
        depths.append(depth)

# The bare except silently ends the loop on the first exception of any kind
# (presumably the IndexError raised once the index passes the end of a list -
# confirm); any other error is hidden the same way.
except:
    pass

# Write the header rows followed by one row per index of `locations`; the
# finally clause closes the file whether or not writing succeeds.
try:
    writer = csv.writer(dive_log)
    # NOTE(review): without a trailing comma this argument is a plain string,
    # not a one-element tuple, so writerow iterates it character by character.
    writer.writerow( ("Locations and depths") )
    writer.writerow( ("Sourced from:", str(url_page)) )
    writer.writerow( ("Location", "Depth") )
    for i in range(len(locations)):
        try:
            writer.writerow( (locations[i], depths[i]) )

        # Every exception raised by writerow is discarded here, so a row that
        # cannot be written is dropped without any message.
        except:
            pass

finally:
    dive_log.close()

# Show what actually reached the file, then the raw list for comparison.
print open("divelog.csv", "rt").read()
print "\n\n"
print locations

2 个答案:

答案 0 :(得分:0)

写入时,您需要在循环中将内容编码为 UTF-8:

# Encode both values to UTF-8 explicitly before passing them to the csv
# writer, instead of leaving the conversion to the implicit 'ascii' codec
# that raised the UnicodeEncodeError in the question.
for i in range(len(locations)):
        writer.writerow((locations[i].encode("utf-8"), depths[i].encode("utf-8")) )

答案 1 :(得分:-1)

正如 @yedpodtriztko 所指出的,您可以使用以下方法保留无法解码的字符:

而不是:

// Builds one `county` value per entry of `PolygonArr`.
//
// Each entry is read as a comma-separated string: field 0 is parsed as the
// longitude and field 1 as the latitude. Returns the resulting values in
// input order; an empty input yields an empty array.
func addCounties(PolygonArr: GMSPolygon)->[county] {

    var counties: [county] = []
    // Half-open range: the closed form `0...count-1` traps at runtime when
    // the collection is empty, because the upper bound would be below 0.
    for Index in 0..<PolygonArr.count
    {
        let CoordinateString:String = PolygonArr[Index]
        let CoordinateArray = CoordinateString.componentsSeparatedByString(",")
        // The string stores longitude first, latitude second.
        let MomentaryLatitude = (CoordinateArray[1] as NSString).doubleValue
        let MomentaryLongitude = (CoordinateArray[0] as NSString).doubleValue
        let countPolPlot = county(lat:MomentaryLatitude,long:MomentaryLongitude)
        counties.append(countPolPlot)
    }
    return counties
}

你可以用这个:

// Build the new values once, then append them all in a single call.
let newCounties = addCounties(PolygonArr)
// Use whichever of the two lines matches your Swift version.
counties.append(contentsOf: newCounties) // Swift 3
counties.appendContentsOf(newCounties) // Swift 2.x