---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
C:\Users\Deepayan\Desktop\Final_Dissertation\beauty-1.py in <module>()
71 print table
72
---> 73 table.to_csv('fout2', mode='a', header=False)
74
75 fout2.close()
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kwargs)
86 else:
87 kwargs[new_arg_name] = new_arg_value
---> 88 return func(*args, **kwargs)
89 return wrapper
90 return _deprecate_kwarg
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, quoting, quotechar, line_terminator, chunksize, tupleize_cols, date_format, doublequote, escapechar, **kwds)
1152 doublequote=doublequote,
1153 escapechar=escapechar)
-> 1154 formatter.save()
1155
1156 if path_or_buf is None:
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in save(self)
1398
1399 else:
-> 1400 self._save()
1401
1402 finally:
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in _save(self)
1498 break
1499
-> 1500 self._save_chunk(start_i, end_i)
1501
1502 def _save_chunk(self, start_i, end_i):
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\core\format.pyc in _save_chunk(self, start_i, end_i)
1520 date_format=self.date_format)
1521
-> 1522 lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
1523
1524 # from collections import namedtuple
C:\Users\Deepayan\AppData\Local\Enthought\Canopy\User\lib\site-packages\pandas\lib.pyd in pandas.lib.write_csv_rows (pandas\lib.c:16935)()
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 19: ordinal not in range(128)
我正在尝试将Python代码的输出导出到文件中,由于某种原因,我继续面对“'ascii'编解码器无法编码问题”。我尝试了很多改动,但出于某种原因,它只是不起作用。
from __future__ import unicode_literals
from bs4 import BeautifulSoup
import urllib2
import csv
import os
import re
import requests
import pandas as pd
import urlparse
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
from BeautifulSoup import BeautifulStoneSoup
import urllib
import urlparse
import pdb
import codecs
from BeautifulSoup import UnicodeDammit
fout2 = codecs.open('data.csv','r','UTF-8')
for y in range(0,100,10):
url = "http://www.indeed.co.uk/jobs?q=%22data+science%22"
params = {'start':y}
url_parts = list(urlparse.urlparse(url))
query = dict(urlparse.parse_qsl(url_parts[4]))
query.update(params)
url_parts[4] = urllib.urlencode(query)
y = urlparse.urlunparse(url_parts)
url = urllib2.urlopen(y)
soup1 = BeautifulSoup(url)
job_id = []
job_title = []
company = []
location = []
for title in soup1.findAll('a'):
japan = (title.get('href'))
if "/rc/clk?" in japan:
job_id.append(title.get('href'))
for title in soup1.findAll('a',{'itemprop' : 'title'}):
job_title.append(title.text.strip())
for title in soup1.findAll('span',{'itemprop' : 'name'}):
company.append(title.text.strip())
for title in soup1.findAll('span',{'itemprop' : 'addressLocality'}):
location.append(title.text.strip())
df = pd.DataFrame(job_id, columns=['job_id'])
df1 = pd.DataFrame(job_title, columns=['job_title'])
df2 = pd.DataFrame(company, columns=['company'])
df3 = pd.DataFrame(location, columns=['location'])
table = pd.concat([df, df1, df2,df3], axis=1).reset_index(drop=True)
print table
table.to_csv('fout2', mode='a', header=False)
fout2.close()
答案 0 :(得分:0)
Unicode字符2013是一个短划线。错误消息是在输入中的某处有一个em dash,它无法写入。在调用to_csv()之前,您需要编码或删除有问题的字符。