修改
代码如下:
import sys
import pymysql
import pandas as pd
import numpy as np
conn = pymysql.connect(host='localhost', user='root', password=secret,
db='first_day', charset='utf8')
curs = conn.cursor(pymysql.cursors.DictCursor)
sql = "select * from first_day_datas"
curs.execute(sql)
rows = curs.fetchall()
df = pd.DataFrame(rows)
df = df[pd.notnull(df['longitude'])]
df.registerdate= df.registerdate.astype(str) # 칼럼 속성 바꾸기
df2 = pd.to_datetime(df['registerdate'])
df2 = df2.dt.strftime('%Y%m') # 2016-10-10 이런걸 20161010로 바꿔줌
df2_df = df2.to_frame() # 시리즈를 데이터프레임형식으로 변환
df2_df.index.names = ['ID_']# id 칼럼 만들기
df.index.names = ['ID_']
df = df.reset_index()# id 값넣기
df2_df = df2.reset_index()
df3 = df.merge(df2_df , on = 'ID_')
df3.registerdate_y = df3.registerdate_y.astype(int) # 칼럼 속성 바꾸기
df4 = df3[(df3['registerdate_y'] >= 201402 ) & (df3['registerdate_y'] < 201406 )] # 칼럼에 조건걸어 빼기
df5 = df4[df4['address'].str.contains('한남동')]
df6 = df5['blogtext'].astype(str).replace('\n', '') #\n을 바꿈
df7 = df6[(df6['blogtext'] != 'None' )] # 칼럼에 조건걸어 빼기
df7.to_csv(r'E:\내논문자료\wordcloud\test1\1402_06.csv')
with open(r'E:\내논문자료\wordcloud\test1\1402_06.txt', 'w', encoding='utf-8') as f:
for row in map(str, df7['blogtext']):
f.write(row + "\n")
但是当我在df6
上操作时,我收到错误
Traceback (most recent call last):
File "pandas\index.pyx", line 161, in pandas.index.IndexEngine.get_loc (pandas\index.c:4289)
File "pandas\src\hashtable_class_helper.pxi", line 404, in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:8534)
TypeError: an integer is required
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:/빅데이터 캠퍼스/untitled1/handling data.py", line 101, in <module>
df7 = df6[(df6['blogtext'] != 'None' )] # 칼럼에 조건걸어 빼기
File "C:\Python34\lib\site-packages\pandas\core\series.py", line 601, in __getitem__
result = self.index.get_value(self, key)
File "C:\Python34\lib\site-packages\pandas\indexes\base.py", line 2169, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas\index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas\index.c:3567)
File "pandas\index.pyx", line 113, in pandas.index.IndexEngine.get_value (pandas\index.c:3250)
File "pandas\index.pyx", line 163, in pandas.index.IndexEngine.get_loc (pandas\index.c:4373)
KeyError: 'blogtext'
Process finished with exit code 1
我该如何解决?
答案 0 :(得分:3)
从代码中删除.str
。 Python文档显示,指示字符串变量(或文字)应该在那里。字面意思是“str”。