不断出现的问题似乎反倒成了目标本身:我刚解决一个,下一个就会出现。我正在尝试分析从学术平台导出的 CSV 文件,列出热门标题、关键词、作者等。最新遇到的障碍 => 请看问题标题、代码和运行结果。
文件已准备好读取,而且我检查过代码中使用的列名与文件中的列名一致,但代码仍然返回错误消息,提示没有名为 EDAT 的轴(axis)。
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Plain white seaborn theme for every chart drawn below.
sns.set_style("white")

# Load the exported collection from the hard-coded CSV location.
publication_data = pd.read_csv("C:\\Users\\jcst\\Desktop\\Private\\Python data\\My_Collection_test2.csv")

# Discard rows that have no EDAT value. The column must be passed through
# ``subset=``: the first positional parameter of ``DataFrame.dropna`` is
# ``axis``, so ``dropna('EDAT', ...)`` fails with "No axis named EDAT".
publication_data.dropna(subset=["EDAT"], inplace=True)

# The first four characters of the EDAT text are taken as the year.
edat_text = publication_data["EDAT"].astype(str)
publication_data["Year"] = edat_text.str.slice(0, 4).astype(int)
def _cell_items(cell):
    """Split one multi-valued cell (authors, keywords) into its entries.

    ``pd.read_csv`` hands such a cell back as a single string, so looping
    over the cell directly visits individual characters instead of whole
    names or keywords.

    Args:
        cell: A non-missing value taken from a multi-valued column.

    Returns:
        A list of strings, one per entry found in the cell.
    """
    import ast  # local import keeps this helper self-contained

    # Already a real sequence (e.g. the frame was not built from a CSV).
    if isinstance(cell, (list, tuple)):
        return [str(item) for item in cell]

    text = str(cell).strip()
    # NOTE(review): assumes multi-valued cells were written as Python list
    # literals such as "['Doe, Jane', 'Roe, Max']" (what DataFrame.to_csv
    # produces for list cells). If the export uses another separator
    # (e.g. ';'), split on it here instead - confirm against the file.
    if text.startswith("["):
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError):
            parsed = None
        if isinstance(parsed, (list, tuple)):
            return [str(item) for item in parsed]

    # Anything else is treated as one single entry.
    return [text] if text else []


plt.figure(figsize=(10, 10), dpi=600)

# Top 10 authors
plt.subplot(2, 2, 1)
authors_flat = [
    author
    for cell in publication_data["FAU"].dropna()
    for author in _cell_items(cell)
]
top10authors = pd.DataFrame.from_records(
    Counter(authors_flat).most_common(10), columns=["Name", "Count"]
)
sns.barplot(x="Count", y="Name", data=top10authors, palette="RdBu_r")
plt.title("Top 10 Authors")

# Publications over Time
plt.subplot(2, 2, 2)
# One row per year with its number of publications.
yearly = (
    publication_data["Year"]
    .value_counts()
    .rename_axis("Year")
    .reset_index(name="Count")
)
sns.lineplot(x="Year", y="Count", data=yearly)
plt.title("Publications over Time")
plt.xlim([1986, 2020])

# Top 10 journals
plt.subplot(2, 2, 3)
# Missing journal names are dropped so that NaN is not counted as a
# journal, matching how the FAU and OT columns are handled.
top10journals = pd.DataFrame.from_records(
    Counter(publication_data["TA"].dropna()).most_common(10),
    columns=["Journal", "Count"],
)
sns.barplot(x="Count", y="Journal", data=top10journals, palette="RdBu_r")
plt.title("Top 10 Journals")

# Top associated keywords
plt.subplot(2, 2, 4)
# Keywords are counted word by word, case-insensitively; split() without
# an argument avoids counting empty strings produced by repeated spaces.
flat_kw = [
    word.lower()
    for cell in publication_data["OT"].dropna()
    for kw in _cell_items(cell)
    for word in kw.split()
]
top10kw = pd.DataFrame.from_records(
    Counter(flat_kw).most_common(10), columns=["Keyword", "Count"]
)
sns.barplot(x="Count", y="Keyword", data=top10kw, palette="RdBu_r")
plt.title("Top 10 Associated Keywords")

plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0.3, wspace=0.3)
plt.show()
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm 2019.1.2\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2019.1.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/jcst/PycharmProjects/TextMining/Test2.py", line 23, in <module>
publication_data.dropna('EDAT', inplace=True)
File "C:\Users\jcst\PycharmProjects\TextMining\venv\lib\site-packages\pandas\core\frame.py", line 4572, in dropna
axis = self._get_axis_number(axis)
File "C:\Users\jcst\PycharmProjects\TextMining\venv\lib\site-packages\pandas\core\generic.py", line 361, in _get_axis_number
.format(axis, type(cls)))
ValueError: No axis named EDAT for object type <class 'type'>