在 Creating a subset of words from a corpus in R 这个问题中,回答者可以轻松地将 term-document matrix
转换为文字云。
Python 库中是否有类似的函数,可以把原始文本文件、NLTK
语料库或 Gensim
MmCorpus 转换成文字云?
结果看起来有点像这样:
答案 0 :(得分:52)
答案 1 :(得分:13)
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
# Module-level copy of the stop-word collection shipped with wordcloud;
# show_wordcloud() below passes it to WordCloud to filter words out.
stopwords = set(STOPWORDS)
def show_wordcloud(data, title=None):
    """Build a word cloud from *data* and display it with matplotlib.

    Args:
        data: The text to visualise. A plain string is used as-is. A list,
            tuple or one-dimensional array-like (for example a pandas
            Series) is joined element by element, so every entry
            contributes to the cloud. Anything else is converted with
            ``str()``.
        title: Optional figure title; nothing is drawn when it is falsy.
    """
    # str() on a pandas Series yields its truncated repr ("...", "Name:",
    # "dtype: object"), not the underlying text, so join the items instead.
    if isinstance(data, str):
        text = data
    elif isinstance(data, (list, tuple)) or getattr(data, "ndim", None) == 1:
        text = " ".join(str(item) for item in data)
    else:
        text = str(data)

    wordcloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1,  # fixed seed so repeated runs give the same layout
    ).generate(text)

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        # NOTE(review): top=2.3 is outside the usual 0..1 figure fraction;
        # kept from the original snippet - confirm it renders as intended.
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()
# Example usage: one cloud per review set. The two Samsung_Reviews_* objects
# are not defined in this snippet - presumably pandas DataFrames with a
# 'Reviews' column; confirm against the code that loads them.
show_wordcloud(Samsung_Reviews_Negative['Reviews'])
show_wordcloud(Samsung_Reviews_positive['Reviews'])
答案 2 :(得分:10)
如果您需要在网站或 Web 应用程序中显示这些词云,可以将数据转换为 JSON 或 CSV 格式,并将其加载到 JavaScript 可视化库(例如 d3)中。参见 Word Clouds on d3。
如果不需要,Marcin 的回答就是实现您所描述功能的好方法。
答案 3 :(得分:8)
amueller的代码示例
在命令行/终端中:
sudo pip install wordcloud
然后运行python脚本:
## Simple WordCloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
# Sample input; repeated words ("base", "all", "your") make the size
# differences in the resulting cloud easy to see.
text = 'all your base are belong to us all of your base base base'
def generate_wordcloud(text, font_path=None, stopwords=None):
    """Render *text* as a word cloud and display it with matplotlib.

    Args:
        text: The string to visualise.
        font_path: Path to the TTF/OTF font to draw with. When omitted, the
            macOS Verdana font is used if that file exists; otherwise
            ``None`` is passed through so WordCloud picks its own default.
        stopwords: Set of words to leave out of the cloud (for example
            wordcloud's ``STOPWORDS``). Defaults to ``{'to', 'of'}``.
    """
    import os

    if font_path is None:
        # The original snippet hard-coded this macOS-only path, which fails
        # on machines where the file is absent.
        mac_verdana = '/Library/Fonts/Verdana.ttf'
        font_path = mac_verdana if os.path.exists(mac_verdana) else None
    if stopwords is None:
        stopwords = {'to', 'of'}

    wordcloud = WordCloud(
        font_path=font_path,
        relative_scaling=1.0,
        stopwords=stopwords,
    ).generate(text)

    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
# Draw the cloud for the sample sentence defined above.
generate_wordcloud(text)
答案 4 :(得分:2)
下面是一段简短的代码:
# Make a word cloud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
# Module-level copy of the stop-word collection shipped with wordcloud;
# show_wordcloud() below passes it to WordCloud to filter words out.
stopwords = set(STOPWORDS)
def show_wordcloud(data, title=None):
    """Build a word cloud from *data* and display it with matplotlib.

    Args:
        data: The text to visualise. A plain string is used as-is. A list,
            tuple or one-dimensional array-like (for example a pandas
            Series) is joined element by element, so every entry
            contributes to the cloud. Anything else is converted with
            ``str()``.
        title: Optional figure title; nothing is drawn when it is falsy.
    """
    # str() on a pandas Series yields its truncated repr ("...", "Name:",
    # "dtype: object"), not the underlying text, so join the items instead.
    if isinstance(data, str):
        text = data
    elif isinstance(data, (list, tuple)) or getattr(data, "ndim", None) == 1:
        text = " ".join(str(item) for item in data)
    else:
        text = str(data)

    wordcloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1,  # fixed seed so repeated runs give the same layout
    ).generate(text)

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        # NOTE(review): top=2.3 is outside the usual 0..1 figure fraction;
        # kept from the original snippet - confirm it renders as intended.
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()
if __name__ == '__main__':
    # NOTE(review): text_str is not defined anywhere in this snippet; it
    # must be bound to the input text before this runs.
    show_wordcloud(text_str)
答案 5 :(得分:0)
# Build a document-term count table: one row per entry of DF["W"], one
# column per vocabulary word, plus a "target" column copied from DF["T"].
cv = CountVectorizer()
cvData = cv.fit_transform(DF["W"]).toarray()
# get_feature_names() was removed in scikit-learn 1.2; prefer the
# replacement and fall back only on old releases that lack it.
feature_names = (
    cv.get_feature_names_out()
    if hasattr(cv, "get_feature_names_out")
    else cv.get_feature_names()
)
cvDF = pd.DataFrame(data=cvData, columns=feature_names)
# NOTE(review): if the vocabulary itself contains the word "target", this
# assignment overwrites that word's count column.
cvDF["target"] = DF["T"]
def w_count(tar, frame=None):
    """Collect the words that occur more than 4 times in rows labelled *tar*.

    Args:
        tar: Value matched against the "target" column.
        frame: Count table with one column per word plus a "target" column.
            Defaults to the module-level ``cvDF``.

    Returns:
        A tuple ``(x, y)`` of parallel lists: ``x`` holds the words and
        ``y`` the matching counts, in row order and then column order. A
        word appears once for every matching row in which it exceeds 4.
    """
    if frame is None:
        frame = cvDF
    # Drop the label column once, rather than once per row as before.
    counts = frame[frame["target"] == tar].drop("target", axis=1)
    x = []
    y = []
    for _, row in counts.iterrows():
        for word, count in row.items():
            if count > 4:
                x.append(word)
                y.append(count)
    return x, y
# Draw one bar chart of frequent words per distinct target label.
# Iterating the raw column would redraw the same chart once per row that
# carries the label, so iterate the unique labels instead.
for i in cvDF["target"].unique():
    x, y = w_count(i)
    plt.figure(figsize=(10, 6))
    plt.title(i)
    plt.xticks(rotation="vertical")
    plt.bar(x, y)
    plt.show()
# Draw one word cloud per document from the words that occur more than once.
# The label column is dropped first: transposing the full table mixed the
# "target" value into the word counts, which raises TypeError on `> 1` for
# string labels and adds a bogus word for numeric ones.
word_counts = cvDF.drop("target", axis=1)
for c in range(len(DF)):
    data = {
        word: count
        for word, count in word_counts.iloc[c].items()
        if count > 1
    }
    if not data:
        # generate_from_frequencies() raises ValueError when given no
        # words, so skip documents without a repeated word.
        continue
    wc = WordCloud(width=800, height=400, max_words=200).generate_from_frequencies(data)
    plt.figure(figsize=(10, 10))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title(DF['T'].iloc[c])
    plt.show()