情绪分析 - 模式NLP

时间:2016-09-27 12:25:42

标签: matplotlib nlp nltk

尝试使用 Pattern 库(pattern.en)创建主题对象极性图。

from pattern.en import parse,sentiment
print sentiment('The movie attempts to be surreal by incorporating various time paradoxes')

(0.125, 0.75)

from pattern.en import sentiment
print sentiment("He is good.") 

(0.7, 0.6000000000000001)

from pattern.en import sentiment
print sentiment("The movie attempts to be surreal by incorporating various time paradoxes. He is good.") 

(0.31666666666666665, 0.7000000000000001)

据我所知,该分析会把两个句子合在一起计算极性,并返回归一化后的值。有没有可能逐句计算得分并逐行返回,类似这样:

from pattern.en import sentiment
print sentiment("The movie attempts to be surreal by incorporating various time paradoxes. He is good.") 

(0.125, 0.75)
(0.7, 0.6000000000000001)

第二部分:我希望使用 numpy 和 matplotlib 将这一系列的 (x1, y1) 值绘制成散点图。有可能吗?

以您的代码为指导,我尝试通过添加模态值来改进现有代码,但遇到了一个错误(见下文)。

修改1

for sentence in sentences:
        modality(sentence)
    #mind the difference for the last sentence, which contains two dots.         
    for sentence in complete_text.split("."):
        modality(sentence)
    b = np.array([ modality(sentence) for sentence in complete_text.split(".") ])
    print "Modality: ", b[:,0]

输出错误

print "Modality: ", b[:,0]
IndexError: too many indices for array

我正在尝试根据我在硬编码值时能够实现的模态范围来更改标记符号。尝试将您的方法扩展到许多句子的模态。

修改2

图表看起来不错,但缺少一个重要功能。我需要点击标记点,并返回被点击标记所对应的句子,以便分析这些被选中的句子。也就是说,要在 onclick(event) 中返回该句子。

fig = plt.figure()
ax = fig.add_subplot(111)
def onclick(event):
    """Print the number of the mouse button that triggered ``event``.

    ``event`` only needs a ``button`` attribute that can be formatted
    with ``%d``.
    """
    pressed = event.button
    print('button=%d,' % pressed)
cid = fig.canvas.mpl_connect('button_press_event', onclick)

不确定如何把被点击的标记与其对应的句子关联起来?这是我的极性分析中最后缺失的一环。

编辑3

我希望用红色表示负面情绪、绿色表示正面情绪,并用不同的标记表示四种类型的模态。我按需要用以下函数修改了你的代码:

print "polarities: ", a[:,0]
print "subjectivities: ", a[:,1]
print "modalities: ", a[:,2]
s = np.array(a[:,2])
r = np.array(a[:,1])
############ Plotting ############
def markers(s):
    """Return a matplotlib marker symbol ('o', 'x', 'v' or '^') for ``s``."""
    # NOTE(review): every bound below is a string literal ("0.5", "1", ...),
    # so these are number-vs-string comparisons, not numeric range checks --
    # presumably numeric bounds were intended; TODO confirm.
    # NOTE(review): the first test joins its bounds with `and`, the two
    # `elif` tests with `or`; with numeric bounds `s > 0 or s < .5` would
    # hold for every real number, making the later branches unreachable.
    # NOTE(review): the call site in this snippet passes the whole modality
    # array (a[:,2]) and uses the single returned symbol for one scatter()
    # call -- TODO confirm whether a per-point marker was intended.
    if s > "0.5" and s< "1":
        return 'o'
    elif s > "0" or s < ".5":
        return 'x'
    elif s > "-.5" or s < "0":
        return 'v'
    else:
        return '^'

def colors(r):
    """Return a matplotlib colour code ("g" or "r") for ``r``."""
    # NOTE(review): the bounds are string literals ("0", "1", "-1"), so these
    # are number-vs-string comparisons, not numeric range checks --
    # presumably numeric bounds were intended; TODO confirm.
    # Both the `elif` and the `else` branch return "r", so every value that
    # fails the first test ends up red.
    # NOTE(review): in this snippet `r` is built from a[:,1], which is printed
    # as "subjectivities"; TODO confirm whether the polarity column a[:,0]
    # was meant to drive the red/green choice.
    if r > "0" and r < "1":
        return "g"
    elif r < "0" or r > "-1":
        return "r"
    else:
        return "r"

fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(a[:,0], a[:,1], marker = markers(s), color= colors(r), s=100, picker=5)

但是无论取值如何,图表中的所有点都显示为红色的 x 标记。我不确定为什么?

编辑4:

ax=fig.add_subplot(111)
ax.scatter (p[(p>0.0)&(p<=1)&(m>0.5)&(m<=1)], s[(p>0.0)&(p<=1)&(m>0.5)&(m<=1)], marker = "o", color= 'g', s=100, picker=5)
ax.scatter (p[(p>0.0)&(p<=1)&(m>0.0)&(m<=0.5)], s[(p>0.0)&(p<=1)&(m>0.0)&(m<=0.5)], marker = "v", color= 'g', s=100, picker=5)


ax.scatter (p[(p>0.0)&(p<=1)&(m>-0.5)&(m<=0.0)], s[(p>0.0)&(p<=1)&(m>-0.5)&(m<=0.0)], marker = "s", color= 'g', s=100, picker=5)
ax.scatter (p[(p>0.0)&(p<=1)&(m>=-1.0)&(m<=-0.5)], s[(p>0.0)&(p<=1)&(m>=-1.0)&(m<=-0.5)], marker = "x", color= 'g', s=100, picker=5)
ax.scatter (p[(p>=-1.0)&(p<=0)&(m>0.5)&(m<=1)], s[(p>=-1.0)&(p<=0)&(m>0.5)&(m<=1)], marker = "o", color= 'r', s=100, picker=5)
ax.scatter (p[(p>=-1.0)&(p<=0)&(m>0.0)&(m<=0.5)], s[(p>=-1.0)&(p<=0)&(m>0.0)&(m<=0.5)], marker = "v", color= 'r', s=100, picker=5)
ax.scatter (p[(p>=-1.0)&(p<=0)&(m>-0.5)&(m<=0.0)], s[(p>=-1.0)&(p<=0)&(m>-0.5)&(m<=0.0)], marker = "s", color= 'r', s=100, picker=5)
ax.scatter (p[(p>=-1.0)&(p<=0)&(m>=-1.0)&(m<=-0.5)], s[(p>=-1.0)&(p<=0)&(m>=-1.0)&(m<=-0.5)], marker = "x", color= 'r', s=100, picker=5)

ax.set_xlabel("polarity")
ax.set_ylabel("subjectivity")
def onpick(event):
    # Pick-event callback: prints each picked index together with
    # sentences[index].
    # NOTE(review): event.ind is used directly as an index into the full
    # `sentences` list. Every scatter() call in this snippet is given only a
    # masked subset of p / s, so presumably the picked index is relative to
    # that subset rather than to `sentences` -- TODO confirm.
    index = event.ind
    for i in index:
        print i, sentences[i]
cid = fig.canvas.mpl_connect('pick_event', onpick)
plt.show()

不确定为什么 onpick 函数不返回相应的句子,而总是返回第一句话。

3 个答案:

答案 0 :(得分:3)

这是对最初问题及其Edit2的回答。有关Edit4的答案,请参阅底部。

我正在添加另一个答案来解决Edit2中提出的问题。你没有说明“返回句子”具体指什么,所以我猜你是想把它打印到控制台。以下是执行该操作的代码:

from pattern.en import sentiment, modality

sentences0 = ["In fact, I'm not convinced that blue is a color.", 
             "The car is blue.",
             "Precisely speaking, no random opinion is allowed.",
             "Democracy is dead. Long live the king."]

complete_text = " ".join(sentences0)
sentences = complete_text.split(".")[:-1]

import numpy as np
import matplotlib.pyplot as plt

a = np.array([ sentiment(sentence) for sentence in sentences ])
b = np.array([ modality(sentence) for sentence in sentences  ])

a = np.append(a, np.array([b]).T, axis=1)


print "polarities: ", a[:,0]
print "subjectivities: ", a[:,1]
print "modalities: ", a[:,2]


############ Plotting ############
def colors(x):
    """Map each value in ``x`` to an ``(r, g, b)`` tuple blending red to blue.

    A value of 0 gives pure red ``(1, 0, 0)`` and a value of 1 gives pure
    blue ``(0, 0, 1)``; values in between are mixed linearly.  Values outside
    ``[0, 1]`` are clamped to the nearest end first, so every returned
    component stays inside the ``[0, 1]`` range of a colour channel.

    :param x: iterable of numbers (here: the modality column ``a[:,2]``).
    :return: list of 3-tuples, one per element of ``x``.
    """
    # Clamp before blending: unclamped, a negative value would produce a red
    # channel above 1 and a negative blue channel.
    clamped = [min(max(xi, 0.0), 1.0) for xi in x]
    return [(1 - xi, 0., xi) for xi in clamped]

fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(a[:,0], a[:,1], marker="s", color=colors(a[:,2]), s=100, picker=5)
ax.set_xlabel("polarity")
ax.set_ylabel("subjectivity")
def onpick(event):
    index = event.ind
    for i in index:
        print i, sentences[i]

cid = fig.canvas.mpl_connect('pick_event', onpick)
plt.show()

**Edit4**

问题在于返回的索引是经过条件筛选后的数组中的索引,而 sentences 并没有经过同样的筛选。下面这个程序应该能实现你想要的效果。

import numpy as np
import matplotlib.pyplot as plt

sentences = ["Sentence0", "Sentence1", "Sentence2", "Sentence3", "Sentence4", "Sentence5"]
p = np.array( [ 0. ,  0.2 ,  -0.3 ,  0.2, 0., 0.2] )
s = np.array( [ 0.1,  0.,   0.,   0.3 , 0.1, 0.] )
m = np.array( [ 1.,   -0.25,  1. ,  -0.6, 0.2,-0.25   ] )


colors = np.array([(0.8*(1-x), 0.7*x, 0) for x in np.ceil(p)])

cond = [(m>0.5)&(m<=1), (m>0.0)&(m<=0.5), (m>-0.5)&(m<=0.0), (m>=-1.0)&(m<=-0.5) ]
markers = ["o", "v", "s", "x"]


fig=plt.figure()
ax=fig.add_subplot(111)

# Draw one scatter series per modality band and keep the artists in drawing
# order, so that position k in `sc` corresponds to mask k in `cond`.
sc = []
for band_mask, band_marker in zip(cond, markers):
    sc0 = ax.scatter(p[band_mask], s[band_mask], marker=band_marker,
                     color=colors[band_mask], s=100, picker=5)
    sc.append(sc0)

ax.set_xlabel("polarity")
ax.set_ylabel("subjectivity")

def onpick(event):
    index = event.ind
    artist = event.artist
    print len(index)
    for i in index:
        try:
            which = sc.index(artist)
            print i, sentences[int(np.arange(len(p))[cond[which]][i])]
        except:
            #raise
            print "no sentence found"

cid = fig.canvas.mpl_connect('pick_event', onpick)

plt.show()

答案 1 :(得分:2)

这是对最初问题的回答。

在这种情况下,您所说的“行”是如何定义的?如果可以假设句点用来分隔句子,那么可以使用 text.split(".") 将文本拆分成一个列表,然后通过下面的循环计算每个项目的情绪值:
for sentence in complete_text.split("."):
    print sentiment(sentence)

请参阅此代码以获取工作示例以及绘图将如何工作。

from pattern.en import parse,sentiment

sentences = ["In fact, I'm not convinced that blue is a color.", 
             "The car is blue.",
             "Precisely speaking, no random opinion is allowed.",
             "Democracy is dead. Long live the king."]

complete_text = " ".join(sentences)


for sentence in sentences:
    print sentiment(sentence)

#mind the difference for the last sentence, which contains two dots.         
for sentence in complete_text.split("."):
    print sentiment(sentence)


import numpy as np
import matplotlib.pyplot as plt

a = np.array([ sentiment(sentence) for sentence in complete_text.split(".") ])

print "polarities: ", a[:,0]
print "subjectivities: ", a[:,1]

############ Plotting ############
fig=plt.figure()
ax=fig.add_subplot(111)
ax.plot(a[:,0], a[:,1], marker="s", linestyle="")
ax.set_xlabel("polarity")
ax.set_ylabel("subjectivity")
plt.show()

答案 2 :(得分:0)

    import numpy as np
    import matplotlib.pyplot as plt
    from pattern.en import sentiment,modality
    from matplotlib.pyplot import figure, show
    sentences = ["In fact, I'm not convinced that blue is a color.", 
                 "The car is blue.",
                 "Precisely speaking, no random opinion is allowed.",
                 "Democracy is dead. Long live the king."]
    complete_text = " ".join(sentences)
    for sentence in sentences:
        sentiment(sentence)    
    a = np.array([ sentiment(sentence) for sentence in complete_text.split(".") ])
    for sentence in sentences:
        modality(sentence)
    b = np.array([ modality(sentence) for sentence in complete_text.split(".") ])
    print "polarities: ", a[:,0]
    print "subjectivities: ", a[:,1]
    print "Modality: ", b
    print
    (x1) = a[:,0]
    (y1) = a[:,1]
    (z1) = b
    x = np.array([x1])
    y = np.array([y1])
    z = np.array([z1])
    i_opt1 = np.where((x >= 0) & (0.5 < z) & (z <= 1))
    i_opt2 = np.where((x >= 0) & (0 < z) & (z <= 0.5))  
    i_opt3 = np.where((x >= 0) & (-0.5 < z) & (z <= 0)) 
    i_opt4 = np.where((x >= 0) & (-1 < z) & (z <= -0.5))  
    i_opt5 = np.where((x < 0) &(0.5 < z) & (z <= 1))  
    i_opt6 = np.where((x < 0) &(0 < z) & (z <= 0.5))  
    i_opt7 = np.where((x < 0) &(-0.5 < z) & (z <= 0))  
    i_opt8 = np.where((x < 0) &(-1 < z) & (z <= -0.5))
    plt.scatter(x[i_opt1], y[i_opt1], label='Indicative', color='g', s=25, marker="o")
    plt.scatter(x[i_opt2], y[i_opt2], label='Imperative', color='g', s=25, marker="x")
    plt.scatter(x[i_opt3], y[i_opt3], label='Conditional', color='g', s=25, marker="^")
    plt.scatter(x[i_opt4], y[i_opt4], label='Subjunctive', color='g', s=25, marker="v")
    plt.scatter(x[i_opt5], y[i_opt5], color='r', s=25, marker="o")
    plt.scatter(x[i_opt6], y[i_opt6], color='r', s=25, marker="x")
    plt.scatter(x[i_opt7], y[i_opt7], color='r', s=25, marker="^")
    plt.scatter(x[i_opt8], y[i_opt8], color='r', s=25, marker="v")
    plt.xlabel('Sentiment polarity: negative -> positive')
    plt.ylabel('Subjectivity: objective -> subjective')
    plt.show()