import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.tree import export_graphviz
import graphviz
import pydotplus
import io
from scipy import misc
%matplotlib inline
data = pd.read_csv(r'''C:\Users\Pwego\Desktop\spotifyclassification2\data.csv''')
train, test = train_test_split(data, test_size = 0.15)
print("Training size: {}; Test size: {};".format(len(train), len(test)))
pos_tempo = data[data['target'] == 1]['tempo']
neg_tempo = data[data['target'] == 0]['tempo']
pos_danceability = data[data['target'] == 1]['danceability']
neg_danceability = data[data['target'] == 0]['danceability']
pos_duration = data[data['target'] == 1]['duration_ms']
neg_duration = data[data['target'] == 0]['duration_ms']
pos_energy = data[data['target'] == 1]['energy']
neg_energy = data[data['target'] == 0]['energy']
pos_instrumentalness = data[data['target'] == 1]['instrumentalness']
neg_instrumentalness = data[data['target'] == 0]['instrumentalness']
pos_key = data[data['target'] == 1]['key']
neg_key = data[data['target'] == 0]['key']
pos_liveness = data[data['target'] == 1]['liveness']
neg_liveness = data[data['target'] == 0]['liveness']
pos_loudness = data[data['target'] == 1]['loudness']
neg_loudness = data[data['target'] == 0]['loudness']
pos_mode = data[data['target'] == 1]['mode']
neg_mode = data[data['target'] == 0]['mode']
pos_speechiness = data[data['target'] == 1]['speechiness']
neg_speechiness = data[data['target'] == 0]['speechiness']
pos_time_signature = data[data['target'] == 1]['time_signature']
neg_time_signature = data[data['target'] == 0]['time_signature']
pos_valence = data[data['target'] == 1]['valence']
neg_valence = data[data['target'] == 0]['valence']
fig = plt.figure(figsize =(12, 8))
plt.title("Song Tempo Like / Dislike Distribution")
pos_tempo.hist(alpha = 0.7, bins = 30, label='positive', color ="green")
neg_tempo.hist(alpha = 0.7, bins = 30, label='negative', color ='red')
plt.legend(loc = "upper right")
fig2 = plt.figure(figsize=(15,15))
#Danceabiliy
ax3 = fig2.add_subplot(331)
ax3.set_xlabel('dancebility')
ax3.set_ylabel('count')
ax3.set_title("Song Dancebility Like Distribution")
pos_danceability.hist(alpha=0.5, bins=30)
neg_danceability.hist(alpha=0.5, bins=30)
ax4 = fig2.add_subplot(331)
ax4.set_xlabel('duration')
ax4.set_ylabel('count')
ax4.set_title("Song Duration Like Distribution")
pos_duration.hist(alpha=0.5, bins=30)
neg_duration.hist(alpha=0.5, bins=30)
ax5 = fig2.add_subplot(332)
ax5.set_xlabel('energy')
ax5.set_ylabel('count')
ax5.set_title("Song Energy Like Distribution")
pos_energy.hist(alpha=0.5, bins=30)
neg_energy.hist(alpha=0.5, bins=30)
ax6 = fig2.add_subplot(333)
ax6.set_xlabel('instrumentalness')
ax6.set_ylabel('count')
ax6.set_title("Song Instrumentalness Like Distribution")
pos_instrumentalness.hist(alpha=0.5, bins=30)
neg_instrumentalness.hist(alpha=0.5, bins=30)
ax7 = fig2.add_subplot(334)
ax7.set_xlabel('key')
ax7.set_ylabel('count')
ax7.set_title("Song Keys Like Distribution")
pos_key.hist(alpha=0.5, bins=30)
neg_key.hist(alpha=0.5, bins=30)
ax8= fig2.add_subplot(335)
ax8.set_xlabel('liveness')
ax8.set_ylabel('count')
ax8.set_title("Song Liveness Like Distribution")
pos_liveness.hist(alpha=0.5, bins=30)
neg_liveness.hist(alpha=0.5, bins=30)
ax9 = fig2.add_subplot(336)
ax9.set_xlabel('loudness')
ax9.set_ylabel('count')
ax9.set_title("Song Loudness Like Distribution")
pos_loudness.hist(alpha=0.5, bins=30)
neg_loudness.hist(alpha=0.5, bins=30)
ax10 = fig2.add_subplot(337)
ax10.set_xlabel('mode')
ax10.set_ylabel('count')
ax10.set_title("Song Mode Like Distribution")
pos_mode.hist(alpha=0.5, bins=30)
neg_mode.hist(alpha=0.5, bins=30)
ax11 = fig2.add_subplot(338)
ax11.set_xlabel('speechiness')
ax11.set_ylabel('count')
ax11.set_title("Song Speechiness Like Distribution")
pos_speechiness.hist(alpha=0.5, bins=30)
neg_speechiness.hist(alpha=0.5, bins=30)
ax12 = fig2.add_subplot(339)
ax12.set_xlabel('time_signature')
ax12.set_ylabel('count')
ax12.set_title("Song Time Signature over Distribution")
pos_time_signature.hist(alpha=0.5, bins=30)
neg_time_signature.hist(alpha=0.5, bins=30)
ax13 = fig2.add_subplot(339)
ax13.set_xlabel('valence')
ax13.set_ylabel('count')
ax13.set_title("Song Valence over Distribution")
pos_valence.hist(alpha=0.5, bins=30)
neg_valence.hist(alpha=0.5, bins=30)
c = DecisionTreeClassifier(min_samples_split=100)
features = ["danceability","loudness","valence","energy","instrumentalness","acousticness","k"]
X_train = train[features]
y_train = train['target']
X_test = test[features]
y_test = test['target']
def show_tree(tree, features, path):
f = io.StringIO()
export_graphviz(tree, out_file=f, feature_names=features)
pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
img = scipy.misc.inread(path)
plt.rcParams["figure.figsize"] = (20, 20)
plt.imgshow(img)
show_tree(dt, features, 'tree1.png')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-30-72a100e0eeec> in <module>()
----> 1 show_tree(dt, features, 'tree1.png')
<ipython-input-21-9c398f00bf98> in show_tree(tree, features, path)
3 export_graphviz(tree, out_file=f, feature_names=features)
4 pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
----> 5 img = scipy.misc.inread(path)
6 plt.rcParams["figure.figsize"] = (20, 20)
7 plt.imgshow(img)
AttributeError: module 'scipy.misc' has no attribute 'inread'
因此,我尝试为Spotify数据集创建此决策树,并尝试了多种安装这些库的方法。
我不断收到此错误,有人可以帮我吗?
我正在使用此python教程
https://www.youtube.com/watch?v=XDbj6PxaSf0&pbjreload=10
如果有人有更多机器学习资源,请给我发送邮件!
答案 0 :(得分:0)
我认为这是一个简单的错别字。 Scipy.misc没有名为inread
的函数。该函数称为imread
。将scipy.misc.inread(path)
替换为scipy.misc.imread(path)
答案 1 :(得分:-1)
在[(Title,Person)]
函数中
替换
img = scipy.misc.inread(path)
使用
img = scipy.misc.imread(path)
正确的拼写:scipy.misc.imread