设置我的决策树python

时间:2018-07-25 19:59:59

标签: python pandas scipy decision-tree pydot

import pandas as pd
import numpy as np

from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.tree import export_graphviz

import graphviz
import pydotplus
import io
from scipy import misc

%matplotlib inline

data = pd.read_csv(r'''C:\Users\Pwego\Desktop\spotifyclassification2\data.csv''')

train, test = train_test_split(data, test_size = 0.15)

print("Training size: {}; Test size: {};".format(len(train), len(test)))

pos_tempo = data[data['target'] == 1]['tempo']
neg_tempo = data[data['target'] == 0]['tempo']

pos_danceability = data[data['target'] == 1]['danceability']
neg_danceability = data[data['target'] == 0]['danceability']

pos_duration = data[data['target'] == 1]['duration_ms']
neg_duration = data[data['target'] == 0]['duration_ms']

pos_energy = data[data['target'] == 1]['energy']
neg_energy = data[data['target'] == 0]['energy']

pos_instrumentalness = data[data['target'] == 1]['instrumentalness']
neg_instrumentalness = data[data['target'] == 0]['instrumentalness']

pos_key = data[data['target'] == 1]['key']
neg_key = data[data['target'] == 0]['key']

pos_liveness = data[data['target'] == 1]['liveness']
neg_liveness = data[data['target'] == 0]['liveness']

pos_loudness = data[data['target'] == 1]['loudness']
neg_loudness = data[data['target'] == 0]['loudness']

pos_mode = data[data['target'] == 1]['mode']
neg_mode = data[data['target'] == 0]['mode']

pos_speechiness = data[data['target'] == 1]['speechiness']
neg_speechiness = data[data['target'] == 0]['speechiness']

pos_time_signature = data[data['target'] == 1]['time_signature']
neg_time_signature = data[data['target'] == 0]['time_signature']

pos_valence = data[data['target'] == 1]['valence']
neg_valence = data[data['target'] == 0]['valence']








fig = plt.figure(figsize =(12, 8))
plt.title("Song Tempo Like / Dislike Distribution")
pos_tempo.hist(alpha = 0.7, bins = 30, label='positive', color ="green")
neg_tempo.hist(alpha = 0.7, bins = 30, label='negative', color ='red')
plt.legend(loc = "upper right")

fig2 = plt.figure(figsize=(15,15))

#Danceabiliy
ax3 = fig2.add_subplot(331)
ax3.set_xlabel('dancebility')
ax3.set_ylabel('count')
ax3.set_title("Song Dancebility Like Distribution")
pos_danceability.hist(alpha=0.5, bins=30)
neg_danceability.hist(alpha=0.5, bins=30)

ax4 = fig2.add_subplot(331)
ax4.set_xlabel('duration')
ax4.set_ylabel('count')
ax4.set_title("Song Duration Like Distribution")
pos_duration.hist(alpha=0.5, bins=30)
neg_duration.hist(alpha=0.5, bins=30)

ax5 = fig2.add_subplot(332)
ax5.set_xlabel('energy')
ax5.set_ylabel('count')
ax5.set_title("Song Energy Like Distribution")
pos_energy.hist(alpha=0.5, bins=30)
neg_energy.hist(alpha=0.5, bins=30)

ax6 = fig2.add_subplot(333)
ax6.set_xlabel('instrumentalness')
ax6.set_ylabel('count')
ax6.set_title("Song Instrumentalness Like Distribution")
pos_instrumentalness.hist(alpha=0.5, bins=30)
neg_instrumentalness.hist(alpha=0.5, bins=30)

ax7 = fig2.add_subplot(334)
ax7.set_xlabel('key')
ax7.set_ylabel('count')
ax7.set_title("Song Keys Like Distribution")
pos_key.hist(alpha=0.5, bins=30)
neg_key.hist(alpha=0.5, bins=30)

ax8= fig2.add_subplot(335)
ax8.set_xlabel('liveness')
ax8.set_ylabel('count')
ax8.set_title("Song Liveness Like Distribution")
pos_liveness.hist(alpha=0.5, bins=30)
neg_liveness.hist(alpha=0.5, bins=30)

ax9 = fig2.add_subplot(336)
ax9.set_xlabel('loudness')
ax9.set_ylabel('count')
ax9.set_title("Song Loudness Like Distribution")
pos_loudness.hist(alpha=0.5, bins=30)
neg_loudness.hist(alpha=0.5, bins=30)

ax10 = fig2.add_subplot(337)
ax10.set_xlabel('mode')
ax10.set_ylabel('count')
ax10.set_title("Song Mode Like Distribution")
pos_mode.hist(alpha=0.5, bins=30)
neg_mode.hist(alpha=0.5, bins=30)

ax11 = fig2.add_subplot(338)
ax11.set_xlabel('speechiness')
ax11.set_ylabel('count')
ax11.set_title("Song Speechiness Like Distribution")
pos_speechiness.hist(alpha=0.5, bins=30)
neg_speechiness.hist(alpha=0.5, bins=30)

ax12 = fig2.add_subplot(339)
ax12.set_xlabel('time_signature')
ax12.set_ylabel('count')
ax12.set_title("Song Time Signature over Distribution")
pos_time_signature.hist(alpha=0.5, bins=30)
neg_time_signature.hist(alpha=0.5, bins=30)

ax13 = fig2.add_subplot(339)
ax13.set_xlabel('valence')
ax13.set_ylabel('count')
ax13.set_title("Song Valence over Distribution")
pos_valence.hist(alpha=0.5, bins=30)
neg_valence.hist(alpha=0.5, bins=30)

c = DecisionTreeClassifier(min_samples_split=100)

features = ["danceability","loudness","valence","energy","instrumentalness","acousticness","k"]

X_train = train[features]
y_train = train['target']

X_test = test[features]
y_test = test['target']

def show_tree(tree, features, path):
    f = io.StringIO()
    export_graphviz(tree, out_file=f, feature_names=features)
    pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
    img = scipy.misc.inread(path)
    plt.rcParams["figure.figsize"] = (20, 20)
    plt.imgshow(img)

show_tree(dt, features, 'tree1.png')

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-30-72a100e0eeec> in <module>()
----> 1 show_tree(dt, features, 'tree1.png')

<ipython-input-21-9c398f00bf98> in show_tree(tree, features, path)
      3     export_graphviz(tree, out_file=f, feature_names=features)
      4     pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
----> 5     img = scipy.misc.inread(path)
      6     plt.rcParams["figure.figsize"] = (20, 20)
      7     plt.imgshow(img)

AttributeError: module 'scipy.misc' has no attribute 'inread'

因此,我尝试为Spotify数据集创建此决策树,并尝试了多种安装这些库的方法。

我不断收到此错误,有人可以帮我吗?

我正在使用此python教程

https://www.youtube.com/watch?v=XDbj6PxaSf0&pbjreload=10

如果有人有更多机器学习资源,请给我发送邮件!

2 个答案:

答案 0 :(得分:0)

我认为这是一个简单的错别字。 Scipy.misc没有名为inread的函数。该函数称为imread。将scipy.misc.inread(path)替换为scipy.misc.imread(path)

答案 1 :(得分:-1)

[(Title,Person)]函数中


替换

img = scipy.misc.inread(path)


使用

img = scipy.misc.imread(path)


正确的拼写:scipy.misc.imread