目前正在编写一些机器学习,我正在使用sklearn,numpy和scipy。我能够解析我的数据库并准备数据集。但是,当进行预测和输出结果时,我收到以下错误:
类型错误:'元组'对象不可调用
我的代码如下:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn import tree
from sklearn import gaussian_process
from sklearn import neural_network
from sklearn import preprocessing
from time import time
import numpy as np
t0 = time()
classifier = int(input(
"""
Enter number corresponding to classifier you would like to use:
1. Support Vector Machines
2. Gaussian Naive Bayes
3. Multinomial Naive Bayes
4. Stochastic Gradient Descent with Logistic Regression loss function
"""))
dataset = int(input(
"""
Enter number corresponding to data set you would like to use:
1. First half and second half
2. Alternating
3. Alternating with attack type
4. Alternating with attack type and target type
"""))
# Assign appropriate datasets
input_files = ['half', 'alternating', 'alternating-with-attacktype', 'alternating-all']
filename = input_files[dataset-1]
# Instantiate arrays for inputs and outputs
train_inputs = []
train_outputs = np.array([])
test_inputs = []
test_expected_outputs = np.array([])
test_actual_outputs = []
X = np.array([])
# Read training file
print ('Reading training file')
t = time()
for line in open('datasets/%s-train.txt' % filename):
inputs = line.split(' ')
outputs = inputs.pop()
train_outputs = np.append(train_outputs, int(outputs))
print ('Done. Time taken: %f secs.\n' % (time()-t))
# for line in open('datasets/%s-train.txt' % filename):
# inputs = line.split(' ')
# output = inputs.pop()
# train_outputs = np.append(train_outputs, int(output))
# print ('Done. Time taken: %f secs.\n' % (time()-t))
print ('Create classifier')
t = time()
clf = None
# No preprocessing for SVMs
# Otherwise, scale inputs (preprocessing to make more amenable for machine learning)
if classifier == 1: # Support vector machines
clf = SVC()
elif classifier == 2: # Gaussian Naive Bayes
train_inputs = preprocessing.scale(np.array(train_inputs))
clf = GaussianNB()
elif classifier == 3: # Multinomial Naive Bayes
clf = MultinomialNB()
elif classifier == 4: # Stochastic gradient descent with logistic regression
train_inputs = preprocessing.scale(np.array(train_inputs))
clf = SGDClassifier(loss='log')
print ('Done. Time taken: %f secs.\n' % (time()-t))
print ('Fit classifier')
t = time()
X.shape(1 -1)
clf.fit(train_inputs, train_outputs)
print ('Done. Time taken: %f secs.\n' % (time()-t))
# Read test file and scale inputs
print ('Reading test file')
t = time()
for line in open('datasets/%s-test.txt' % filename):
inputs = line.split(' ')
output = inputs.pop()
test_expected_outputs = np.append(test_expected_outputs, int(output))
test_inputs.append(map(float, inputs))
# Same here: no preprocessing for SVMs
# Otherwise, scale inputs (preprocessing to make more amenable for machine learning)
if classifier != 1:
test_inputs = preprocessing.scale(np.array(test_inputs))
print ('Done. Time taken: %f secs.\n' % (time()-t))
print ('Predict for test file')
t = time()
test_actual_outputs = [clf.predict(i)[0] for i in test_inputs]
print ('Done. Time taken: %f secs.\n' % (time()-t))
print ('Compare outputs')
t = time()
right = sum(test_actual_outputs == test_expected_outputs)
wrong = len(test_actual_outputs) - right
print ('Done. Time taken: %f secs.\n' % (time()-t))
print ('Number right: %d\nNumber wrong: %d' % (right, wrong))
print ('Prediction rate: %.2f%%' % (100.0 * right/len(test_actual_outputs)))
print ('Total time taken: %f secs.\n' % (time()-t0))
我知道我需要添加array.reshape(-1 1)或array.reshape(1 -1),但不知道这会做什么。
任何有关如何解决这个问题的建议都会受到欢迎。
答案 0 :(得分:0)
此错误来自以下行:
X.shape(1 -1)
改为使用X.shape
。
以下是该问题的复制品:
X = pd.Series()
X.shape(1 -1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-105-1bb3332dc7d5> in <module>()
1 X = pd.Series()
----> 2 X.shape(1 -1)
TypeError: 'tuple' object is not callable