我认为机器学习的输入不能是.fit中的文本,那么我应该使用什么或者应该如何更改代码?这ai应该用年月日和天来训练,并且应该作为输出作物(第一,第二,第三)(我不知道要添加更多的详细信息,但我需要发布更多的详细信息,所以我只是在上面输入随机的东西。时刻。 代码:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
event_data = pd.read_excel("Jacob's Farming Contest.xlsx")
event_data.fillna(0, inplace=True)
X = event_data.drop(columns=['First Crop', 'Second Crop', 'Third Crop'])
y = event_data.drop(columns=['Year', 'Month', 'Day'])
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
# predictions = model.predict(X_test)
错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-44-3fbcae548642> in <module>
11
12 model = DecisionTreeClassifier()
---> 13 model.fit(X_train, y_train)
14 # predictions = model.predict(X_test)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
888 """
889
--> 890 super().fit(
891 X, y,
892 sample_weight=sample_weight,
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
179
180 if is_classification:
--> 181 check_classification_targets(y)
182 y = np.copy(y)
183
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
167 y : array-like
168 """
--> 169 y_type = type_of_target(y)
170 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
171 'multilabel-indicator', 'multilabel-sequences']:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in type_of_target(y)
248 raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")
249
--> 250 if is_multilabel(y):
251 return 'multilabel-indicator'
252
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in is_multilabel(y)
150 _is_integral_float(np.unique(y.data))))
151 else:
--> 152 labels = np.unique(y)
153
154 return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint
<__array_function__ internals> in unique(*args, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
261 ar = np.asanyarray(ar)
262 if axis is None:
--> 263 ret = _unique1d(ar, return_index, return_inverse, return_counts)
264 return _unpack_tuple(ret)
265
C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
309 aux = ar[perm]
310 else:
--> 311 ar.sort()
312 aux = ar
313 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'str' and 'int'
这是Jacob的农业竞赛。xlsx:
Year Month Day First Crop Second Crop Third Crop
0 101 1 1 0 0 0
1 101 1 2 Cactus Carrot Cocoa Beans
2 101 1 3 0 0 0
3 101 1 4 0 0 0
4 101 1 5 Mushroom Sugar CaNether Wart Wheat
... ... ... ... ... ... ...
367 101 12 27 Cactus Carrot Mushroom
368 101 12 28 0 0 0
369 101 12 29 0 0 0
370 101 12 30 Cocoa Beans Pumpkin Sugar CaNether Wart
371 101 12 31 0 0 0