嗨,我的代码有问题,我必须用csv创建一个数据集,然后再输入ANN。
这是代码
#load files
datos1=pd.read_csv(r'C:\Users\rafa_\Desktop\MFPT_healthy.csv',usecols=[0,1])
datos2=pd.read_csv(r'C:\Users\rafa_\Desktop\MFPT_IRF.csv',usecols=[0,1])
datos3=pd.read_csv(r'C:\Users\rafa_\Desktop\MFPT_ORF.csv',usecols=[0,1])
#extraigo los valores de dataframe y creo una lista con ellos
a = datos1.values
b = datos2.values
c = datos3.values
def split(a, n):
"""
funcion que divide un vector ''a'' en ''n'' partes
"""
avg = len(a) / float(n)
out = []
last = 0.0
while last < len(a):
out.append(a[int(last):int(last + avg)])
last += avg
return out
def dataset(data1,data2,data3,n):
"""
funcion que crea el dataser a partir de los 3 vectores con datos,
los divide en vecores de igual tamaño
clase 0 es es doble de grande que las otras, por lo que va a generar el doble de datos
clase 0 = heathy
clase 1 = IRF
clase 2 = ORF
Luego guarda el dataset en formato .npz
"""
a=split(data1,n)
b=split(data2,n/2)
c=split(data3,n/2)
zeros=np.zeros(len(a))
ones=np.ones(len(b))
twos=np.ones(len(c))*2
y=np.concatenate((zeros, ones, twos), axis=0) #etiquetas
x=np.concatenate((a, b, c),axis=0) #datos
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = 0)
dataset=[xtrain, xtest, ytrain, ytest]
np.savez_compressed('dataset', dataset[0],dataset[1],dataset[2],dataset[3])
dataset(a,b,c,n)
npzfile = np.load('dataset.npz')
xtrain = npzfile['arr_0']
xtest = npzfile['arr_1']
ytrain = npzfile['arr_2']
ytest = npzfile['arr_3']
#onehot encoding
ytrain = np_utils.to_categorical(ytrain)
ytest = np_utils.to_categorical(ytest)
#normalizar data
sc = MinMaxScaler(feature_range=(-1,1))
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)
然后我的代码出现以下错误
ValueError Traceback (most recent call last)
<ipython-input-295-47870eaf4789> in <module>
10 #normalizar data
11 sc = MinMaxScaler(feature_range=(-1,1))
---> 12 xtrain = sc.fit_transform(xtrain)
13 xtest = sc.transform(xtest)
D:\Anaconda\envs\KerasGPU\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
460 if y is None:
461 # fit method of arity 1 (unsupervised transformation)
--> 462 return self.fit(X, **fit_params).transform(X)
463 else:
464 # fit method of arity 2 (supervised transformation)
D:\Anaconda\envs\KerasGPU\lib\site-packages\sklearn\preprocessing\data.py in fit(self, X, y)
321 # Reset internal state before fitting
322 self._reset()
--> 323 return self.partial_fit(X, y)
324
325 def partial_fit(self, X, y=None):
D:\Anaconda\envs\KerasGPU\lib\site-packages\sklearn\preprocessing\data.py in partial_fit(self, X, y)
349 X = check_array(X, copy=self.copy, warn_on_dtype=True,
350 estimator=self, dtype=FLOAT_DTYPES,
--> 351 force_all_finite="allow-nan")
352
353 data_min = np.nanmin(X, axis=0)
D:\Anaconda\envs\KerasGPU\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
520 try:
521 warnings.simplefilter('error', ComplexWarning)
--> 522 array = np.asarray(array, dtype=dtype, order=order)
523 except ComplexWarning:
524 raise ValueError("Complex data not supported\n"
D:\Anaconda\envs\KerasGPU\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
499
500 """
--> 501 return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
问题出在xtrain
和xtest
中,但我不知道问题出在这些数组的构造上还是我如何在函数fit_transform()
中使用它
所有3个数据都有2列,时间和加速度。
0级(健康)是来自健康的数据。
1级(IRF)是内圈失效的轴承。
2级(ORF)是外圈故障的轴承。
我希望有人能帮助我修复它,谢谢。
这是类别0(datos1)的数据样本。
datos1:
Time Acceleration
0 0.000000 0.831588
1 0.000205 -1.511027
2 0.000410 1.177196
3 0.000614 -0.890607
4 0.000819 -0.658249
5 0.001024 2.287067
6 0.001229 0.638904
7 0.001434 -1.051136
8 0.001638 0.215930
9 0.001843 0.408084
10 0.002048 -0.163162
11 0.002253 1.058347
12 0.002458 0.634708
13 0.002662 -1.790775
14 0.002867 -0.905238
15 0.003072 0.750530
16 0.003277 -0.356351
17 0.003482 0.105018
18 0.003686 -1.357193
19 0.003891 -0.770428
20 0.004096 0.913545
... ... ...
29285 5.997583 0.538809
29286 5.997788 -0.438271
29287 5.997993 0.648675
29288 5.998198 0.313010
29289 5.998403 -0.336502
29290 5.998607 -0.438529
29291 5.998812 -0.736255
29292 5.999017 -0.431761
29293 5.999222 -1.129413
29294 5.999427 0.108057
29295 5.999631 -0.291278
29296 5.999836 -0.786278
29297 rows × 2 columns