TensorFlow LSTM for time series data classification

Date: 2019-10-25 09:44:31

Tags: csv tensorflow lstm

I'm currently having trouble preparing my dataset to train the model.

I need to reshape the dataset, but I keep running into:

ValueError: cannot copy sequence with size 225 to array axis with dimension 3

Here is all the code from my notebook:

First block:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

DATADIR = r"E:\Data\csv"

CATEGORIES = ['1', '2', '3']

Second block:

training_data = []

def create_training_data():
    for category in CATEGORIES:  

        path = os.path.join(DATADIR,category)  # create path to categories
        class_num = CATEGORIES.index(category)  # get the classification 

        for csv_file in tqdm(os.listdir(path)):  # iterate over each csv file per category
            try:
                traindata = pd.read_csv(os.path.join(path, csv_file), header=None, names=['x', 'y', 'z'])
                training_data.append([traindata, class_num])  # add this to our training_data

            except OSError as e:
                print("OSError Bad csv most likely", e, os.path.join(path, csv_file))
            except Exception as e:  # catch-all so one bad file doesn't stop the loop
                print("general exception", e, os.path.join(path, csv_file))

create_training_data()

print(len(training_data))
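A quick sanity check here shows what create_training_data actually builds: a list of [DataFrame, label] pairs whose DataFrames can have different row counts, which is what makes the later reshape awkward. A minimal inspection sketch (the printed shapes obviously depend on the CSVs):

# Each entry is [DataFrame, class_num]; the DataFrames are (rows, 3) with
# columns x, y, z, and the number of rows can differ between files.
for df, label in training_data[:5]:
    print(label, df.shape)

# How many distinct sequence lengths are in the dataset?
lengths = sorted({df.shape[0] for df, _ in training_data})
print(lengths[:10])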

Third block:

import random

random.shuffle(training_data)
for sample in training_data[:10]:
    print(sample[1])    

Fourth block:

X = []
y = []

for features,label in training_data:
    X.append(features)
    y.append(label)
X = np.array(X).reshape(1,3,1)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-42d61f854bf2> in <module>
      5     X.append(features)
      6     y.append(label)
----> 7 X = np.array(X).reshape(1,3,1)
      8 #X = numpy.reshape(X, (1, 1, 1))

ValueError: cannot copy sequence with size 225 to array axis with dimension 3
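
What I think is going on: X is a list of pandas DataFrames with different numbers of rows, so np.array(X) cannot build a regular array, and in any case reshape(1, 3, 1) only has room for 3 values while even a single CSV holds thousands of rows × 3 columns. One way around this, as a sketch (MAXLEN = 1000 is an arbitrary assumption, not a value from my data), is to pad/truncate every recording to the same number of timesteps so the result is a (samples, timesteps, 3) array that an LSTM can accept:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

MAXLEN = 1000  # assumed fixed sequence length; pick something sensible for the data

sequences = [df.values for df, _ in training_data]   # each element is (rows, 3)
labels = [label for _, label in training_data]

# Pad short recordings with zeros and cut long ones so every sample is MAXLEN long
X = pad_sequences(sequences, maxlen=MAXLEN, dtype='float32',
                  padding='post', truncating='post')
y = np.array(labels)

print(X.shape)  # (num_samples, 1000, 3)
print(y.shape)  # (num_samples,)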

If I don't reshape, then my fourth block looks like this when I print X:

X = []
y = []

for features,label in training_data:
    X.append(features)
    y.append(label)
print(X)

[[],           x      y     z
0     -1279 -12031   513
1     -5119 -16127   769
2      1025 -15871 -2559
3     -4863 -17407 -7167
4     -7167 -16639 -5887
...     ...    ...   ...
52196 -1535 -16127 -6399
52197 -4095 -15359 -3071
52198 -2303 -13823  2817
52199 -2815 -14079  -767
52200 -3071 -12799 -6655

[52201 rows x 3 columns]]
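
Since each CSV seems to be one long recording (52201 rows in the sample above), another option is to slice every recording into fixed-length windows, so each window becomes one training sample with the recording's label. Again just a sketch; the window length of 200 timesteps is an assumption:

import numpy as np

WINDOW = 200  # assumed window length in timesteps

X_windows, y_windows = [], []
for df, label in training_data:
    values = df.values                        # (rows, 3)
    for i in range(len(values) // WINDOW):    # drop the incomplete tail window
        X_windows.append(values[i * WINDOW:(i + 1) * WINDOW])
        y_windows.append(label)

X = np.array(X_windows, dtype='float32')      # (num_windows, 200, 3)
y = np.array(y_windows)

print(X.shape, y.shape)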

Fifth block:

import pickle

pickle_out = open("X.pickle","wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open("y.pickle","wb")
pickle.dump(y, pickle_out)
pickle_out.close()
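
Side note: once X and y are plain NumPy arrays (as in the sketches above), np.save/np.load also works and avoids pickling DataFrames:

np.save('X.npy', X)
np.save('y.npy', y)

# later / in another notebook
X = np.load('X.npy')
y = np.load('y.npy')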

Sixth block:

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM
import pickle

pickle_in = open("X.pickle","rb")
X = pickle.load(pickle_in)

pickle_in = open("y.pickle","rb")
y = pickle.load(pickle_in)



model = Sequential()
model.add(LSTM(64, input_shape=X.shape[1:], return_sequences=True))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(LSTM(64, return_sequences=True))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
          optimizer='adam',
          metrics=['accuracy'])

model.fit(X, y, batch_size=10, epochs=20, validation_split=0.1, verbose=1)
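
For what it's worth, I suspect this model would also fail once the data problem is fixed: MaxPooling2D expects 4D image-like input (batch, height, width, channels), while LSTM layers with return_sequences=True output 3D tensors (batch, timesteps, units). A stripped-down purely recurrent version that accepts (timesteps, 3) input would look roughly like this (a sketch, not a tuned architecture):

model = Sequential()
model.add(LSTM(64, input_shape=X.shape[1:], return_sequences=True))
model.add(LSTM(64))                        # last LSTM returns one vector per sample
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))  # 3 classes -> labels 0, 1, 2

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(X, np.array(y), batch_size=10, epochs=20, validation_split=0.1, verbose=1)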
