Question

我在下面的脚本中尝试将数据帧插入到sequential model中，该数据帧由以下结构组成：

m1 = 12次读数的平均值（在60个数据的窗口中）-示例（（2 + 5 + 7 ...）/ 12）

d1 = 12个数据的标准偏差

依此类推，直到形成 m5，d5（12x5 = 60）

0-20 = 在接下来的 30 个读数中（第 61、62、63 行……），值落在 0 到 20 范围内的次数

依此类推，直到形成 20-40、40-60、60-80、80-100

关于我在方法中哪里出错的任何建议或提示？

目标是预测整个数据集的下60个读数

脚本：

# -*- coding: utf-8 -*-
# NOTE(review): the source had this whole script collapsed onto a few lines;
# line breaks and nesting below are reconstructed from syntax — confirm
# against the original script where the nesting matters.
import operator
import statistics
import collections
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def histogramNeuronsInput(result):
    """
    Plot the input-neuron data as grouped bars and show the figure.

    For every entry of ``result`` two bar series are drawn: column 0
    (averages, colour C0) and column 1 (standard deviations, colour C1).
    Entries are placed 6 x-units apart and separated by a vertical line.

    Returns whatever ``plt.show()`` returns.
    """
    fig, ax = plt.subplots(figsize=(10,6))
    for i in range(len(result)):
        data = np.array(result[i])
        # shift each entry's bars by 6 units so the groups do not overlap
        x=np.arange(len(data)) + i*6
        # draw averages
        ax.bar(x-0.2, data[:,0], color='C0', width=0.4)
        # draw std
        ax.bar(x+0.2, data[:,1], color='C1', width=0.4)
        # separation line (not drawn after the last group)
        if i < len(result) - 1:
            ax.axvline(4.75 + i*6, color='black')
    # turn off xticks
    ax.set_xticks([])
    ax.legend(labels=['Average', 'Standard deviation'])
    leg = ax.get_legend()
    # NOTE(review): newer Matplotlib releases expose this as `legend_handles`
    # — confirm against the installed version.
    leg.legendHandles[0].set_color('C0')
    leg.legendHandles[1].set_color('C1')
    plt.title("Histogram: Average versus Standard Deviation")
    plt.ylabel('Consume')
    plt.xlabel('Number of elements (Every 5 is a new block)')
    return plt.show()


def histogramNeuronsOutput(result):
    """
    Plot the output-neuron data (one group of bars per entry) and show it.

    Each entry of ``result`` is drawn as one group of bars coloured per bin;
    only the first group's bars get legend labels so that each bin appears
    once in the legend.
    """
    colors = ['blue', 'green', 'yellow', 'orange', 'red']
    labels = ['0-20', '20-40', '40-60', '60-80', '80-100']
    fig, ax = plt.subplots(figsize=(10, 6))
    for i, data in enumerate(result):
        x = np.arange(len(data)) + i*6
        bars = ax.bar(x, data, color=colors, width=0.4)
        if i == 0:
            for bar, label in zip(bars, labels):
                bar.set_label(label)
        if i < len(result) - 1:
            # separation line after each part, but not after the last
            ax.axvline(4.75 + i*6, color='black')
    ax.set_xticks([])
    ax.legend()
    ax.set_title("Histogram")
    ax.set_ylabel('Consume')
    ax.set_xlabel('Percent')
    plt.show()


def standardDeviation(data):
    """ Return ``statistics.stdev(data)``. """
    return statistics.stdev(data)


def average(data):
    """ Return ``statistics.mean(data)``. """
    return statistics.mean(data)


def captureOcurrences(elements, n):
    """
    Split ``elements`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(elements)`` is not a multiple of
    ``n``.
    """
    L = len(elements)
    return [elements[i: i+n] for i in range(0, L, n)]


def neuronsInput(elements):
    """
    Build the input-neuron rows from a sequence of readings.

    A window ``elements[start:limit]`` (initially ``[0:60]``) is split into
    chunks of 12 with ``captureOcurrences``; each chunk contributes one
    ``[average, standard deviation]`` pair. The window then advances by 10
    and the process repeats ``int(len(elements) / 30)`` times.

    Returns a list with one entry per window, each a list of ``[m, sd]``
    pairs. Progress is printed along the way.
    """
    result = []
    temp = []
    start = 0
    limit = 60
    size = int(len(elements))
    TargetDivision = int(size / 30)
    repetitions = 0
    five = 0  # incremented each cycle but never read
    while repetitions < TargetDivision:
        temp = []
        five += 1
        ocurrences = captureOcurrences(elements[start: limit],12)
        for i in ocurrences:
            print("[INFO] 12 Ocurrences: {}".format(i))
            print("[INFO] Average: {}".format(average(i)))
            m = average(i)
            print("[INFO] Standard Deviation: {}".format(standardDeviation(i)))
            sd = standardDeviation(i)
            print("Result: [{},{}]\n\n".format(m,sd))
            temp.append([m,sd])
        # NOTE(review): this prints `result` before the current cycle's `temp`
        # has been appended, so it shows the previous cycles only.
        print("[INFO] Cycle Result {}: \n{}\n\n".format(repetitions+1,result))
        print(temp)
        result.append(temp)
        repetitions += 1
        # slide the window forward by 10 readings
        limit += 10
        start += 10
    print("[INFO] Final result of phase Neurons Input: \n{}\n".format(result))
    return result


def neuronsOutput(elements):
    """
    Build the output-neuron rows: a 5-bin count of readings per window.

    The window ``elements[start:limit]`` starts as ``[61:90]`` and advances
    by 10 for ``int(len(elements) / 30)`` iterations. The distinct values in
    the window are counted with ``collections.Counter`` and the counts are
    accumulated into the bins 0-20, 20-40, 40-60, 60-80 and 80-100.

    Returns a list of ``[c0_20, c20_40, c40_60, c60_80, c80_100]`` rows.
    """
    result = []
    # NOTE(review): [61:90] holds at most 29 readings; the accompanying text
    # speaks of "the next 30 readings" — confirm the intended bounds.
    start = 61
    limit = 90
    size = int(len(elements))
    TargetDivision = int(size / 30)
    repetitions = 0
    while repetitions < TargetDivision:
        print("[INFO] Reading [{}:{}]".format(start, limit))
        print("[INFO] Elements:\n{}".format(elements[start: limit]))
        counter=collections.Counter(elements[start: limit])
        consumption0_20 = 0
        consumption20_40 = 0
        consumption40_60 = 0
        consumption60_80 = 0
        consumption80_100 = 0
        # NOTE(review): the comparisons below are strict on both sides, so a
        # value exactly equal to 40, 60, 80 or 100 (or above 100) matches no
        # branch and is not counted in any bin.
        for key in counter:
            if key <= 20:
                consumption0_20 += int(counter[key])
            elif key > 20 and key < 40:
                consumption20_40 += int(counter[key])
            elif key > 40 and key < 60:
                consumption40_60 += int(counter[key])
            elif key > 60 and key < 80:
                consumption60_80 += int(counter[key])
            elif key > 80 and key < 100:
                consumption80_100 += int(counter[key])
        print("[INFO] Histogram: 0-20 [{}], 20-40 [{}], 40-60 [{}], 60-80 [{}], 80-100 [{}]\n\n".format(consumption0_20,consumption20_40,consumption40_60,consumption60_80,consumption80_100))
        result.append([consumption0_20,consumption20_40,consumption40_60,consumption60_80,consumption80_100])
        repetitions += 1
        # slide the window forward by 10 readings
        limit += 10
        start += 10
    print("[INFO] Final result of phase Neurons Output: \n{}\n".format(result))
    return result


def binaryInput(data):
    """
    Scale every [average, deviation] column by that column's maximum.

    For each column ``j`` the largest average and the largest deviation over
    all rows are found, then every row's pair in that column is divided by
    the respective maximum and rounded to 3 decimals. A maximum of 0 leaves
    the values untouched.

    ``data`` is modified in place and also returned.
    """
    max_average = 0
    max_deviation = 0
    for j in range(len(data[0])):
        # first pass: find the column maxima
        for i in range(len(data)):
            if data[i][j][0] > max_average:
                max_average = data[i][j][0]
            if data[i][j][1] > max_deviation:
                max_deviation = data[i][j][1]
        # second pass: divide the column by its maxima
        for p in range(len(data)):
            if max_average != 0:
                data[p][j][0] = round(data[p][j][0] / max_average, 3)
            if max_deviation != 0:
                data[p][j][1] = round(data[p][j][1] / max_deviation, 3)
        # reset for the next column
        max_average = 0
        max_deviation = 0
    return data


def binaryOutput(data):
    """
    Scale every output column by that column's maximum.

    Each value is divided by the largest value found in its column and
    rounded to 3 decimals; a column whose maximum is 0 is left unchanged.

    ``data`` is modified in place and also returned.
    """
    max_consume = 0
    for j in range(len(data[0])):
        for i in range(len(data)):
            if data[i][j] > max_consume:
                max_consume = data[i][j]
        for p in range(len(data)):
            if max_consume != 0:
                data[p][j] = round(data[p][j] / max_consume, 3)
        # reset for the next column
        max_consume = 0
    return data


def conversionDataframe(dataNeuronInput,dataNeuronOutput):
    """
    Combine input and output rows into a single pandas DataFrame.

    The input data gets the column names 'm1,d1' .. 'm5,d5' and the output
    data the names '0-20' .. '80-100'; both frames are concatenated side by
    side (``axis=1``).
    """
    ni = pd.DataFrame(data= dataNeuronInput)
    ni.columns = ['m1,d1', 'm2,d2', 'm3,d3', 'm4,d4', 'm5,d5']
    no = pd.DataFrame(data= dataNeuronOutput)
    no.columns = ['0-20', '20-40', '40-60', '60-80', '80-100']
    return pd.concat([ni, no], axis=1)


def modeling(data):
    """
    Build the scaled training DataFrame from the raw readings.

    Takes the second column of ``data`` (``data.iloc[:, 1]``) as the
    readings, derives the input and output rows, prints the unscaled frame,
    scales both parts with ``binaryInput`` / ``binaryOutput`` and returns
    the resulting DataFrame.
    """
    readings = data.iloc[:, 1].values
    dataNeuronInput = neuronsInput(readings)
    #histogramNeuronsInput(dataNeuronInput)
    dataNeuronOutput = neuronsOutput(readings)
    #histogramNeuronsOutput(dataNeuronOutput)
    dataFrameNoBinary = conversionDataframe(dataNeuronInput, dataNeuronOutput)
    print("[INFO] Viewing non-binary data: \n{}\n\n".format(dataFrameNoBinary))
    # binaryInput / binaryOutput scale the same list objects in place
    binaryNeuronInput = binaryInput(dataNeuronInput)
    print(binaryNeuronInput)
    binaryNeuronOutput = binaryOutput(dataNeuronOutput)
    print(binaryNeuronOutput)
    dataFrameBinary = conversionDataframe(binaryNeuronInput, binaryNeuronOutput)
    print("[INFO] Converting to binary data frame: \n{}\n\n".format(dataFrameBinary))
    return dataFrameBinary


def neural():
    """
    Read the CSV, build the DataFrame and call a 3-layer Sequential model.

    The model is neither compiled nor trained here; it is called directly
    on the DataFrame returned by ``modeling``.
    """
    df = modeling(pd.read_csv('/home/user/Desktop/classification/data/minute.csv'))
    # Define Sequential model with 3 layers
    model = keras.Sequential(
        [
            layers.Dense(2, activation="relu", name="layer1"),
            layers.Dense(3, activation="relu", name="layer2"),
            layers.Dense(4, name="layer3"),
        ]
    )
    # Call model on a test input
    # NOTE(review): `x` is created but never used; the model is called on
    # the pandas DataFrame `df` itself.
    x = tf.ones((3, 3))
    y = model(df)


def main():
    """ Initializes the script """
    print("[INFO] Start *******************************************************************************")
    neural()
    print("[INFO] End *********************************************************************************")


if __name__ == '__main__':
    main()

数据帧头和dtypes

m1,d1 m2,d2 m3,d3 m4,d4 m5,d5 0-20 20-40 40-60 60-80 80-100 0 [0.573, 0.699] [0.412, 0.224] [0.696, 0.512] [0.326, 0.314] [0.79, 0.685] 1.000 0.5 0 0 0 1 [0.456, 0.251] [0.629, 0.523] [0.344, 0.286] [0.8, 0.699] [0.721, 1.0] 1.000 0.5 0 0 0 2 [0.658, 0.531] [0.339, 0.282] [0.592, 0.614] [0.859, 1.0] [0.365, 0.283] 1.000 0.5 0 0 0 3 [0.396, 0.314] [0.29, 0.201] [1.0, 1.0] [0.34, 0.288] [0.886, 0.647] 1.000 0.5 0 0 0 4 [0.379, 0.315] [1.0, 1.0] [0.302, 0.248] [0.929, 0.655] [0.328, 0.308] 1.000 0.5 0 0 0 5 [1.0, 1.0] [0.274, 0.249] [0.679, 0.536] [0.52, 0.413] [0.382, 0.337] 1.000 0.5 0 0 0 6 [0.657, 0.898] [0.324, 0.244] [0.796, 0.565] [0.336, 0.337] [0.594, 0.48] 1.000 0.5 0 0 0 7 [0.397, 0.312] [0.731, 0.583] [0.358, 0.296] [0.586, 0.495] [0.263, 0.228] 1.000 0.5 0 0 0 8 [0.792, 0.589] [0.343, 0.296] [0.472, 0.434] [0.278, 0.227] [0.432, 0.316] 1.000 0.5 0 0 0 9 [0.395, 0.366] [0.301, 0.247] [0.449, 0.413] [0.438, 0.324] [0.798, 0.545] 1.000 0.5 0 0 0 10 [0.411, 0.347] [0.436, 0.417] [0.373, 0.276] [0.78, 0.565] [0.399, 0.274] 1.000 0.5 0 0 0 11 [0.521, 0.443] [0.221, 0.206] [0.658, 0.476] [0.512, 0.35] [0.364, 0.264] 1.000 0.5 0 0 0 12 [0.277, 0.271] [0.364, 0.264] [0.729, 0.477] [0.325, 0.265] [0.544, 0.447] 0.964 1.0 0 0 0 13 [0.409, 0.312] [0.693, 0.484] [0.333, 0.234] [0.551, 0.458] [0.297, 0.246] 0.964 1.0 0 0 0 14 [0.728, 0.487] [0.332, 0.233] [0.439, 0.403] [0.309, 0.245] [0.327, 0.231] 1.000 0.5 0 0 0 15 [0.368, 0.287] [0.279, 0.229] [0.46, 0.396] [0.309, 0.235] [0.854, 0.675] 1.000 0.5 0 0 0 16 [0.342, 0.283] [0.454, 0.397] [0.268, 0.203] [0.888, 0.685] [0.208, 0.117] 1.000 0.5 0 0 0 17 [0.492, 0.422] [0.238, 0.201] [0.676, 0.585] [0.329, 0.233] [0.24, 0.147] 1.000 0.5 0 0 0 18 [0.311, 0.272] [0.389, 0.468] [0.567, 0.433] [0.21, 0.145] [0.561, 0.435] 1.000 0.5 0 0 0 19 [0.456, 0.485] [0.518, 0.447] [0.223, 0.131] [0.552, 0.447] [0.254, 0.185] 1.000 0.5 0 0 0 20 [0.689, 0.597] [0.23, 0.126] [0.386, 0.392] [0.363, 0.2] [0.276, 0.211] 1.000 0.5 0 0 0 21 
[0.202, 0.129] [0.228, 0.158] [0.503, 0.376] [0.258, 0.215] [0.813, 0.551] 1.000 0.5 0 0 0 22 [0.248, 0.173] [0.493, 0.379] [0.232, 0.187] [0.804, 0.571] [0.295, 0.223] 1.000 0.5 0 0 0 23 [0.484, 0.392] [0.216, 0.18] [0.583, 0.46] [0.46, 0.342] [0.281, 0.226] 1.000 0.5 0 0 0 24 [0.255, 0.215] [0.311, 0.288] [0.658, 0.444] [0.248, 0.227] [0.582, 0.523] 1.000 0.5 0 0 0 25 [0.357, 0.316] [0.619, 0.456] [0.258, 0.2] [0.562, 0.54] [0.29, 0.235] 1.000 0.5 0 0 0 26 [0.71, 0.49] [0.246, 0.199] [0.45, 0.473] [0.314, 0.237] [0.343, 0.235] 1.000 0.5 0 0 0 27 [0.284, 0.247] [0.256, 0.227] [0.484, 0.458] [0.308, 0.239] [0.842, 0.54] 1.000 0.5 0 0 0 28 [0.316, 0.272] [0.476, 0.46] [0.268, 0.206] [0.835, 0.56] [0.296, 0.168] 1.000 0.5 0 0 0 29 [0.529, 0.483] [0.225, 0.202] [0.602, 0.45] [0.433, 0.314] [0.291, 0.187] 1.000 0.5 0 0 0 30 [0.288, 0.273] [0.324, 0.247] [0.647, 0.45] [0.256, 0.182] [0.743, 0.572] 1.000 0.5 0 0 0 31 [0.37, 0.292] [0.619, 0.459] [0.268, 0.16] [0.589, 0.532] [0.569, 0.341] 1.000 0.5 0 0 0 32 [0.688, 0.479] [0.266, 0.158] [0.451, 0.47] [0.579, 0.344] [0.426, 0.234] 1.000 0.5 0 0 0 33 [0.259, 0.174] [0.237, 0.159] [0.696, 0.495] [0.421, 0.231] [0.855, 0.551] 1.000 0.5 0 0 0 34 [0.261, 0.193] [0.67, 0.502] [0.375, 0.197] [0.714, 0.511] [0.566, 0.401] 1.000 0.5 0 0 0 35 [0.635, 0.509] [0.394, 0.199] [0.571, 0.45] [0.515, 0.39] [0.421, 0.28] 0.964 1.0 0 0 0 36 [0.419, 0.231] [0.36, 0.232] [0.648, 0.499] [0.405, 0.28] [0.551, 0.452] 0.964 1.0 0 0 0 37 [0.409, 0.28] [0.659, 0.496] [0.357, 0.235] [0.531, 0.464] [0.308, 0.25] 0.964 1.0 0 0 0 38 [0.741, 0.5] [0.343, 0.238] [0.414, 0.41] [0.324, 0.247] [0.531, 0.438] 1.000 0.5 0 0 0 39 [0.393, 0.291] [0.249, 0.213] [0.468, 0.403] [0.336, 0.256] [1.0, 0.588] 1.000 0.5 0 0 0 40 [0.322, 0.27] [0.452, 0.407] [0.293, 0.22] [1.0, 0.612] [0.346, 0.247] 1.000 0.5 0 0 0 41 [0.5, 0.427] [0.243, 0.215] [0.739, 0.508] [0.479, 0.35] [0.278, 0.161] 1.000 0.5 0 0 0 42 [0.307, 0.285] [0.469, 0.399] [0.678, 0.449] [0.248, 0.159] 
[0.801, 0.708] 1.000 0.5 0 0 0 43 [0.51, 0.428] [0.655, 0.456] [0.257, 0.142] [0.804, 0.724] [0.23, 0.144] 1.000 0.5 0 0 0 44 [0.726, 0.47] [0.245, 0.14] [0.501, 0.618] [0.44, 0.299] [0.276, 0.17] 1.000 0.5 0 0 0 45 [0.288, 0.226] [0.215, 0.121] [0.68, 0.625] [0.249, 0.171] [0.701, 0.716] 1.000 0.5 0 0 0 46 [0.229, 0.148] [0.683, 0.624] [0.222, 0.146] [0.687, 0.732] [0.296, 0.184] 1.000 0.5 0 0 0 47 [0.646, 0.627] [0.204, 0.152] [0.542, 0.632] [0.36, 0.224] [0.291, 0.215] 1.000 0.5 0 0 0 m1,d1 object m2,d2 object m3,d3 object m4,d4 object m5,d5 object 0-20 float64 20-40 float64 40-60 int64 60-80 int64 80-100 int64

输入层： [m1，d1]，[m2，d2]，[m3，d3]，[m4，d4]，[m5， d5]

输出层： 0-20、20-40、40-60、60-80、80-100

错误：

Call initializer instance with the dtype argument instead of passing it to the constructor Traceback (most recent call last): File "script.py", line 233, in <module> main() File "script.py", line 229, in main neural() File "script.py", line 223, in neural y = layer(df) File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/base_layer.py", line 676, in __call__ self._maybe_build(inputs) File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1865, in _maybe_build self.input_spec, inputs, self.name) File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/input_spec.py", line 109, in assert_input_compatibility if x.shape.ndims is None: AttributeError: 'tuple' object has no attribute 'ndims'

注意：仅供参考，data.csv 由 1440 行组成，包含日期和值两列，其中日期以 1 分钟为间隔（1 天 = 1440 分钟），值为 CPU 消耗。

CSV File

Answer 1

我指出了以下代码的关键问题：

您正在尝试将dataframe馈入神经网络。这不可能。 Keras架构的输入必须通过NumPy矩阵传递。
在sequential模型中，要素的形状必须在顺序模型的第一层中用参数input_shape表示。
模型的输出必须与顺序模型的最后一个密集层中的节点数相同。

我已按如下假设更正了代码：给定上一时刻的 mi、ri 值，预测第 (i+1) 个时刻的 mi、ri。代码如下。

尽管如此，这可能仍不能满足您的目标，因此我想给出一些提示。您是在根据过去的值序列预测未来的值序列，我认为您应该尝试循环（recurrent）架构（我首选 LSTM），它应该更适合这个问题。如有其他疑问，请随时回复。谢谢。

Answer 2

您的 neural 函数存在几个问题。

顺序模型不会直接从pandas数据框中获取输入。因此，首先将您的数据转换为numpy数组。

未经训练就无法直接预测，因此必须首先训练模型。然后，您可以将模型用于预测。

以下是更正后的函数：

def neural():
    """Train a small dense network on the modelled data and print a prediction.

    The DataFrame returned by ``modeling`` is converted to NumPy arrays
    before it reaches Keras: the five ``'mN,dN'`` columns (each cell a
    ``[mean, deviation]`` pair) are flattened into a 10-feature input
    matrix, and the five histogram-bin columns form the target matrix.
    The model is compiled, fitted and then used to predict one sample row.

    Returns nothing; the arrays and the prediction are printed.
    """
    df = modeling(pd.read_csv('data.csv'))

    # Convert the data into NumPy arrays: a Sequential model cannot consume
    # a DataFrame whose cells are Python lists.
    input_columns = ['m1,d1', 'm2,d2', 'm3,d3', 'm4,d4', 'm5,d5']
    # Each column yields an (n_rows, 2) array; stacking them side by side
    # gives (n_rows, 10).
    X = np.hstack(
        [np.array(df[column].values.tolist()) for column in input_columns]
    )
    print(X)

    # All five histogram bins are targets (the '80-100' bin must not be
    # dropped, otherwise the network cannot predict it).
    output_columns = ['0-20', '20-40', '40-60', '60-80', '80-100']
    Y = np.column_stack(
        [np.asarray(df[column], dtype=np.float32) for column in output_columns]
    )
    print(Y)

    # Define Sequential model with 3 layers; the last layer has one unit per
    # target column so the output shape matches Y.
    model = keras.Sequential(
        [
            layers.Dense(16, activation="relu", name="layer1"),
            layers.Dense(8, activation="relu", name="layer2"),
            layers.Dense(len(output_columns), name="layer3"),
        ]
    )
    # Compile and train the model
    model.compile(optimizer='Adam', loss='mse', metrics=['mae'])
    model.fit(X, Y, epochs=100, batch_size=4)

    # Use the model for predictions.
    # This is the first input row of the dataset; the input must have shape
    # (number of examples to predict, 10). It is passed as a NumPy array so
    # that Keras treats it as a single batch rather than a list of inputs.
    x = np.array(
        [[0.573, 0.699, 0.412, 0.224, 0.696, 0.512, 0.326, 0.314, 0.79, 0.685]]
    )
    y = model.predict(x)
    print(y)
    # y has one predicted value per histogram bin. Predicting a row that was
    # used for training only shows that the model can fit the data; test on
    # unseen data for a meaningful check.

我已经改进了模型，但您应该使用更大的数据集进行检查，以免出现过拟合或其他问题。

无法插入顺序模型层

2 个答案: