In the script below I am trying to feed a dataframe into a sequential model. The dataframe is built with the following structure:

m1 = the average of 12 readings (within a window of 60 data points), e.g. (2 + 5 + 7 ...) / 12
d1 = the standard deviation of those 12 readings
and so on up to m5, d5 (12 x 5 = 60)
0-20 = how many times a value between 0 and 20 occurs in the next 30 readings (rows 61, 62, 63, ...)
and likewise for 20-40, 40-60, 60-80, 80-100

Any suggestions or hints on where my approach goes wrong?
The goal is to predict the next 60 readings of the whole dataset.
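Roughly, each dataframe row is built like this (a minimal sketch over a toy NumPy array, just to illustrate the intended layout; the real values come from the CSV):

import numpy as np

readings = np.random.randint(0, 100, size=1440)   # toy stand-in for the CSV values

window = readings[0:60]                  # one 60-reading input window
blocks = window.reshape(5, 12)           # five blocks of 12 readings each
means = blocks.mean(axis=1)              # m1..m5
stds = blocks.std(axis=1, ddof=1)        # d1..d5 (sample standard deviation)

future = readings[60:90]                 # the next 30 readings
bins = [0, 20, 40, 60, 80, 100]
counts, _ = np.histogram(future, bins)   # occurrences per range 0-20 ... 80-100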
Script:
# -*- coding: utf-8 -*-
import operator
import statistics
import collections
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def histogramNeuronsInput(result):
    """ Generates histogram of input neurons """
    fig, ax = plt.subplots(figsize=(10, 6))
    for i in range(len(result)):
        data = np.array(result[i])
        x = np.arange(len(data)) + i*6
        # draw averages
        ax.bar(x-0.2, data[:, 0], color='C0', width=0.4)
        # draw std
        ax.bar(x+0.2, data[:, 1], color='C1', width=0.4)
        # separation line
        if i < len(result) - 1:
            ax.axvline(4.75 + i*6, color='black')
    # turn off xticks
    ax.set_xticks([])
    ax.legend(labels=['Average', 'Standard deviation'])
    leg = ax.get_legend()
    leg.legendHandles[0].set_color('C0')
    leg.legendHandles[1].set_color('C1')
    plt.title("Histogram: Average versus Standard Deviation")
    plt.ylabel('Consume')
    plt.xlabel('Number of elements (Every 5 is a new block)')
    return plt.show()

def histogramNeuronsOutput(result):
    """ Generates histogram of output neurons """
    colors = ['blue', 'green', 'yellow', 'orange', 'red']
    labels = ['0-20', '20-40', '40-60', '60-80', '80-100']
    fig, ax = plt.subplots(figsize=(10, 6))
    for i, data in enumerate(result):
        x = np.arange(len(data)) + i*6
        bars = ax.bar(x, data, color=colors, width=0.4)
        if i == 0:
            for bar, label in zip(bars, labels):
                bar.set_label(label)
        if i < len(result) - 1:
            # separation line after each part, but not after the last
            ax.axvline(4.75 + i*6, color='black')
    ax.set_xticks([])
    ax.legend()
    ax.set_title("Histogram")
    ax.set_ylabel('Consume')
    ax.set_xlabel('Percent')
    plt.show()

def standardDeviation(data):
    """ Calculates standard deviation """
    return statistics.stdev(data)

def average(data):
    """ Calculates average """
    return statistics.mean(data)

def captureOcurrences(elements, n):
    """ Capture an X number of elements within a list """
    L = len(elements)
    return [elements[i: i+n] for i in range(0, L, n)]

def neuronsInput(elements):
    """ Generates input neuron modeling (5 averages, 5 standard deviations - Between 12 occurrences in a window of 60 readings) """
    result = []
    temp = []
    start = 0
    limit = 60
    size = int(len(elements))
    TargetDivision = int(size / 30)
    repetitions = 0
    five = 0
    while repetitions < TargetDivision:
        temp = []
        five += 1
        ocurrences = captureOcurrences(elements[start: limit], 12)
        for i in ocurrences:
            print("[INFO] 12 Ocurrences: {}".format(i))
            print("[INFO] Average: {}".format(average(i)))
            m = average(i)
            print("[INFO] Standard Deviation: {}".format(standardDeviation(i)))
            sd = standardDeviation(i)
            print("Result: [{},{}]\n\n".format(m, sd))
            temp.append([m, sd])
        print("[INFO] Cycle Result {}: \n{}\n\n".format(repetitions+1, result))
        print(temp)
        result.append(temp)
        repetitions += 1
        limit += 10
        start += 10
    print("[INFO] Final result of phase Neurons Input: \n{}\n".format(result))
    return result

def neuronsOutput(elements):
    """ Generates output neuron modeling (Histogram of the next 30 data readings) """
    result = []
    start = 61
    limit = 90
    size = int(len(elements))
    TargetDivision = int(size / 30)
    repetitions = 0
    while repetitions < TargetDivision:
        print("[INFO] Reading [{}:{}]".format(start, limit))
        print("[INFO] Elements:\n{}".format(elements[start: limit]))
        counter = collections.Counter(elements[start: limit])
        consumption0_20 = 0
        consumption20_40 = 0
        consumption40_60 = 0
        consumption60_80 = 0
        consumption80_100 = 0
        for key in counter:
            if key <= 20:
                consumption0_20 += int(counter[key])
            elif key > 20 and key < 40:
                consumption20_40 += int(counter[key])
            elif key > 40 and key < 60:
                consumption40_60 += int(counter[key])
            elif key > 60 and key < 80:
                consumption60_80 += int(counter[key])
            elif key > 80 and key < 100:
                consumption80_100 += int(counter[key])
        print("[INFO] Histogram: 0-20 [{}], 20-40 [{}], 40-60 [{}], 60-80 [{}], 80-100 [{}]\n\n".format(consumption0_20, consumption20_40, consumption40_60, consumption60_80, consumption80_100))
        result.append([consumption0_20, consumption20_40, consumption40_60, consumption60_80, consumption80_100])
        repetitions += 1
        limit += 10
        start += 10
    print("[INFO] Final result of phase Neurons Output: \n{}\n".format(result))
    return result

def binaryInput(data):
    """ I divided the values of each column by the highest occurrence in the column """
    max_average = 0
    max_deviation = 0
    for j in range(len(data[0])):
        for i in range(len(data)):
            if data[i][j][0] > max_average:
                max_average = data[i][j][0]
            if data[i][j][1] > max_deviation:
                max_deviation = data[i][j][1]
        for p in range(len(data)):
            if max_average != 0:
                data[p][j][0] = round(data[p][j][0] / max_average, 3)
            if max_deviation != 0:
                data[p][j][1] = round(data[p][j][1] / max_deviation, 3)
        max_average = 0
        max_deviation = 0
    return data

def binaryOutput(data):
    """ I divided the values of each column by the highest occurrence in the column """
    max_consume = 0
    for j in range(len(data[0])):
        for i in range(len(data)):
            if data[i][j] > max_consume:
                max_consume = data[i][j]
        for p in range(len(data)):
            if max_consume != 0:
                data[p][j] = round(data[p][j] / max_consume, 3)
        max_consume = 0
    return data

def conversionDataframe(dataNeuronInput, dataNeuronOutput):
    """ Converts data to a pandas dataframe """
    ni = pd.DataFrame(data=dataNeuronInput)
    ni.columns = ['m1,d1', 'm2,d2', 'm3,d3', 'm4,d4', 'm5,d5']
    no = pd.DataFrame(data=dataNeuronOutput)
    no.columns = ['0-20', '20-40', '40-60', '60-80', '80-100']
    return pd.concat([ni, no], axis=1)

def modeling(data):
    """ Generates the initial model for training the neural network """
    readings = data.iloc[:, 1].values
    dataNeuronInput = neuronsInput(readings)
    #histogramNeuronsInput(dataNeuronInput)
    dataNeuronOutput = neuronsOutput(readings)
    #histogramNeuronsOutput(dataNeuronOutput)
    dataFrameNoBinary = conversionDataframe(dataNeuronInput, dataNeuronOutput)
    print("[INFO] Viewing non-binary data: \n{}\n\n".format(dataFrameNoBinary))
    binaryNeuronInput = binaryInput(dataNeuronInput)
    print(binaryNeuronInput)
    binaryNeuronOutput = binaryOutput(dataNeuronOutput)
    print(binaryNeuronOutput)
    dataFrameBinary = conversionDataframe(binaryNeuronInput, binaryNeuronOutput)
    print("[INFO] Converting to binary data frame: \n{}\n\n".format(dataFrameBinary))
    return dataFrameBinary

def neural():
    df = modeling(pd.read_csv('/home/user/Desktop/classification/data/minute.csv'))
    # Define Sequential model with 3 layers
    model = keras.Sequential(
        [
            layers.Dense(2, activation="relu", name="layer1"),
            layers.Dense(3, activation="relu", name="layer2"),
            layers.Dense(4, name="layer3"),
        ]
    )
    # Call model on a test input
    x = tf.ones((3, 3))
    y = model(df)

def main():
    """ Initializes the script """
    print("[INFO] Start *******************************************************************************")
    neural()
    print("[INFO] End *********************************************************************************")

if __name__ == '__main__':
    main()
Dataframe head and dtypes:
m1,d1 m2,d2 m3,d3 m4,d4 m5,d5 0-20 20-40 40-60 60-80 80-100
0 [0.573, 0.699] [0.412, 0.224] [0.696, 0.512] [0.326, 0.314] [0.79, 0.685] 1.000 0.5 0 0 0
1 [0.456, 0.251] [0.629, 0.523] [0.344, 0.286] [0.8, 0.699] [0.721, 1.0] 1.000 0.5 0 0 0
2 [0.658, 0.531] [0.339, 0.282] [0.592, 0.614] [0.859, 1.0] [0.365, 0.283] 1.000 0.5 0 0 0
3 [0.396, 0.314] [0.29, 0.201] [1.0, 1.0] [0.34, 0.288] [0.886, 0.647] 1.000 0.5 0 0 0
4 [0.379, 0.315] [1.0, 1.0] [0.302, 0.248] [0.929, 0.655] [0.328, 0.308] 1.000 0.5 0 0 0
5 [1.0, 1.0] [0.274, 0.249] [0.679, 0.536] [0.52, 0.413] [0.382, 0.337] 1.000 0.5 0 0 0
6 [0.657, 0.898] [0.324, 0.244] [0.796, 0.565] [0.336, 0.337] [0.594, 0.48] 1.000 0.5 0 0 0
7 [0.397, 0.312] [0.731, 0.583] [0.358, 0.296] [0.586, 0.495] [0.263, 0.228] 1.000 0.5 0 0 0
8 [0.792, 0.589] [0.343, 0.296] [0.472, 0.434] [0.278, 0.227] [0.432, 0.316] 1.000 0.5 0 0 0
9 [0.395, 0.366] [0.301, 0.247] [0.449, 0.413] [0.438, 0.324] [0.798, 0.545] 1.000 0.5 0 0 0
10 [0.411, 0.347] [0.436, 0.417] [0.373, 0.276] [0.78, 0.565] [0.399, 0.274] 1.000 0.5 0 0 0
11 [0.521, 0.443] [0.221, 0.206] [0.658, 0.476] [0.512, 0.35] [0.364, 0.264] 1.000 0.5 0 0 0
12 [0.277, 0.271] [0.364, 0.264] [0.729, 0.477] [0.325, 0.265] [0.544, 0.447] 0.964 1.0 0 0 0
13 [0.409, 0.312] [0.693, 0.484] [0.333, 0.234] [0.551, 0.458] [0.297, 0.246] 0.964 1.0 0 0 0
14 [0.728, 0.487] [0.332, 0.233] [0.439, 0.403] [0.309, 0.245] [0.327, 0.231] 1.000 0.5 0 0 0
15 [0.368, 0.287] [0.279, 0.229] [0.46, 0.396] [0.309, 0.235] [0.854, 0.675] 1.000 0.5 0 0 0
16 [0.342, 0.283] [0.454, 0.397] [0.268, 0.203] [0.888, 0.685] [0.208, 0.117] 1.000 0.5 0 0 0
17 [0.492, 0.422] [0.238, 0.201] [0.676, 0.585] [0.329, 0.233] [0.24, 0.147] 1.000 0.5 0 0 0
18 [0.311, 0.272] [0.389, 0.468] [0.567, 0.433] [0.21, 0.145] [0.561, 0.435] 1.000 0.5 0 0 0
19 [0.456, 0.485] [0.518, 0.447] [0.223, 0.131] [0.552, 0.447] [0.254, 0.185] 1.000 0.5 0 0 0
20 [0.689, 0.597] [0.23, 0.126] [0.386, 0.392] [0.363, 0.2] [0.276, 0.211] 1.000 0.5 0 0 0
21 [0.202, 0.129] [0.228, 0.158] [0.503, 0.376] [0.258, 0.215] [0.813, 0.551] 1.000 0.5 0 0 0
22 [0.248, 0.173] [0.493, 0.379] [0.232, 0.187] [0.804, 0.571] [0.295, 0.223] 1.000 0.5 0 0 0
23 [0.484, 0.392] [0.216, 0.18] [0.583, 0.46] [0.46, 0.342] [0.281, 0.226] 1.000 0.5 0 0 0
24 [0.255, 0.215] [0.311, 0.288] [0.658, 0.444] [0.248, 0.227] [0.582, 0.523] 1.000 0.5 0 0 0
25 [0.357, 0.316] [0.619, 0.456] [0.258, 0.2] [0.562, 0.54] [0.29, 0.235] 1.000 0.5 0 0 0
26 [0.71, 0.49] [0.246, 0.199] [0.45, 0.473] [0.314, 0.237] [0.343, 0.235] 1.000 0.5 0 0 0
27 [0.284, 0.247] [0.256, 0.227] [0.484, 0.458] [0.308, 0.239] [0.842, 0.54] 1.000 0.5 0 0 0
28 [0.316, 0.272] [0.476, 0.46] [0.268, 0.206] [0.835, 0.56] [0.296, 0.168] 1.000 0.5 0 0 0
29 [0.529, 0.483] [0.225, 0.202] [0.602, 0.45] [0.433, 0.314] [0.291, 0.187] 1.000 0.5 0 0 0
30 [0.288, 0.273] [0.324, 0.247] [0.647, 0.45] [0.256, 0.182] [0.743, 0.572] 1.000 0.5 0 0 0
31 [0.37, 0.292] [0.619, 0.459] [0.268, 0.16] [0.589, 0.532] [0.569, 0.341] 1.000 0.5 0 0 0
32 [0.688, 0.479] [0.266, 0.158] [0.451, 0.47] [0.579, 0.344] [0.426, 0.234] 1.000 0.5 0 0 0
33 [0.259, 0.174] [0.237, 0.159] [0.696, 0.495] [0.421, 0.231] [0.855, 0.551] 1.000 0.5 0 0 0
34 [0.261, 0.193] [0.67, 0.502] [0.375, 0.197] [0.714, 0.511] [0.566, 0.401] 1.000 0.5 0 0 0
35 [0.635, 0.509] [0.394, 0.199] [0.571, 0.45] [0.515, 0.39] [0.421, 0.28] 0.964 1.0 0 0 0
36 [0.419, 0.231] [0.36, 0.232] [0.648, 0.499] [0.405, 0.28] [0.551, 0.452] 0.964 1.0 0 0 0
37 [0.409, 0.28] [0.659, 0.496] [0.357, 0.235] [0.531, 0.464] [0.308, 0.25] 0.964 1.0 0 0 0
38 [0.741, 0.5] [0.343, 0.238] [0.414, 0.41] [0.324, 0.247] [0.531, 0.438] 1.000 0.5 0 0 0
39 [0.393, 0.291] [0.249, 0.213] [0.468, 0.403] [0.336, 0.256] [1.0, 0.588] 1.000 0.5 0 0 0
40 [0.322, 0.27] [0.452, 0.407] [0.293, 0.22] [1.0, 0.612] [0.346, 0.247] 1.000 0.5 0 0 0
41 [0.5, 0.427] [0.243, 0.215] [0.739, 0.508] [0.479, 0.35] [0.278, 0.161] 1.000 0.5 0 0 0
42 [0.307, 0.285] [0.469, 0.399] [0.678, 0.449] [0.248, 0.159] [0.801, 0.708] 1.000 0.5 0 0 0
43 [0.51, 0.428] [0.655, 0.456] [0.257, 0.142] [0.804, 0.724] [0.23, 0.144] 1.000 0.5 0 0 0
44 [0.726, 0.47] [0.245, 0.14] [0.501, 0.618] [0.44, 0.299] [0.276, 0.17] 1.000 0.5 0 0 0
45 [0.288, 0.226] [0.215, 0.121] [0.68, 0.625] [0.249, 0.171] [0.701, 0.716] 1.000 0.5 0 0 0
46 [0.229, 0.148] [0.683, 0.624] [0.222, 0.146] [0.687, 0.732] [0.296, 0.184] 1.000 0.5 0 0 0
47 [0.646, 0.627] [0.204, 0.152] [0.542, 0.632] [0.36, 0.224] [0.291, 0.215] 1.000 0.5 0 0 0
m1,d1 object
m2,d2 object
m3,d3 object
m4,d4 object
m5,d5 object
0-20 float64
20-40 float64
40-60 int64
60-80 int64
80-100 int64
Input layer: [m1,d1], [m2,d2], [m3,d3], [m4,d4], [m5,d5]
Output layer: 0-20, 20-40, 40-60, 60-80, 80-100
Error:
Call initializer instance with the dtype argument instead of passing it to the constructor
Traceback (most recent call last):
  File "script.py", line 233, in <module>
    main()
  File "script.py", line 229, in main
    neural()
  File "script.py", line 223, in neural
    y = layer(df)
  File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/base_layer.py", line 676, in __call__
    self._maybe_build(inputs)
  File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1865, in _maybe_build
    self.input_spec, inputs, self.name)
  File "/home/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/input_spec.py", line 109, in assert_input_compatibility
    if x.shape.ndims is None:
AttributeError: 'tuple' object has no attribute 'ndims'
Note: for reference, data.csv consists of 1440 rows with two columns, date and value, where the dates are one minute apart (1 day = 1440 minutes) and the value is a CPU consumption reading.
Answer 0 (score: 2)
I would point out the key problems with the code:

- You are feeding a dataframe into the neural network. That is not possible: the input to a Keras architecture must be passed as NumPy matrices.
- In a sequential model, the shape of the features must be declared in the first layer with the input_shape argument.

Assuming the task is to predict the (i+1)'th mi, ri values given the last i values of mi, ri, I have corrected the code along those lines.
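A minimal sketch of such a setup (the array contents and layer sizes here are illustrative placeholders, not values from the question's data):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Hypothetical stand-ins: 48 samples, 10 input features (m1,d1 ... m5,d5)
# and 5 output counts (0-20 ... 80-100), all already scaled to [0, 1].
X = np.random.rand(48, 10).astype(np.float32)
Y = np.random.rand(48, 5).astype(np.float32)

model = keras.Sequential([
    # input_shape on the first layer declares the feature shape up front
    layers.Dense(16, activation="relu", input_shape=(10,), name="layer1"),
    layers.Dense(8, activation="relu", name="layer2"),
    layers.Dense(5, name="layer3"),
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.fit(X, Y, epochs=50, batch_size=4)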
Still, this does not fully cover what you are attempting, so let me give a few hints. Given a sequence of past values, you are trying to predict a sequence of future values. In my view you should try a recurrent architecture (my best bet: an LSTM); I expect it to suit this problem. If you have any further queries, feel free to reply. Thanks.
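For instance, a minimal sketch of such a recurrent setup (assuming each 60-reading window is reshaped into 5 timesteps of [mean, std] pairs; the data here is random filler and the layer sizes are illustrative):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Hypothetical shapes: 100 windows, 5 timesteps, 2 features (mean, std).
X = np.random.rand(100, 5, 2).astype(np.float32)
Y = np.random.rand(100, 5).astype(np.float32)   # 5 range counts per window

model = keras.Sequential([
    layers.LSTM(32, input_shape=(5, 2)),   # reads the five (mean, std) steps in order
    layers.Dense(5),                       # one output per consumption range
])
model.compile(optimizer="adam", loss="mse")
model.fit(X, Y, epochs=20, batch_size=8)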
Answer 1 (score: 0)
There are a few issues in your neural function.

A sequential model does not take its input directly from a pandas dataframe, so first convert your data to NumPy arrays.

You cannot predict directly without training, so the model has to be trained first; then you can use it for predictions.

Here is the corrected function:
def neural():
    df = modeling(pd.read_csv('data.csv'))
    # Convert data into numpy arrays
    X = np.array(df['m1,d1'].values.tolist())
    X = np.append(X, np.array(df['m2,d2'].values.tolist()), axis=1)
    X = np.append(X, np.array(df['m3,d3'].values.tolist()), axis=1)
    X = np.append(X, np.array(df['m4,d4'].values.tolist()), axis=1)
    X = np.append(X, np.array(df['m5,d5'].values.tolist()), axis=1)
    print(X)
    Y = []
    Y.append(np.asarray(df['0-20'], dtype=np.float32))
    Y.append(np.asarray(df['20-40'], dtype=np.float32))
    Y.append(np.asarray(df['40-60'], dtype=np.float32))
    Y.append(np.asarray(df['60-80'], dtype=np.float32))
    Y = np.transpose(np.asarray(Y))
    print(Y)
    # Define Sequential model with 3 layers
    model = keras.Sequential(
        [
            layers.Dense(16, activation="relu", name="layer1"),
            layers.Dense(8, activation="relu", name="layer2"),
            layers.Dense(4, name="layer3"),
        ]
    )
    # Compile and train the model
    model.compile(optimizer='Adam', loss='mse', metrics=['mae'])
    model.fit(X, Y, epochs=100, batch_size=4)
    # Use the model for predictions
    # This is the first input of your dataset which I have used for prediction
    # Your input should be of shape (No. of examples you are predicting, 10)
    x = [[0.573, 0.69, 0.412, 0.224, 0.696, 0.512, 0.326, 0.314, 0.79, 0.685]]
    y = model.predict(x)
    print(y)
    # Output is [[ 1.0625525e+00  5.0397384e-01  2.1531060e-04 -1.0078825e-02]],
    # which is very close to the actual output, but you should test on unseen
    # data; that will be better.
I have improved the model, but you should check it against a larger dataset to avoid overfitting or other problems.
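As a follow-up, a minimal sketch of holding out unseen rows for evaluation (this assumes scikit-learn is available; X, Y and model are as built in the function above):

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows so the model is scored on data it never saw.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model.fit(X_train, Y_train, epochs=100, batch_size=4)
loss, mae = model.evaluate(X_test, Y_test)   # [mse, mae] given the compile call above
print("Test MSE: {:.4f}, Test MAE: {:.4f}".format(loss, mae))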