Given the code below, I am trying to predict in real time as more data arrives in the database. I would like to know if anyone has suggestions for improving the accuracy of the results, and whether it is possible to have the model keep learning as more data becomes available while it is making predictions at the same time.
# imports
import numpy
import matplotlib.pyplot as plt
import math
import sys
import time
import MySQLdb
from pandas import read_csv
from pandas import read_sql_query
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.callbacks import TensorBoard
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from pytz import timezone
from datetime import datetime
# time zone
est = timezone('UTC')
# database settings
sql_hn = "********"
sql_p = 3306
sql_uid = "********"
sql_pwd = "********"
sql_db = "********"
# database connection
conn = MySQLdb.connect(
    host=sql_hn,
    port=sql_p,
    user=sql_uid,
    passwd=sql_pwd,
    db=sql_db
)
# turn the series into (input, target) pairs: each window of `pasado` values
# is used to predict the value that follows it
def acomodar_informacion(informacion, pasado=1):
    informacion_x, informacion_y = [], []
    for i in range(len(informacion) - pasado - 1):
        a = informacion[i:(i + pasado), 0]
        informacion_x.append(a)
        informacion_y.append(informacion[i + pasado, 0])
    return numpy.array(informacion_x), numpy.array(informacion_y)
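# Example (illustrative): with informacion = [[1.], [2.], [3.], [4.], [5.]] and pasado=1,
# acomodar_informacion returns x = [[1.], [2.], [3.]] and y = [2., 3., 4.],
# i.e. each value is paired with the value of the following round.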
# fix random seed
numpy.random.seed(7)
# fetch the last 30000 rounds from the database, oldest first
#archivo = read_csv('dice_amplified/primeros_10_mil.csv', usecols=[0], engine='python')
archivo = read_sql_query("SELECT value FROM ( SELECT * FROM crawler ORDER BY round DESC LIMIT 30000) sub ORDER BY round ASC", conn)
informacion = archivo.values
informacion = informacion.astype('float32')
# normalize the data to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
informacion = scaler.fit_transform(informacion)
#variables
verbose = 2
epochs = 200
ronda = 256
pasado = 1
# split into training and evaluation sets
aprender_size = int(len(informacion) * 0.75)
evaluar_size = len(informacion) - aprender_size
aprender, evaluar = informacion[0:aprender_size,:], informacion[aprender_size:len(informacion),:]
# build (input, target) pairs for both sets
aprender_x, aprender_y = acomodar_informacion(aprender, pasado)
evaluar_x, evaluar_y = acomodar_informacion(evaluar, pasado)
# reshape to [samples, time steps, features], the input shape the LSTM expects
aprender_x = numpy.reshape(aprender_x, (aprender_x.shape[0], 1, aprender_x.shape[1]))
evaluar_x = numpy.reshape(evaluar_x, (evaluar_x.shape[0], 1, evaluar_x.shape[1]))
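# For example, assuming the query above really returns the full 30000 rows, the 75/25
# split leaves 22500 rows for training, and with pasado=1 this makes aprender_x
# shape (22498, 1, 1): one sample per round, one time step, one feature.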
# build the LSTM network
model = Sequential()
model.add(LSTM(100, input_shape=(1, pasado)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(aprender_x,
          aprender_y,
          epochs=epochs,
          batch_size=ronda,
          validation_data=(evaluar_x, evaluar_y),
          verbose=verbose)
# dump the training pairs
for i in range(len(aprender_x)):
    print("causa=%s, consecuencia=%s" % (aprender_x[i], aprender_y[i]))
print("******************* PREDICCIONES *******************")
while True:
    # fetch the most recent value
    archivo_xnew = read_sql_query("SELECT value FROM crawler ORDER BY round DESC LIMIT 0,1", conn)
    informacion_xnew = archivo_xnew.values
    informacion_xnew = informacion_xnew.astype('float32')
    # commit so the next query sees fresh rows instead of the old snapshot
    conn.commit()
    # scale with the training scaler and reshape to [samples, time steps, features]
    informacion_xnew = scaler.transform(informacion_xnew)
    informacion_xnew = numpy.reshape(informacion_xnew, (informacion_xnew.shape[0], 1, informacion_xnew.shape[1]))
    # make the prediction and map it back to the original scale
    ynew = scaler.inverse_transform(model.predict(informacion_xnew))
    # dump the result
    for i in range(len(informacion_xnew)):
        print("causa=%s, prediccion=%s" % (informacion_xnew[i], ynew[i]))
(1) Does anyone know how I could improve the accuracy further?
(2) And, most importantly: is there a way to have it keep learning while it makes predictions, as can be seen in the last lines of the code?
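For (2), what I have in mind is roughly the following at the end of the prediction loop. This is only a sketch, not working code: it assumes that after each prediction the loop waits until the next round has been stored in the crawler table, so its value can be used as the training target for the input that was just predicted from:

    # --- sketch: one incremental update per round (assumes the next round already exists) ---
    archivo_ynew = read_sql_query("SELECT value FROM crawler ORDER BY round DESC LIMIT 0,1", conn)
    conn.commit()
    y_real = scaler.transform(archivo_ynew.values.astype('float32'))
    # single gradient step on the new (input, target) pair
    model.train_on_batch(informacion_xnew, y_real)

Is something like this reasonable, or is there a better way to update the model online?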
Thanks in advance! Chris