I have a Keras neural network and I want to deploy this model in a Spark environment using a wrapper, so I tried the following tutorial here:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
from keras import backend as K
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Expect to see a numpy n-dimensional array of shape (60000, 28, 28)
type(X_train), X_train.shape, type(X_train)
# This time, however, we flatten each of our 28 x 28 images into a vector of length 784
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
# Expect to see numpy n-dimensional arrays of shape (60000, 784) for the training data and (10000, 784) for the test data
type(X_train), X_train.shape, X_test.shape
# We also use sklearn's MinMaxScaler for normalization
from sklearn.preprocessing import MinMaxScaler
def scaleData(data):
    # normalize features
    scaler = MinMaxScaler(feature_range=(0, 1))
    return scaler.fit_transform(data)
X_train = scaleData(X_train)
X_test = scaleData(X_test)
# We define the same Keras model as earlier
input_shape = (1,28,28) if K.image_data_format() == 'channels_first' else (28,28, 1)
keras_model = Sequential()
keras_model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=input_shape, padding='same'))
keras_model.add(MaxPooling2D(pool_size=(2, 2)))
keras_model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
keras_model.add(MaxPooling2D(pool_size=(2, 2)))
keras_model.add(Flatten())
keras_model.add(Dense(512, activation='relu'))
keras_model.add(Dropout(0.5))
keras_model.add(Dense(10, activation='softmax'))
keras_model.summary()
import math
# Import the Keras to DML wrapper and define some basic variables
from systemml.mllearn import Keras2DML
epochs = 5
batch_size = 100
samples = 60000
max_iter = int(epochs*math.ceil(samples/batch_size))
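# with these values: 5 * ceil(60000 / 100) = 5 * 600 = 3000 iterations in total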
# Now create a SystemML model by calling Keras2DML and feeding it your Spark session, the Keras model, its input shape, and the predefined variables. We also ask for the training results to be displayed every 10 iterations.
sysml_model = Keras2DML(spark, keras_model, input_shape=(1,28,28), weights='weights_dir', batch_size=batch_size, max_iter=max_iter, test_interval=0, display=10)
# Initiate training. More Spark workers and a better machine configuration mean faster training!
sysml_model.fit(X_train, y_train)
# Test your model's performance on the held-out test set, and iterate again if required
sysml_model.score(X_test, y_test)
At the line
from systemml.mllearn import Keras2DML
I get the following error:
Traceback (most recent call last):
  File "d:/SparkJarDirectory/./NNSpark.py", line 58, in <module>
    from systemml.mllearn import Keras2DML
  File "C:\Users\xyz\AppData\Local\Continuum\anaconda3\lib\site-packages\systemml\mllearn\__init__.py", line 45, in <module>
    from .estimators import *
  File "C:\Users\xyz\AppData\Local\Continuum\anaconda3\lib\site-packages\systemml\mllearn\estimators.py", line 917
    def __init__(self, sparkSession, keras_model, input_shape, transferUsingDF=False, load_keras_weights=True,
                 weights=None, labels=None, batch_size=64, max_iter=2000, test_iter=10, test_interval=500,
                 display=100, lr_policy="step", weight_decay=5e-4, regularization_type="L2"):
    ^
SyntaxError: import * only allowed at module level
2019-03-12 20:25:48 INFO ShutdownHookManager:54 - Shutdown hook called
2019-03-12 20:25:48 INFO ShutdownHookManager:54 - Deleting directory C:\Users\xyz\AppData\Local\Temp\spark-2e1736f8-1798-42da-a157-cdf0ade1bf36
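For reference, the same SyntaxError message can be reproduced under Python 3 by placing a star import inside a function body; this minimal snippet is only meant to illustrate what the message refers to, not necessarily what is actually happening inside estimators.py:
def broken():
    from math import *   # star import inside a function body
# Running a file containing this fails at compile time with:
# SyntaxError: import * only allowed at module level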
As far as I understand, the library they use has this in its mllearn __init__.py:
from .estimators import *
__all__ = estimators.__all__
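That looks like the standard re-export pattern for a package __init__.py; a minimal sketch of how it is normally meant to work, using hypothetical module and class names, would be:
# mypkg/estimators.py  (hypothetical module, for illustration only)
__all__ = ['SomeEstimator']

class SomeEstimator:
    # stand-in for the real estimator classes
    pass

# mypkg/__init__.py  (mirrors the two lines quoted above)
from .estimators import *        # pulls in every name listed in estimators.__all__
__all__ = estimators.__all__     # re-exports the same public names from the package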
I am not sure why the wrapper does not work or what fix is needed. Any help is appreciated.