是否有一种方法可以获取模型的当前损失,而无需对其进行评估或拟合?
# Network from the question: 400 inputs -> 25 sigmoid units -> 10 softmax
# outputs. Only the hidden layer's kernel carries an L2 penalty (`lambd`).
network_layers = [
    keras.layers.Input(400),
    keras.layers.Dense(
        25,
        activation=tf.nn.sigmoid,
        kernel_regularizer=regularizers.l2(lambd),
    ),
    keras.layers.Dense(10, activation=tf.nn.softmax),
]
model = keras.Sequential(network_layers)

compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',  # does the 1-hot encoding for us
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

# Load externally supplied weights: transposed Theta matrices as the two
# kernels, all-zero vectors as the two biases.
initial_weights = [Theta1.T, np.zeros(25), Theta2.T, np.zeros(10)]
model.set_weights(initial_weights)

# Predicted class = index of the largest output, reshaped to a column so it
# can be compared element-wise with `y`.
prob = model.predict(X)
pred = np.reshape(np.argmax(prob, axis=1), (-1, 1))
pred_y = (pred == y)
print(f'Training Set Accuracy: {np.mean(pred_y)*100:.2f}%')
# How do I get the loss now?
这个方法对我不起作用。
答案 0 :(得分:1)
您可以将 model.predict(x) 的输出传递给损失函数的实现来计算损失。此外,您还需要一个计算模型正则化损失的函数——reg_loss(model)(下文实现为 l1l2_weight_loss(model))。下面给出了 binary_crossentropy 以及适用于所有层(包括循环层)的 l1、l2 和 l1_l2 权重损失的实现,但不包括 activity_regularizer 的损失,因为它不是权重损失。您可以用自己的函数替换 binary_crossentropy,例如 sparse_softmax_crossentropy_with_logits:
def binary_crossentropy(y_true, y_pred, sample_weight=1, epsilon=None):
    """Return the mean, optionally weighted, binary cross-entropy (NumPy only).

    Probabilities are clipped to ``[epsilon, 1 - epsilon]``, converted back
    to logits, and passed through the numerically stable form
    ``max(z, 0) - z*y + log(1 + exp(-|z|))``.

    Args:
        y_true: Binary targets, one per sample (scalar, 1-D, or 2-D).
        y_pred: Predicted probabilities, 1-D or 2-D. For input with more
            than one dimension only the first column is used.
        sample_weight: Scalar or per-sample weights multiplied into each
            sample's loss before averaging.
        epsilon: Clipping margin. When omitted, ``keras.backend.epsilon()``
            is used.

    Returns:
        ``np.mean(sample_weight * per_sample_loss)`` as a NumPy float.
    """
    if epsilon is None:
        # Imported lazily: the visible file never binds `K`, and callers
        # that pass `epsilon` explicitly do not need Keras at all.
        from keras import backend as K
        epsilon = K.epsilon()

    def _column(values):
        # Same rule as force_2d_shape: anything that is not already 2-D is
        # turned into a column (scalar -> (1, 1), length-n vector -> (n, 1)).
        arr = np.asarray(values)
        return arr if arr.ndim == 2 else np.atleast_2d(arr).T

    # Always compute in float64 so the result does not depend on whether any
    # element happened to be clipped.
    probs = np.asarray(y_pred, dtype=np.float64)
    if probs.ndim > 1:
        probs = probs[:, 0]
    probs = np.clip(probs, epsilon, 1 - epsilon)

    y_true = _column(y_true)
    probs = _column(probs)
    sample_weight = _column(sample_weight)

    logits = np.log(probs) - np.log(1 - probs)  # sigmoid inverse
    loss_vec = (np.maximum(logits, 0) - logits * y_true
                + np.log1p(np.exp(-np.abs(logits))))
    return np.mean(sample_weight * loss_vec)
def force_2d_shape(arr_list):
    """Coerce, in place, every entry of ``arr_list`` that is not 2-D.

    An entry whose array form does not have exactly two dimensions is
    replaced by ``np.atleast_2d(entry).T`` (a scalar becomes shape (1, 1),
    a length-n vector becomes an (n, 1) column). Entries that are already
    2-D are left untouched.

    Args:
        arr_list: Mutable sequence of array-likes; it is modified in place.

    Returns:
        The same ``arr_list`` object, for convenient unpacking.
    """
    for idx, arr in enumerate(arr_list):
        # np.ndim inspects the dimensionality without keeping a converted copy.
        if np.ndim(arr) != 2:
            arr_list[idx] = np.atleast_2d(arr).T
    return arr_list
def l1l2_weight_loss(model):
    """Return the summed L1/L2 weight penalty over all of ``model.layers``.

    Layers that carry a ``layer`` or ``cell`` attribute (wrappers and
    recurrent layers) are delegated to ``_l1l2_rnn_loss``. For every other
    layer the kernel penalty is applied to ``get_weights()[0]`` and the bias
    penalty to ``get_weights()[1]``:
    ``l1 * sum(|w|) + l2 * sum(w**2)``.

    Args:
        model: Object exposing a ``layers`` iterable.

    Returns:
        The total penalty; ``0`` when no layer has a non-zero factor.
    """
    def _coeffs(regularizer):
        # [l1, l2] factors. Read by attribute name rather than by position in
        # ``__dict__`` so regularizers that expose only one of the two are
        # still interpreted correctly; anything missing counts as 0.
        if regularizer is None:
            return [0, 0]
        return [getattr(regularizer, 'l1', 0) or 0,
                getattr(regularizer, 'l2', 0) or 0]

    total = 0
    for layer in model.layers:
        attrs = vars(layer)
        if 'layer' in attrs or 'cell' in attrs:
            total += _l1l2_rnn_loss(layer)
            continue

        # .get() tolerates layers that define only one (or none) of the two.
        lambdas = (_coeffs(attrs.get('kernel_regularizer'))
                   + _coeffs(attrs.get('bias_regularizer')))
        if not any(lam != 0 for lam in lambdas):
            continue

        weights = layer.get_weights()
        for idx, lam in enumerate(lambdas):
            w_idx = idx // 2  # 0 -> kernel, 1 -> bias
            # A bias factor on a layer without a bias weight has nothing to
            # penalize.
            if lam == 0 or w_idx >= len(weights):
                continue
            power = 2 ** (idx % 2)  # 1 if idx is even (l1), 2 if odd (l2)
            total += lam * np.sum(np.abs(weights[w_idx]) ** power)
    return total
def _l1l2_rnn_loss(layer):
    """Return the L1/L2 weight penalty of one recurrent (or wrapped) layer.

    The kernel, recurrent and bias regularizers are read from the layer's
    ``cell`` and applied, in that order, to ``layer.get_weights()``. When
    ``layer`` has a ``backward_layer`` attribute it is treated as
    bidirectional: the cell is taken from ``layer.layer`` and the same
    factors are also applied to the second half of the weight list.

    Args:
        layer: Object exposing ``cell`` (or ``layer.cell`` when it has a
            ``backward_layer``) and ``get_weights()``.

    Returns:
        The penalty ``sum(l1 * sum(|w|) + l2 * sum(w**2))``; ``0`` when no
        factor is non-zero.
    """
    def _coeffs(regularizer):
        # [l1, l2] factors, read by attribute name rather than by position in
        # ``__dict__``; anything missing counts as 0.
        if regularizer is None:
            return [0, 0]
        return [getattr(regularizer, 'l1', 0) or 0,
                getattr(regularizer, 'l2', 0) or 0]

    bidirectional = 'backward_layer' in vars(layer)
    rnn = layer.layer if bidirectional else layer
    cell_attrs = vars(rnn.cell)

    # .get() tolerates cells that define only some of the three regularizers.
    lambdas = (_coeffs(cell_attrs.get('kernel_regularizer'))
               + _coeffs(cell_attrs.get('recurrent_regularizer'))
               + _coeffs(cell_attrs.get('bias_regularizer')))
    if not any(lam != 0 for lam in lambdas):
        return 0

    weights = layer.get_weights()
    # For a bidirectional wrapper the list is treated as two equal halves:
    # one direction's weights followed by the other's.
    backward_offset = len(weights) // 2  # accounts for 'use_bias'
    per_direction = backward_offset if bidirectional else len(weights)

    total = 0
    for idx, lam in enumerate(lambdas):
        w_idx = idx // 2  # 0 -> kernel, 1 -> recurrent, 2 -> bias
        # A bias factor on a layer without a bias weight has nothing to
        # penalize.
        if lam == 0 or w_idx >= per_direction:
            continue
        power = 2 ** (idx % 2)  # 1 if idx is even (l1), 2 if odd (l2)
        total += lam * np.sum(np.abs(weights[w_idx]) ** power)
        if bidirectional:
            total += lam * np.sum(
                np.abs(weights[w_idx + backward_offset]) ** power)
    return total
from keras.layers import Input, Dense, LSTM, GRU, Bidirectional
from keras.models import Model
from keras.regularizers import l1, l2, l1_l2
import numpy as np
# Demo dimensions shared by the model input and the random data below.
batch_size, timesteps, input_dim = 10, 1200, 16

# Stacked recurrent binary classifier, each stage with a different weight
# regularizer: LSTM (recurrent l2) -> bidirectional GRU (bias l1)
# -> sigmoid Dense (kernel l1_l2).
ipt = Input(shape=(timesteps, input_dim))
lstm_stage = LSTM(
    60,
    activation='relu',
    return_sequences=True,
    recurrent_regularizer=l2(1e-3),
)
x = lstm_stage(ipt)
gru_stage = Bidirectional(
    GRU(60, activation='relu', bias_regularizer=l1(1e-4))
)
x = gru_stage(x)
output_stage = Dense(1, activation='sigmoid', kernel_regularizer=l1_l2(2e-4))
out = output_stage(x)

model = Model(inputs=ipt, outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy')

# Random inputs and binary labels.
X = np.random.rand(batch_size, timesteps, input_dim)  # (batch_size, timesteps, input_dim)
Y = np.random.randint(0, 2, (batch_size, 1))

# Per-sample weight looked up from the label's class (keys are the label
# rendered as a string).
class_weights = {'0': 1, '1': 6}
sample_weights = np.array(
    [class_weights[str(label)] for label in Y[:, 0]]
)
0.763822-keras_loss
0.763822-custom_loss