解决贝叶斯Logistic回归中的精度降低问题

时间:2019-05-14 02:52:34

标签: tensorflow

我在贝叶斯逻辑回归中具有以下代码,并将其应用于我的数字数据集。 该模型的精度在最初的初始值处给出了92%的合理值,但是随着迭代的增加,精度值开始上下波动,同时损失也随之降低,精度值将开始减小至非常低的价值。我想这可能是过度拟合的结果。我想尽早停止,看看性能是否会有所改善。但是,由于没有像其他张量流模型那样适合这里,因此我在应用它方面遇到了麻烦。如果还有其他事情可以导致此结果,我将乐于学习并了解如何最好地利用它来提高工作的准确性。

!pip install --upgrade -q gspread
from tensorboardcolab import *
import shutil
#clean out the directory
shutil.rmtree('./Graph', ignore_errors=True)
os.mkdir('./Graph')
#tf.reset_default_graph()
#will start the tunneling and will print out a link:
tbc=TensorBoardColab()

!pip show tensorflow
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# Dependencies
import os
import warnings
#from absl import flags
import matplotlib
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import math
import pandas as pd
tfd = tfp.distributions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials
#import python_utils

# clear graph (if any) before running
tf.reset_default_graph()

####Delete all flags before declare#####

def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()    
    keys_list = [keys for keys in flags_dict]    
    for keys in keys_list:
        FLAGS.__delattr__(keys)

del_all_flags(tf.flags.FLAGS)

flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
flags.DEFINE_float("learning_rate", default = 0.0001, help = "Initial learning rate.")
flags.DEFINE_integer("epochs", default = 700, help = "Number of epochs to train for")
flags.DEFINE_integer("batch_size", default =128, help = "Batch size.")
flags.DEFINE_integer("eval_freq", default = 400, help =" Frequency at which to validate the model.")
flags.DEFINE_float("kernel_posterior_scale_mean", default = -0.9, help = "Initial kernel posterior mean of the scale (log var) for q(w)")
flags.DEFINE_float("kernel_posterior_scale_constraint", default = 0.2, help = "Posterior kernel constraint for the scale (log var) for q(w)")
flags.DEFINE_float("kl_annealing", default = 50, help = "Epochs to anneal the KL term (anneals from 0 to 1)")
flags.DEFINE_integer("num_hidden_layers", default = 4, help = "Number of hidden layers")
flags.DEFINE_integer("num_monte_carlo",

                     default=50,

                     help="Network draws to compute predictive probabilities.")
tf.app.flags.DEFINE_string('f', '', 'kernel')
#initialize flags
#FLAGS = flags.FLAGS
print(FLAGS.learning_rate)
print(FLAGS.epochs)
print(FLAGS.num_monte_carlo)

def build_input_pipeline(X,y, batch_size):
  training_dataset = tf.data.Dataset.from_tensor_slices((X, y))
  training_batches = training_dataset.repeat().batch(batch_size)
  training_iterator = tf.data.make_one_shot_iterator(training_batches)
  batches_features, batches_labels = training_iterator.get_next()
  return batches_features, batches_labels

from google.colab import drive
drive.mount("/content/gdrive")

# Read in the dataset
df = pd.read_csv('/content/gdrive/My Drive/work2.csv').astype(np.float32)
change = df.query('Speed>0').sample(frac = .1).index
df.loc[change, 'Speed'] = 0
df.loc[change, 'Class'] = 0
df.to_csv('work2.csv', header = True, index =False)
df.shape

X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state =1)

#reshape y-data to become column vector
y_train = np.reshape(y_train, [-1,1])
y_test = np.reshape(y_test, [-1,1])

# Standardize the dataset
scalar_x_train = StandardScaler().fit(X_train)
scalar_x_test = StandardScaler().fit(X_test)
X_train = scalar_x_train.transform(X_train)
X_test = scalar_x_test.transform(X_test)
def main(argv):
  features, labels = build_input_pipeline(X,y, FLAGS.batch_size)
  #Building Bayesian Logistic
  with tf.name_scope("logistic_regression", values=[features]):
    layer = tfp.layers.DenseFlipout(
        units=1,
        activation= tf.nn.relu,
        kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
        bias_posterior_fn=tfp.layers.default_mean_field_normal_fn())

    logits = layer(features)
    labels_distribution = tfd.Bernoulli(logits=logits)
 #Compute the -ELBO as the loss, averaged over the batch size
    t = tf.Variable(0.0)
    kl_regularizer = t / (FLAGS.kl_annealing * len(X_train) / FLAGS.batch_size)

    #Compute the -ELBO as the loss. The kl term is annealed from 1 to 1 over
    # the epochs specified by the kl_annealing flag.
    log_likelihood = labels_distribution.log_prob(labels)
   #neg_log_likelihood = tf.reduce_mean(tf.squared_difference(logits,labels_final))
    neg_log_likelihood = -tf.reduce_mean(input_tensor = log_likelihood)
    kl = sum(layer.losses)/len(X_train) * tf.minimum(1.0, kl_regularizer)
    elbo_loss = neg_log_likelihood + kl

  """neg_log_likelihood = -tf.reduce_mean(labels_distribution.log_prob(features))
  kl = sum(layer.losses)/len(X_train)
  elbo_loss = neg_log_likelihood + kl """
  predictions = tf.cast(logits>0, dtype = tf.int32)
  accuracy, accuracy_update_op = tf.metrics.accuracy(labels = labels,  predictions=predictions)

  with tf.name_scope("train"):
    optimizer =tf.train.AdagradOptimizer(learning_rate=FLAGS.learning_rate)
    train_op = optimizer.minimize(elbo_loss)
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

  with tf.Session() as sess:
    sess.run(init_op)
    #Run the training looop
     # Fit the model to data.
    training_steps = int(round(FLAGS.epochs * (len(X_train) / FLAGS.batch_size)))
    for step in range(training_steps):
      _ = sess.run([train_op, accuracy_update_op])
      if step % 100 == 0:
        loss_value, accuracy_value = sess.run([elbo_loss, accuracy])
        print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f}".format(
            step, loss_value, accuracy_value))

    # Visualize some draws from the weights posterior.
    w_draw = layer.kernel_posterior.sample()
    b_draw = layer.bias_posterior.sample()
    candidate_w_bs = []
    for _ in range(FLAGS.num_monte_carlo):
      w, b = sess.run((w_draw, b_draw))
      candidate_w_bs.append((w, b))
    visualize_decision(x, y, (w_true, b_true),
                       candidate_w_bs,
                       fname=os.path.join(FLAGS.model_dir,
                                          "weights_inferred.png"))


  with tf.name_scope("ROC"):
    logits = sess.run([logits], feed_dict = {handle:test_handle})
    fpr,tpr, _ =  roc_curve(np.array(y_test), logits) # Logits, a tensor resulting from a graph evaluation, is a NumPy array
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr,tpr,  label = 'AUC (area= % 0.2f)' %roc_auc)
    plt.legend(loc = 'lower right')
    plt.plot([0,1],[0,1], 'r')
    plt.xlm([0,1])
    plt.ylm([0,1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

  with tf.name_scope("Credible_interval"):
    logits = sess.run([logits], feed_dict = {handle:test_handle})
    credible_intervals =[]
    modes = []
    for i in range(logits.shape[1]):
      lb = np.percentile(logits[:, i], 2.5)
      ub = np.percentile(logits[:, i], 97.5)
      mode = np.mean(logits[:, i])
      credible_intervals.append([lb, ub])
      modes.append(mode)
      lb, ub = np.exp(lb), np.exp(ub)
      print(f'P({lb:.3f} < Odds Ratio < {ub:.3f}) = 0.95')
      fig, ax = plt.subplots(figsize=(8, 4))
      sns.distplot(credible_enter code hereinterval, axlabel='Odds Ratio', ax=ax)
      ax.set_title(f'Credible Interval: P({lb:.3f} < Odds Ratio < {ub:.3f}) = 0.95')
      ax.axvspan(lb, ub, alpha=0.5, color='gray');
if __name__ == "__main__":
  tf.app.run()

0 个答案:

没有答案
相关问题