Implementing inference for a Bayesian neural network using a TensorFlow session

Time: 2018-02-01 08:18:50

Tags: python machine-learning data-mining bayesian-networks

I am new to machine learning. For my final project I have to make predictions with two algorithms, an artificial neural network (ANN) and a Bayesian neural network (BNN), and compare the prediction results of the two. I have already finished the ANN program, but I am running into problems with the BNN. I followed a tutorial from this link: bayesian neural network tutorial. Here is my ANN example code for training and evaluating the model.

keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")

training_epochs = 5000
display_step = 1000
batch_size = 5

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in tqdm(range(training_epochs)):
        avg_cost = 0.0
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)

        for i in range(total_batch):
            batch_x, batch_y = x_batches[i], y_batches[i]
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished!")
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
    # print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
    print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))

Here is my BNN code:

# Importing required libraries
from math import floor

import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing

# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')


# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
    home_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'hp' in col]]
    away_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'ap' in col]]
    label_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'label' in col]]

    home_filled = pd.DataFrame(KNN(3).complete(home_data))
    home_filled.columns = home_data.columns
    home_filled.index = home_data.index

    away_filled = pd.DataFrame(KNN(3).complete(away_data))
    away_filled.columns = away_data.columns
    away_filled.index = away_data.index

    data_frame_out = pd.concat([home_filled, away_filled, label_data], axis=1)

    return data_frame_out


features_dummies = impute_missing_values_by_KNN()

target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values

perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]

train_size = 0.9

train_cnt = floor(features_dummies.shape[0] * train_size)

x_train = data[0:train_cnt]  # data_train
y_train = target[0:train_cnt]  # target_train
x_test = data[train_cnt:]  # data_test
y_test = target[train_cnt:]  # target_test

keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1]  # D
n_classes = 3
n_hidden_1 = 100  # H0
n_hidden_2 = 100  # H1
n_hidden_3 = 100  # H2


def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
    hidden1 = tf.nn.relu(tf.matmul(X, W_0) + b_0)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, W_1) + b_1)
    hidden3 = tf.nn.relu(tf.matmul(hidden2, W_2) + b_2)
    output = tf.matmul(hidden3, W_out) + b_out
    return tf.reshape(output, [-1])


scaler = preprocessing.StandardScaler().fit(x_train)

data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)

W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))

b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))

qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
                                 scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))

qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1])))
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2])))
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_3])))
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
                                 scale=tf.Variable(tf.random_normal([1])))

sigma_y = 1.0

x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)

inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)

However, I want to compare the results of the two algorithms, so I would like to keep some of the variables the same between the ANN and the BNN, for example the total number of epochs. I then want to adapt my ANN code to this part of the BNN code.

sigma_y = 1.0

x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)

inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
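For reference, Edward's KLqp can also be driven by an explicit, mini-batched loop that mirrors the ANN's epoch/batch structure, in place of the single inference.run(...) call above. The following is only a minimal, untested sketch: it assumes batch_size, training_epochs and display_step take the same values as in the ANN script, reuses x, y, the priors and the q* posteriors defined above, and rescales the likelihood so that one mini-batch stands in for the full data set.

# Minimal sketch (not from the original post): mini-batched KLqp mirroring the ANN loop.
# Assumes batch_size, training_epochs and display_step as defined in the ANN code.
N = len(x_train)
total_batch = int(N / batch_size)

y_ph = tf.placeholder(tf.float32, [None])  # observed targets, fed one batch at a time

inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={y: y_ph})
# Scale the likelihood so that one mini-batch approximates the full data set.
inference.initialize(n_iter=training_epochs * total_batch,
                     scale={y: float(N) / batch_size},
                     optimizer=optimizer, global_step=global_step)

sess = ed.get_session()
tf.global_variables_initializer().run()
for epoch in range(training_epochs):
    for batch_x, batch_y in zip(np.array_split(x_train, total_batch),
                                np.array_split(y_train, total_batch)):
        info_dict = inference.update({x: batch_x, y_ph: batch_y})
    if epoch % display_step == 0:
        print("Epoch:", epoch + 1, "loss =", info_dict['loss'])
inference.finalize()

With full-batch training as in the posted code (data={x: x_train, y: y_train}), n_iter=5000 already corresponds to 5000 passes over the whole training set, so matching the ANN's 5000 epochs mainly comes down to matching the number of optimization steps.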

There are a few things I don't understand. In the ANN there is y = tf.placeholder(tf.float32, name="y"), but in the BNN there is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Also, the BNN has a scale parameter while the ANN does not. So, can I adapt my ANN train-and-test code to the BNN example code above? I want to run the BNN inference inside sess.run(), as in the ANN, so that I can compute the BNN's prediction accuracy. Can I do that?
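As a further reference, once the variational posteriors have been fitted (either with inference.run(...) as posted or with an explicit loop), Edward can build a posterior predictive with ed.copy and score it on held-out data with ed.evaluate. The sketch below is only an illustration based on the variables above; because neural_network returns a single scalar per sample with a Normal likelihood, it reports mean squared error rather than the 3-class accuracy computed for the ANN, which would require a categorical likelihood instead.

# Minimal sketch (see caveats above): evaluate the fitted BNN on the test set.
# Posterior predictive: rebuild y with the priors swapped for the fitted posteriors.
y_post = ed.copy(y, {W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out})

sess = ed.get_session()

# Regression-style score; the posted model has a scalar Normal output, so the
# ANN's argmax-based accuracy does not apply directly here.
mse = ed.evaluate('mean_squared_error',
                  data={x: x_test, y_post: y_test.astype('float32')},
                  n_samples=100)
print("Test MSE:", mse)

# Individual posterior-predictive draws can also be sampled directly:
pred_samples = [sess.run(y_post, feed_dict={x: x_test}) for _ in range(10)]

Note that the posted code fits the scaler but feeds the unscaled x_train to KLqp; the scaled arrays (data_train_scaled / data_test_scaled) could be substituted consistently on both the training and the evaluation side.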

0 answers:

There are no answers yet.