
时间:2019-01-18 02:26:19

标签: python tensorflow machine-learning


我正在训练这些数据。这是我的testing sample和我的training sample。您可以通过右键单击网页并单击“另存为”从Git下载它。



import csv
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

''' Here we unpack the .csv files. I have chosen to put their contents 
into lists. Do let me know if there exists a more efficient method. '''

distribution_train = []
probs_train = []
distribution_test = []
probs_test = []

# NOTE(review): both loop bodies and the header-removal statements were
# missing from this copy of the script (an empty ``for`` body is a syntax
# error). They are reconstructed here on the assumption that column 0 holds
# the distribution and column 1 holds the probs -- TODO confirm against the
# actual CSV files.
# newline='' is what the csv module asks for when opening files for reading.
with open('training_sample.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')

    for row in csv_reader:
        if not row:
            # Blank lines come back as empty lists; there is nothing to unpack.
            continue
        distribution_train.append(row[0])
        probs_train.append(row[1])

with open('testing_sample.csv', newline='') as csv_file_1:
    csv_reader_1 = csv.reader(csv_file_1, delimiter=',')

    for row in csv_reader_1:
        if not row:
            continue
        distribution_test.append(row[0])
        probs_test.append(row[1])

'''Get rid of the titles in the training_sample.csv file.'''
# Slice deletion is a no-op on an empty list, unlike pop(0) which would raise.
del distribution_train[:1]
del probs_train[:1]


'''For some reason everything in my csv file is stored as strings. Or 
maybe it's just because of the way I have unpacked it. The below function 
is to convert it into floats so that TF can work with it. 
It's crude, but it locates all the numbers and appends them to a list, 
which then gets appended to a giant list called f.'''

def num_converter_flatten(csv_list):
    """Turn every entry of ``csv_list`` into a list of single-digit floats.

    Each character of an entry that is one of '0'..'9' becomes one float;
    every other character (brackets, commas, spaces, ...) is dropped.
    Digits are converted one at a time, so "12" yields [1.0, 2.0] rather
    than [12.0].

    Args:
        csv_list: sequence of strings (or other iterables of characters).

    Returns:
        A list with one inner list of floats per entry of ``csv_list``, in
        the original order. An empty ``csv_list`` gives an empty list.
    """
    # A tuple keeps the element-wise ``==`` semantics of the original
    # ``i == '1' or i == '2' ...`` chain (no substring matching, no hashing).
    digits = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
    return [
        [float(ch) for ch in entry if ch in digits]
        for entry in csv_list
    ]

'''Convert distribution_train and probs_train, which are currently strings,
into numbers, and normalize the training inputs.'''
x_train = num_converter_flatten(distribution_train)
y_train = num_converter_flatten(probs_train)
x_train = tf.keras.utils.normalize(x_train, axis=1)


'''Reshape x_train and y_train into tensors with 26 and 80 columns. Both are
cast to float32 so that A and B end up with the same dtype.'''

A = tf.reshape(x_train, [-1, 26])
B = tf.reshape(y_train, [-1, 80])
A = tf.dtypes.cast(A, dtype=tf.float32)
B = tf.dtypes.cast(B, dtype=tf.float32)

'''Doing the same thing to x_test and y_test'''

x_test = num_converter_flatten(distribution_test)
y_test = num_converter_flatten(probs_test)
# Fix: the test inputs must go through the same normalization as the training
# inputs, otherwise the model is evaluated on data with a different scale
# from the data it was trained on.
x_test = tf.keras.utils.normalize(x_test, axis=1)
C = tf.reshape(x_test, [-1, 26])
D = tf.reshape(y_test, [-1, 80])
C = tf.dtypes.cast(C, dtype=tf.float32)
D = tf.dtypes.cast(D, dtype=tf.float32)


'''Model starts from here'''

model = tf.keras.models.Sequential()

'''I'm not too sure if relu is the right activation function to use here. 
I've tried different activation functions, but all run into the same 
problem described below.'''

model.add(tf.keras.layers.Dense(180, activation=keras.activations.relu, input_shape=(26,)))

model.add(tf.keras.layers.Dense(2080, activation=keras.activations.relu))

model.add(tf.keras.layers.Dense(180, activation=keras.activations.relu))

'''I'm making the final layer 80 because I want TF to output the size of 
the 'probs' list in the csv file'''

model.add(tf.keras.layers.Dense(80, activation=keras.activations.softplus))

'''Again I'm not sure if softplus is the best to use here. I've also 
tested a number of activation functions for the last layer, and it also 
runs to the same problem.'''

# Fix: a Keras model has to be compiled before fit()/evaluate() can run.
# NOTE(review): the compile step was missing from this copy of the script.
# The loss and metric are inferred from the pasted training log (loss starts
# near ln 2 = 0.693 and an 'acc' column is reported); the optimizer is a
# guess -- confirm against the original script.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(A,B, epochs=2, steps_per_epoch=16)

# evaluate() returns [loss, accuracy] because one metric was compiled in.
val_loss, val_acc = model.evaluate(C,D, steps = 128)
print (val_loss, val_acc)

'''Save the trained model, then reload it and predict on the test inputs.'''
# Fix: load_model() can only read a model that was written to this path
# first; the save call was missing from this copy of the script.
model.save('epic_equation_model_try1')
new_model = tf.keras.models.load_model('epic_equation_model_try1')
predictions = new_model.predict(C, steps = 1)



# Show the first prediction as a 5x16 grid (80 outputs per sample).
for i in range(1):
    x = np.array(predictions[i]).reshape(5,16)
    print (x)


[[0.00000014 0.00000065 0.0000007  0.00000031 0.00000069 0.00000003
0.00000073 0.00000009 0.00000001 0.00000095 0.00000215 0.00000045
0.00000155 0.00000274 0.00000057 0.00053975]
[0.00000016 0.00000011 0.00000021 0.00000006 0.00000012 0.00000022
0.00000002 0.00000005 0.00000019 0.00000002 0.00000087 0.00000465
0.00000238 0.00000009 0.00003278 0.00001788]
[0.00000002 0.00000001 0.00000046 0.00000131 0.00000072 0.00000006
0.00000005 0.00000001 0.00000001 0.0000003  0.0000005  0.00000016
0.00000465 0.00000226 0.00000083 0.00002015]
[0.00000005 0.0000004  0.00000001 0.00000032 0.00000008 0.00000061
0.00000107 0.00000015 0.00000013 0.00000014 0.00000012 0.00000037
0.00000334 0.00000016 0.00000057 0.00018404]
[0.00000044 0.00000038 0.00000095 0.00000013 0.0000002  0.00000006
0.00000019 0.00000087 0.00000095 0.00000016 0.00000513 0.00000095
0.00000846 0.0000534  0.00000049 0.00000429]]



'''Binarise the first prediction only. To inspect more predictions,
widen the range below.'''
for i in range(1):
    MUTI = 500000
    # Lay the 80 outputs of sample i out as a 5x16 grid.
    x = np.array(predictions[i]).reshape(5, 16)
    # Scale up, keep two decimals, then cut at 0.1:
    # below the cut -> 0, at or above the cut -> 1.
    PX = np.round(MUTI * x, 2)
    PX[PX < 0.1] = 0
    PX[PX >= 0.1] = 1
    print(PX)

那么我在这里做错了什么?最好的选择是让输出矩阵对应于0和1,例如[0.999998,0.000002,0.000003,0.099995 ...],但是到目前为止,我在Tensorflow中没有找到允许我执行此操作的函数。



# Output layer variant: 80 units with a sigmoid activation, so every output
# lies between 0 and 1.
model.add(tf.keras.layers.Dense(80, activation=keras.activations.sigmoid))


 # Model Training
 1/16 [>.............................] - ETA: 27s - loss: 0.6932 - acc: 0.4845
 2/16 [==>...........................] - ETA: 20s - loss: 0.6919 - acc: 0.5277
 3/16 [====>.........................] - ETA: 18s - loss: 0.6905 - acc: 0.5470
 4/16 [======>.......................] - ETA: 15s - loss: 0.6889 - acc: 0.5577
 5/16 [========>.....................] - ETA: 14s - loss: 0.6871 - acc: 0.5654
 6/16 [==========>...................] - ETA: 12s - loss: 0.6851 - acc: 0.5720
 7/16 [============>.................] - ETA: 11s - loss: 0.6831 - acc: 0.5769
 8/16 [==============>...............] - ETA: 9s - loss: 0.6813 - acc: 0.5813 
 9/16 [===============>..............] - ETA: 8s - loss: 0.6799 - acc: 0.5851
 10/16 [=================>............] - ETA: 7s - loss: 0.6787 - acc: 0.5883
 11/16 [===================>..........] - ETA: 6s - loss: 0.6776 - acc: 0.5912
 12/16 [=====================>........] - ETA: 4s - loss: 0.6766 - acc: 0.5936
 13/16 [=======================>......] - ETA: 3s - loss: 0.6756 - acc: 0.5957
 14/16 [=========================>....] - ETA: 2s - loss: 0.6746 - acc: 0.5975
 15/16 [===========================>..] - ETA: 1s - loss: 0.6738 - acc: 0.5991
 16/16 [==============================] - 19s 1s/step - loss: 0.6731 - acc: 0.6005
Epoch 2/2

1/16 [>.............................] - ETA: 16s - loss: 0.6626 - acc: 0.6221
2/16 [==>...........................] - ETA: 15s - loss: 0.6625 - acc: 0.6221
3/16 [====>.........................] - ETA: 14s - loss: 0.6624 - acc: 0.6221
4/16 [======>.......................] - ETA: 13s - loss: 0.6623 - acc: 0.6221
5/16 [========>.....................] - ETA: 12s - loss: 0.6622 - acc: 0.6221
6/16 [==========>...................] - ETA: 11s - loss: 0.6621 - acc: 0.6221
7/16 [============>.................] - ETA: 10s - loss: 0.6621 - acc: 0.6221
8/16 [==============>...............] - ETA: 9s - loss: 0.6621 - acc: 0.6221 
9/16 [===============>..............] - ETA: 8s - loss: 0.6621 - acc: 0.6221
10/16 [=================>............] - ETA: 6s - loss: 0.6621 - acc: 0.6221
11/16 [===================>..........] - ETA: 5s - loss: 0.6620 - acc: 0.6221
12/16 [=====================>........] - ETA: 4s - loss: 0.6620 - acc: 0.6221
13/16 [=======================>......] - ETA: 3s - loss: 0.6620 - acc: 0.6221
14/16 [=========================>....] - ETA: 2s - loss: 0.6620 - acc: 0.6221
15/16 [===========================>..] - ETA: 1s - loss: 0.6619 - acc: 0.6221
16/16 [==============================] - 18s 1s/step - loss: 0.6619 - acc: 0.6221

# Testing Sample
1/128 [..............................] - ETA: 11s
6/128 [>.............................] - ETA: 3s 
10/128 [=>............................] - ETA: 2s
14/128 [==>...........................] - ETA: 2s
19/128 [===>..........................] - ETA: 1s
23/128 [====>.........................] - ETA: 1s
27/128 [=====>........................] - ETA: 1s
32/128 [======>.......................] - ETA: 1s
36/128 [=======>......................] - ETA: 1s
41/128 [========>.....................] - ETA: 1s
45/128 [=========>....................] - ETA: 1s
50/128 [==========>...................] - ETA: 1s
54/128 [===========>..................] - ETA: 1s
59/128 [============>.................] - ETA: 1s
63/128 [=============>................] - ETA: 0s
67/128 [==============>...............] - ETA: 0s
72/128 [===============>..............] - ETA: 0s
76/128 [================>.............] - ETA: 0s
81/128 [=================>............] - ETA: 0s
84/128 [==================>...........] - ETA: 0s
88/128 [===================>..........] - ETA: 0s
92/128 [====================>.........] - ETA: 0s
96/128 [=====================>........] - ETA: 0s
101/128 [======================>.......] - ETA: 0s
105/128 [=======================>......] - ETA: 0s
109/128 [========================>.....] - ETA: 0s
112/128 [=========================>....] - ETA: 0s
116/128 [==========================>...] - ETA: 0s
120/128 [===========================>..] - ETA: 0s
124/128 [============================>.] - ETA: 0s
128/128 [==============================] - 2s 15ms/step
# Val loss, val acc
3.21346378326416 0.6113420724868774
# Model prediction 
[0.07243679 0.06086067 0.17687203 0.0424496  0.04154298]
[[3.17100697e-04 1.11687055e-04 1.74964574e-04 6.10632123e-05
9.15286364e-05 5.74094338e-05 1.06784762e-04 9.21065075e-05
3.97201220e-04 6.81500824e-05 2.94565433e-03 1.35827821e-03
2.81211367e-04 1.00520747e-02 6.01871812e-04 5.57196997e-02]
[4.75579909e-05 1.06101972e-04 6.00069507e-05 4.96198081e-05
1.41731420e-04 8.79359577e-05 7.46832447e-05 4.33949099e-05
6.03557055e-05 7.12051406e-05 8.45988281e-04 2.34248699e-03
2.74868420e-04 1.51112420e-03 7.97806482e-04 5.43457977e-02]
[1.06174128e-04 6.14731325e-05 2.77487037e-04 9.75391740e-05
6.97174910e-05 1.57534625e-04 1.94240944e-04 7.85565353e-05
8.36232939e-05 4.36835981e-05 4.04849125e-04 6.86666509e-03
3.01186665e-04 3.41302366e-04 2.24954495e-03 1.65538445e-01]
[4.85532328e-05 5.30333891e-05 1.46693186e-04 2.40156965e-04
6.14130186e-05 6.84155602e-05 1.68141501e-04 9.09807641e-05
2.58948799e-04 6.01471947e-05 1.68107694e-03 8.52260040e-04
1.04383787e-03 4.75410791e-03 4.93220054e-04 3.24286185e-02]
[2.70062083e-05 1.19853627e-04 2.31390568e-05 4.41022166e-05
6.90615489e-05 2.53524253e-04 1.09332977e-04 7.17278526e-05
1.98521622e-04 3.77545693e-05 1.86053314e-03 2.42721115e-04
1.85445603e-03 1.16664183e-03 1.00720196e-03 3.44574042e-02]]


2 个答案:

答案 0 :(得分:0)


# Suggested output layer: 80 units with a sigmoid activation, so every output
# lies between 0 and 1.
model.add(tf.keras.layers.Dense(80, activation=keras.activations.sigmoid))


答案 1 :(得分:0)

因此,您想从(1,26)(实值?)矩阵中预测5 * 16二进制矩阵。

  • 您首先应该检查的是:随着训练轮数(epoch)的增加,训练准确率是否上升、训练损失是否下降?神经网络应该总是能够在训练数据上过拟合!
    • 如果您的训练损失没有下降,则说明您的网络存在问题:
  • 由于输出层只应产生二进制值,请使用输出范围在[0,1]之间的激活函数,例如sigmoid。这些值将用于计算损失。
  • 由于您要预测二进制值,因此应对输出进行阈值化:如果输出 > threshold 则取1,否则取0。我更熟悉TensorFlow而不是Keras,所以不确定当输出为实数时Keras如何计算准确率……可以尝试将指标换成自定义指标,例如:
    import keras.backend as K
    from keras.metrics import binary_accuracy
    def custom_metric(y_true, y_pred):
        """Binary accuracy computed on predictions thresholded at 0.5.

        Args:
            y_true: tensor of target values.
            y_pred: tensor of model outputs, same shape as ``y_true``.

        Returns:
            The result of ``binary_accuracy`` on the thresholded predictions.
        """
        threshold = 0.5
        # K.greater yields a boolean tensor; cast it back to the backend
        # float type so binary_accuracy can round it and compare it with
        # the float-valued y_true.
        thresholded_values = K.cast(K.greater(y_pred, threshold), K.floatx())
        return binary_accuracy(y_true, thresholded_values)
