Question

我正在使用CNN Tensorflow代码 - ＆gt; https://www.tensorflow.org/tutorials/layers 我正在尝试运行自己的数据而不是MNIST数据集。由于我是这方面的新手，我在编码和错误方面遇到了很多困难:(

我制作了一个file.txt，它包含了我计算机中的每个图像路径及其标签。我有400张图像，灰度，16x16。

这是代码：

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

...
from PIL import Image
import PIL.Image
#import imageflow
import os
import cv2
#import glob
import __main__ as _main_module
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from sklearn.model_selection import train_test_split
...

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

#tf.logging.set_verbosity(tf.logging.INFO)
  
#%%%%%%%%%%%%%%%%%%%%%% MY DATA %%%%%%%%%%%%%%%%%%%%%%%

def main(unused_argv):
    
    path = 'C:/Users/.../ImageDir-Lables-01.txt'
    filenames = []
    labels = []
    
    #Reading file and extracting paths and labels
    with open(path, 'r') as File:
        infoFile = File.readlines() #Reading all the lines from File
        for line in infoFile: #Reading line-by-line
            words = line.split() #Splitting lines in words using space character as separator
            filenames.append(words[0])
            labels.append(int(words[1]))
        
    NumFiles = len(filenames)
    print (NumFiles)
    
    #Converting filenames and labels into tensors
    tfilenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
    tlabels = ops.convert_to_tensor(labels, dtype=dtypes.int32)
    
    #Creating a queue which contains the list of files to read and the value of the labels
    filename_queue = tf.train.slice_input_producer([tfilenames, tlabels], 
                                                   num_epochs=10, 
                                                   shuffle=True, 
                                                   capacity=NumFiles)
    #Reading the image files and decoding them
    rawIm= tf.read_file(filename_queue[0])
    decodedIm = tf.image.decode_image(rawIm)   # png or jpg decoder
    
    #Extracting the labels queue
    label_queue = filename_queue[1]
    
    #Initializing Global and Local Variables so we avoid warnings and errors
    init_op = tf.group(tf.local_variables_initializer() ,tf.global_variables_initializer())
    
    #Creating an InteractiveSession so we can run in iPython
    sess = tf.InteractiveSession()
    with sess.as_default():
        sess.run(init_op)
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        for i in range(NumFiles): #length of your filenames list
             nm, image, lb = sess.run([filename_queue[0], decodedIm, label_queue])
        
             print (image.shape)
             print (nm)
             print (lb)
        
             #Showing the current image
        jpgfile = Image.open(nm)
        jpgfile.show()

        coord.request_stop()
        coord.join(threads)
        
    train_data, train_labels, eval_data, eval_labels = 
             tf.train_split([filename_queue[0], filename_queue[1]], frac=.1)
#    train_data, eval_data, train_labels, eval_labels = 
          train_test_split([filename_queue[0], filename_queue[1]], frac=0.2)
#    train_data, train_labels, eval_data, eval_labels = 
          tf.split(tf.random_shuffle(filename_queue[0], filename_queue[1], 
                                     frac=0.25))

    return train_data, train_labels, eval_data, eval_labels
    print (train_data.shape)

###########################################

  # Create the Estimator
    Xray_classifier = learn.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/Xray_convnet_model")
  
###########################################
  # Set up logging for predictions
  # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

  # Train the model
    Xray_classifier.fit(
       x=train_data,
       y=train_labels,
       batch_size=10,
       steps=20000,
       monitors=[logging_hook])

  # Configure the accuracy metric for evaluation
    metrics = {
      "accuracy":
          learn.MetricSpec(
              metric_fn=tf.metrics.accuracy, prediction_key="classes"),
  }

  # Evaluate the model and print results
    eval_results = Xray_classifier.evaluate(
       x=eval_data, y=eval_labels, metrics=metrics)
    print(eval_results)

# Our application logic will be added here
if __name__ == "__main__":
  tf.app.run()

我使用了3种不同的代码来划分我的数据集。我用过 - ＆gt; train_data，train_labels，eval_data，eval_labels = tf.train_split（image，lb，frac = .1）它给出了这个错误 - ＆gt; AttributeError ：模块'tensorflow'没有属性'train_split'

我用的时候 - ＆gt; train_data，eval_data，train_labels，eval_labels = train_test_split（[filename_queue [0]，filename_queue [1]]，frac = 0.2）它给出了错误 - ＆gt; TypeError ：传递的参数无效：{'frac'：0.2}

当我使用时 - ＆gt; train_data，train_labels，eval_data，eval_labels = tf.split（tf.random_shuffle（filename_queue [0]，filename_queue [1]，frac = 0.25））它给出了这个错误 - ＆gt; TypeError ：random_shuffle（）得到了一个意外的关键字参数'frac'

有人知道我应该为分裂写些什么吗？任何帮助，将不胜感激。谢谢

Answer 1

您可以使用http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html Scikit Learn的train_test_split功能。

如何在Tensorflow CNN中将自己的数据集拆分为训练和验证

1 个答案: