在检查点中找不到键 unit_3_0/sub_unit0/batch_normalization/beta

时间:2019-04-03 15:49:34

标签: python tensorflow

我正在使用 DLTK 软件包(https://github.com/DLTK/DLTK),示例位于路径 \DLTK-master\examples\applications\IXI_HH_age_regression_resnet 下。在对代码做了一些修改之后,我又把它恢复成了原始代码:

def model_fn(features, labels, mode, params):
    """Estimator model function wrapping a 3D ResNet regressor.

    Args:
        features: mapping that provides the network input under key 'x'.
        labels: mapping that provides the regression target under key 'y'
            (not read in PREDICT mode).
        mode: one of the `tf.estimator.ModeKeys` values.
        params: mapping that provides the optimiser "learning_rate".

    Returns:
        A `tf.estimator.EstimatorSpec`. In PREDICT mode it carries only the
        predictions and export outputs; otherwise it also carries the MSE
        loss, the training op and the rmse/mae evaluation metrics.
    """
    # Build the network; its output dict is used as the predictions.
    outputs = resnet_3d(
        inputs=features['x'],
        num_res_units=2,
        num_classes=NUM_CLASSES,
        filters=(16, 32, 64, 128, 256),
        strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
        mode=mode,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4))

    # Prediction needs neither labels nor a loss, so return early.
    if mode == tf.estimator.ModeKeys.PREDICT:
        serving_outputs = {
            'out': tf.estimator.export.PredictOutput(outputs)}
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=outputs,
            export_outputs=serving_outputs)

    targets = labels['y']
    logits = outputs['logits']

    # Mean squared error between the targets and the network output.
    mse_loss = tf.losses.mean_squared_error(
        labels=targets,
        predictions=logits)

    step = tf.train.get_global_step()
    adam = tf.train.AdamOptimizer(
        learning_rate=params["learning_rate"],
        epsilon=1e-5)

    # Ops registered under UPDATE_OPS (e.g. batch-norm moving averages) must
    # run before every optimisation step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = adam.minimize(mse_loss, global_step=step)

    # Streaming metrics reported during evaluation.
    # NOTE(review): the original comment says the rmse is "scaled back by
    # 100, see reader.py"; no scaling is visible here - confirm in reader.py.
    metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(targets, logits),
        "mae": tf.metrics.mean_absolute_error(targets, logits),
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=outputs,
        loss=mse_loss,
        train_op=train_op,
        eval_metric_ops=metric_ops)
def train(args):
    """Train the estimator, optionally validate it, then export it.

    Args:
        args: object with the attributes read below:
            data_csv: path of the csv file listing the input files.
            model_path: directory for checkpoints, summaries and the export.
            run_validation: when truthy, evaluate after each training round.

    Training can be interrupted with Ctrl-C; the model is still exported
    afterwards.
    """
    np.random.seed(42)
    tf.set_random_seed(42)

    print('Setting up...')

    # Parse csv files for file names.
    # `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
    # removed in pandas 1.0; both return the same ndarray.
    all_filenames = pd.read_csv(
        args.data_csv,
        dtype=object,
        keep_default_na=False,
        na_values=[]).values

    # Fixed split by row position: first 400 rows train, next 143 validate.
    train_filenames = all_filenames[:400]
    val_filenames = all_filenames[400:543]

    # Set up a data reader to handle the file i/o.
    reader_params = {'n_examples': 2,
                     'example_size': [64, 64, 64],
                     'extract_examples': False}

    reader_example_shapes = {'features': {'x': reader_params['example_size'] + [NUM_CHANNELS, ]},
                             'labels': {'y': [1]}}

    reader = Reader(read_fn, {'features': {'x': tf.float32},
                              'labels': {'y': tf.float32}})

    # Get input functions and queue initialisation hooks for training and
    # validation data
    train_input_fn, train_qinit_hook = reader.get_inputs(
        file_references=train_filenames,
        mode=tf.estimator.ModeKeys.TRAIN,
        example_shapes=reader_example_shapes,
        batch_size=BATCH_SIZE,
        shuffle_cache_size=SHUFFLE_CACHE_SIZE,
        params=reader_params)

    val_input_fn, val_qinit_hook = reader.get_inputs(
        file_references=val_filenames,
        mode=tf.estimator.ModeKeys.EVAL,
        example_shapes=reader_example_shapes,
        batch_size=BATCH_SIZE,
        shuffle_cache_size=SHUFFLE_CACHE_SIZE,
        params=reader_params)

    # Instantiate the neural network estimator.
    # NOTE: the Estimator resumes from the latest checkpoint found in
    # `model_dir`. If that checkpoint was written by a different graph
    # (e.g. after editing the network), restoring fails with
    # "NotFoundError: Key ... not found in checkpoint"; use an empty
    # directory or delete the stale checkpoints in that case.
    nn = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=args.model_path,
        params={"learning_rate": 0.001},
        config=tf.estimator.RunConfig())

    # Hooks for validation summaries
    val_summary_hook = tf.contrib.training.SummaryAtEndHook(
        os.path.join(args.model_path, 'eval'))
    step_cnt_hook = tf.train.StepCounterHook(
        every_n_steps=EVAL_EVERY_N_STEPS, output_dir=args.model_path)

    print('Starting training...')
    try:
        # Alternate between EVAL_EVERY_N_STEPS training steps and one
        # validation pass until MAX_STEPS is reached.
        for round_idx in range(MAX_STEPS // EVAL_EVERY_N_STEPS):
            # Debug trace kept from the original script.
            print('heloooo')
            print(round_idx)
            nn.train(input_fn=train_input_fn,
                     hooks=[train_qinit_hook, step_cnt_hook],
                     steps=EVAL_EVERY_N_STEPS)

            if args.run_validation:
                print('helooo again')
                results_val = nn.evaluate(input_fn=val_input_fn,
                                          hooks=[val_qinit_hook, val_summary_hook],
                                          steps=EVAL_STEPS)
                print('Step = {}; val loss = {:.5f};'.format(
                    results_val['global_step'],
                    results_val['loss']))

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training early but still exports below.
        pass

    print('Stopping now.')

    # When exporting we set the expected input shape to be arbitrary.
    export_dir = nn.export_savedmodel(
        export_dir_base=args.model_path,
        serving_input_receiver_fn=reader.serving_input_receiver_fn(
            {'features': {'x': [None, None, None, NUM_CHANNELS]},
             'labels': {'y': [1]}}))
    print('Model saved to {}.'.format(export_dir))

但是我遇到了一些错误:

  

NotFoundError(请参阅上面的回溯):从检查点还原失败。这很可能是由于检查点中缺少某个变量名或其他图键(graph key)。请确保您没有更改该检查点所对应的计算图。原始错误:

     

在检查点中找不到键 unit_3_0/sub_unit0/batch_normalization/beta [[node save/RestoreV2 (defined at E:/UT Projects/khanom Dr Sajedi/DLTK-master/examples/applications/IXI_HH_age_regression_resnet/train.py:192) = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

有人可以帮助我吗?

0 个答案:

没有答案