I am trying to fine-tune BERT for multi-label classification. I have my own data processor, and I am using a pre-trained BERT on top of which I add a fine-tuning layer for my task. I have a create_model function that adds that fine-tuning layer on top of the existing BERT:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()
  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))
  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    # Multi-label classification: labels are multi-hot, so apply a sigmoid
    # to turn each logit into an independent probability.
    probabilities = tf.nn.sigmoid(logits)
    labels = tf.cast(labels, tf.float32)

    # For single-label classification you would use softmax instead:
    # probabilities = tf.nn.softmax(logits, axis=-1)
    # log_probs = tf.nn.log_softmax(logits, axis=-1)
    # one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    # per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)  # cross-entropy

    tf.logging.debug("num_labels = %s; logits = %s; labels = %s" %
                     (num_labels, logits, labels))

    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, logits, probabilities)
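For context, here is a minimal standalone sketch of the same multi-label head with made-up shapes (batch of 2, hidden size 4, 3 labels; the BERT pooled output is faked with a random tensor), just to show what this function computes:

import numpy as np
import tensorflow as tf  # TF 1.x

pooled = tf.constant(np.random.randn(2, 4), dtype=tf.float32)  # stand-in for model.get_pooled_output()
output_weights = tf.constant(np.random.randn(3, 4), dtype=tf.float32)  # [num_labels, hidden_size]
output_bias = tf.zeros([3])
labels = tf.constant([[1., 0., 1.], [0., 1., 0.]])  # multi-hot targets

logits = tf.nn.bias_add(tf.matmul(pooled, output_weights, transpose_b=True),
                        output_bias)
probabilities = tf.nn.sigmoid(logits)  # one independent probability per label
per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                           logits=logits)
loss = tf.reduce_mean(per_example_loss)

with tf.Session() as sess:
    # logits may be negative; probabilities never are
    print(sess.run([logits, probabilities, loss]))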
I use it in model_fn_builder, where the returned logits are consumed by the estimator:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):
  """Returns `model_fn` closure for TPUEstimator."""

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:
        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()
        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu)
      logging_hook = tf.train.LoggingTensorHook(
          {"loss": total_loss, "precision": t_precision, "recall": t_recall},
          every_n_iter=10)
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          training_hooks=[logging_hook],
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        # logits: (?, num_classes); label_ids: (?, num_classes)
        print(logits)
        logits_split = tf.split(logits, args.num_classes,
                                axis=-1)  # a list of length num_classes
        label_ids_split = tf.split(logits, args.num_classes,
                                   axis=-1)  # a list of length num_classes
        accuracy = tf.constant(0.0, dtype=tf.float64)
        for j, logits in enumerate(logits_split):
          label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)
          current_auc, update_op_auc = tf.metrics.auc(label_id_, logits)
          prec, prec_op = precision(label_id_, logits)
          rec, rec_op = recall(label_id_, logits)
          f_1 = f1(label_id_, logits)
        eval_loss = tf.metrics.mean(values=per_example_loss)
        return {
            "eval_precision": (prec, prec_op),
            "eval_recall": (rec_op, rec_op),
            "eval_auc": (current_auc, update_op_auc),
            "eval_loss": eval_loss,
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec

  return model_fn
In my model_fn, when the estimator is in eval mode, the logits are used to compute the various metrics defined in metric_fn (inside model_fn_builder). I get the following error in the traceback:
ERROR:tensorflow:Error recorded from evaluation_loop: 2 root error(s) found.
(0) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not hold element-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
[[confusion_matrix_2/ones_like/_1429]]
(1) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not hold element-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
I understand that the error is caused by negative values in the logits. My question is: why do negative values end up there, and what is the fix?
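The assertion itself is easy to reproduce in isolation. Judging from the confusion_matrix/assert_non_negative node in the traceback, the failing op is the non-negativity check that tf.confusion_matrix applies to its inputs; a minimal sketch, assuming TF 1.x (the values are made up to mirror the [0 -1 -2...] in the message):

import tensorflow as tf  # TF 1.x

labels = tf.constant([0, 1, 2])
predictions = tf.constant([0, -1, -2])  # negative values where class ids are expected
cm = tf.confusion_matrix(labels, predictions)

with tf.Session() as sess:
    # Raises InvalidArgumentError:
    # assertion failed: [`predictions` contains negative values]
    sess.run(cm)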
EDIT 1: In case the question is unclear, let me add: I apply a sigmoid activation to the weighted sum of the last layer of the pre-trained BERT (weights of shape [hidden_dimension, num_classes]), store the result in probabilities, and then apply sigmoid_cross_entropy_with_logits (as shown in create_model()). According to the documentation, sigmoid returns a value between 0 and 1 for every input. So how can the probabilities contain negative values? I feel the problem is in metric_fn(), but I don't know what exactly.
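For reference, a quick sanity check of that reasoning, again assuming TF 1.x: sigmoid output stays strictly inside (0, 1) even for negative logits, whereas the raw logits tensor is unbounded. Note that in eval_metrics above it is logits, not probabilities, that gets handed to metric_fn.

import tensorflow as tf  # TF 1.x

logits = tf.constant([[-3.2, 0.0, 4.1]])  # unbounded; can be negative
probabilities = tf.nn.sigmoid(logits)     # strictly in (0, 1)

with tf.Session() as sess:
    print(sess.run(probabilities))  # approx. [[0.0392 0.5 0.9837]]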