Question

我正在使用以下代码（基于 tf.contrib.data.group_by_reducer）按 id 分组计算平均值。由于计算平均值需要同时保存总和与元素个数，我让状态包含这两个值，结果出现了错误。

import tensorflow as tf
# tf.enable_eager_execution()

# Sample input: six ids forming three groups (1, 2 and 3), two elements each.
ids = tf.constant([1, 2, 3, 1, 2, 3])

# Define reducer
# Reducer requires 3 functions - init_func, reduce_func, finalize_func. 
# init_func - to define initial value
# reduce_func - operation to perform on values with same key
# finalize_func - value to return in the end.
def init_func(_):
    """Return the initial state for a new key: [running sum, element count].

    The argument (ignored here) is supplied by the caller.
    """
    running_sum = 0
    element_count = 0
    return [running_sum, element_count]

def reduce_func(state, value):
    """Fold one element into the running state.

    Args:
        state: indexable pair whose item 0 is the running sum and item 1 is
            the number of elements seen so far.
        value: mapping with an 'ids' entry to add to the running sum.

    Returns:
        A tuple (new_sum, new_count).
    """
    new_sum = state[0] + value['ids']
    new_count = state[1] + 1
    return (new_sum, new_count)

def finalize_func(state):
    """Return the mean for one key: state[0] (sum) divided by state[1] (count).

    The author's real goal is to sample 2 values per group; the mean is
    returned here only to keep the example simple.

    NOTE: this function takes the whole state as a single argument. The
    TypeError quoted in the question ("takes exactly 1 argument (2 given)")
    indicates that it is actually invoked with the two state components as
    separate positional arguments.
    """
    # Multiplying by 1.0 forces float division for plain Python ints.
    total_as_float = state[0] * 1.0
    return total_as_float / state[1]

# Bundle the three callbacks into the Reducer consumed by group_by_reducer.
reducer = tf.contrib.data.Reducer(
    init_func=init_func,
    reduce_func=reduce_func,
    finalize_func=finalize_func)

# Group by reducer
# Group the data by id
def key_f(row):
    """Return the grouping key for one element: its 'ids' entry as int64."""
    to_int64 = tf.to_int64
    return to_int64(row['ids'])

# Build the grouping transformation: elements that share a key are folded
# together by the reducer.
t = tf.contrib.data.group_by_reducer(
    key_func=key_f,
    reducer=reducer)

# One dataset element per entry of `ids`, each a dict with a single 'ids' key.
ds = tf.data.Dataset.from_tensor_slices({'ids': ids})
ds = ds.apply(t)
# A batch size of 6 is at least the number of distinct keys (3), so one
# get_next() call returns every per-key result.
ds = ds.batch(6)

iterator = ds.make_one_shot_iterator()
data = iterator.get_next()

# Graph-mode tensors must be evaluated in a session; the context manager
# creates one and closes it when done.
with tf.Session() as sess:
    print(sess.run(data))

错误信息如下。如何把这 2 个值传递给 finalize_func()？或者有没有其他替代方法？

TypeError: finalize_func() takes exactly 1 argument (2 given)

Answer 1

您可以按 ID 对数据进行分段（segment），然后求出每个分段的平均值：

import numpy as np
import tensorflow as tf
# Session used below to evaluate the graph.
sess = tf.InteractiveSession()

# feature[i] belongs to the group identified by ids[i].
feature = tf.constant([0, 2, 4, 1, 3, 5])
ids = tf.constant([1, 2, 3, 1, 2, 3])

# tf.unique yields the distinct ids plus, for every element, the index of its
# id within that list; those indices are used as segment ids.
id_unique, idx = tf.unique(ids)
num_segments = tf.size(id_unique)

# Average the features that share a segment id.
mean = tf.math.unsorted_segment_mean(
    data=feature, segment_ids=idx, num_segments=num_segments)
print(sess.run(mean))

有没有使用 tf.contrib.data.group_by_reducer 计算平均值的示例？

1 个答案: