我正在尝试使用 tf.TextLineReader()读取CSV文件(以高效的方式,如Google推荐)。
正如您所看到的,我可以创建批次,稍后我会用它们来训练我的基本回归模型。 问题是 tf.train.shuffle_batch 返回的是一个张量,这意味着我无法在模型 Y_pred = X * W + b 中把 X 用作占位符。 在训练阶段,我目前直接用 batch_variable 代替占位符,但为了能用不同的数据进行测试,我仍然需要一个占位符。
我做错了什么?
编辑:我修改了尼古拉斯建议的代码(非常感谢!)但现在我的准确度达到了0.0 ......这有点尴尬。 我正在使用鲍鱼数据集(8个特征和3个类),其中我用一热矢量(1,0,0)改变了(M)ale(F)emale(I)nfant分类结果,(0, 1,0)...... def getPartitionedDatasets(filenames):
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.TextLineReader()
_, value = reader.read(filename_queue) # return a key and value (key is for debugging
record_defaults = [[1.0] for _ in range(N_FEATURES+1)]
cont = tf.decode_csv(value, record_defaults=record_defaults)
features = tf.stack([cont[1],cont[2],cont[3],cont[4],cont[5],cont[6],cont[7],cont[8]])
label = tf.to_int32(cont[0])
min_after_dequeue = 10 * BATCH_SIZE
capacity = 20 * BATCH_SIZE
data_batch, label_batch_raw = tf.train.shuffle_batch([features, label], batch_size=BATCH_SIZE,
capacity=capacity, min_after_dequeue=min_after_dequeue)
label_batch_hot = tf.one_hot(label_batch_raw,on_value=1,off_value=0, depth=3)
return data_batch, label_batch_hot
def get_model_params():
    """Create the trainable parameters of the linear classifier.

    Returns:
        A ``(weights, bias)`` pair of zero-initialised ``tf.Variable`` objects:
        weights of shape ``[N_FEATURES, CLASSES]`` and bias of shape ``[CLASSES]``.
    """
    weight_shape = [N_FEATURES, CLASSES]
    bias_shape = [CLASSES]
    weights = tf.Variable(tf.zeros(weight_shape), name='weights')
    bias = tf.Variable(tf.zeros(bias_shape), name="bias")
    return weights, bias
def build_model(data_batch, label_batch_hot, w, b):
    """Wire a linear softmax model onto a batch of data and labels.

    Args:
        data_batch: feature tensor multiplied by ``w``.
        label_batch_hot: one-hot labels used as the cross-entropy targets.
        w: weight variable.
        b: bias variable.

    Returns:
        A ``(logits, loss)`` pair: the raw ``data_batch @ w + b`` scores and
        the mean softmax cross-entropy of those scores against the labels.
    """
    logits = tf.matmul(data_batch, w) + b
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=label_batch_hot, logits=logits)
    mean_loss = tf.reduce_mean(per_example_loss)
    return logits, mean_loss
def get_optimizer_op(cross_entropy_op):
    """Build the training op that minimises ``cross_entropy_op``.

    Uses plain gradient descent with a fixed learning rate of 0.5.
    """
    sgd = tf.train.GradientDescentOptimizer(0.5)
    train_op = sgd.minimize(cross_entropy_op)
    return train_op
def run():
    """Train the linear model on the training CSV and print test accuracy.

    Builds separate input pipelines for ``ABA_Train.csv`` and ``ABA_Test.csv``,
    shares one set of parameters between the training and evaluation graphs,
    runs 300 gradient-descent steps, then prints the accuracy measured on a
    single test batch.
    """
    train_data, train_label = getPartitionedDatasets(["ABA_Train.csv"])
    test_data, test_label = getPartitionedDatasets(["ABA_Test.csv"])

    W, b = get_model_params()

    # Training graph: only the loss is needed to build the optimiser op.
    _, train_cross_entropy = build_model(train_data, train_label, W, b)
    optimizer = get_optimizer_op(train_cross_entropy)

    # Evaluation graph reuses the same W and b on the test pipeline.
    test_predicted, _ = build_model(test_data, test_label, W, b)
    correct_prediction = tf.equal(tf.argmax(test_predicted, 1), tf.argmax(test_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        # Initialise variables before any queue-runner thread starts.
        sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(300):  # 300 training steps, one shuffled batch each
                sess.run(optimizer)
            print(sess.run(accuracy))
        finally:
            # Stop and join the queue-runner threads before the session closes;
            # the original left them running when the session was torn down.
            coord.request_stop()
            coord.join(threads)
def main():
    """Script entry point: delegates to run()."""
    run()


# Only execute when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
答案 0 :(得分:0)
你可以做的是实现各种方法
例如,你可以写成类似下面这样(假设你的训练数据和测试数据位于两组不同的文件中):
/**
 * Program entry point; runs the subject-based example.
 */
// Alternative example, currently disabled: doNoSubjectExample()
fun main(args: Array<String>): Unit = doSubjectExample()
/**
 * Subscribes to the stream exposed by [TwitterSubject], prints every status and
 * error it receives for ten seconds, then disposes the subscription.
 */
private fun doSubjectExample() {
    val disposable = TwitterSubject().observe().subscribe(
        // Template fixed: the original "{$status)}" printed stray, unbalanced braces.
        { status -> println("Status: $status") },
        { error -> println("Error callback: $error") }
    )
    try {
        // Block this thread for ten seconds so the subscription has time to receive events.
        TimeUnit.SECONDS.sleep(10)
    } finally {
        // Dispose even when the sleep is interrupted.
        disposable.dispose()
    }
}
/**
 * Bridges a Twitter4J status stream into an Rx [Observable].
 *
 * Construction creates a stream, registers a [StatusListener] that forwards
 * statuses and exceptions into [subject], and starts sampling immediately.
 *
 * NOTE(review): nothing visible here ever stops the stream; disposing a
 * subscriber only detaches it from [subject]. Confirm whether the stream
 * should be shut down when the example finishes.
 */
private class TwitterSubject {
    val subject = PublishSubject.create<Status>()

    init {
        val twitterStream = TwitterStreamFactory().instance
        // See: https://stackoverflow.com/questions/37672023/how-to-create-an-instance-of-anonymous-interface-in-kotlin/37672334
        val listener = object : StatusListener {
            override fun onStatus(status: Status?) {
                // RxJava 2+ rejects null items, so a null status is skipped
                // instead of being pushed into the subject.
                if (status != null) {
                    subject.onNext(status)
                }
            }

            override fun onException(ex: Exception?) {
                // onError must receive a non-null Throwable; substitute a
                // descriptive one when the callback supplies none.
                subject.onError(
                    ex ?: IllegalStateException("Twitter stream reported an error without an exception")
                )
            }

            override fun onTrackLimitationNotice(numberOfLimitedStatuses: Int) {
                // Not implemented.
            }

            override fun onStallWarning(warning: StallWarning?) {
                // Not implemented.
            }

            override fun onDeletionNotice(statusDeletionNotice: StatusDeletionNotice?) {
                // Not implemented.
            }

            override fun onScrubGeo(userId: Long, upToStatusId: Long) {
                // Not implemented.
            }
        }
        Twitter4JHelper.addStatusListner(twitterStream, listener)
        twitterStream.sample()
    }

    /** Returns the stream of statuses forwarded by the listener. */
    fun observe(): Observable<Status> = subject
}