我正在尝试从csv文件读取大数据。代码如下:
Worksheet_Change
但是,数据仍然存储在内存中并由cpu计算。(我已经测试过
class VoiceDataGenerator():
def __init__(self):
pass
@staticmethod
def dataset_iterator(dataset_train, dataset_val):
    """Create one iterator that can be pointed at either dataset.

    The iterator is built from the output types and shapes of
    ``dataset_train`` only, so ``dataset_val`` is expected to yield
    elements with a compatible structure.

    Args:
        dataset_train: Dataset whose structure defines the iterator and
            which the first initializer binds to it.
        dataset_val: Dataset which the second initializer binds to the
            same iterator.

    Returns:
        A tuple ``(train_init, test_init, x, y)``: the two initializer
        ops followed by the two components produced by ``get_next()``.
    """
    shared_iter = tf.data.Iterator.from_structure(
        dataset_train.output_types,
        dataset_train.output_shapes,
    )
    first_component, second_component = shared_iter.get_next()

    # One initializer per dataset; running one re-targets the shared iterator.
    init_train = shared_iter.make_initializer(dataset_train)
    init_val = shared_iter.make_initializer(dataset_val)

    return init_train, init_val, first_component, second_component
@staticmethod
def parse_data(x, n_classes):
    """Split one parsed row into a feature tensor and a one-hot label.

    Args:
        x: Sequence of per-column values for a single row; the last
            entry is treated as the class index.
        n_classes: Depth of the one-hot label vector.

    Returns:
        A tuple ``(features, one_hot_label)`` where ``features`` is built
        from every entry of ``x`` except the last.
    """
    features = tf.convert_to_tensor(x[:-1])
    # The label column is parsed as a float, so cast it before one-hot encoding.
    class_index = tf.cast(x[-1], dtype=tf.int32)
    one_hot_label = tf.one_hot(indices=class_index, depth=n_classes)
    return features, one_hot_label
@staticmethod
def load_dataset(batch_size, cpu_cores, dataset_path):
    """Build the batched training and validation pipelines from CSV files.

    Reads ``train.csv`` and ``test.csv`` (space-delimited, no header,
    430 float columns per row) located under ``dataset_path``. Each row
    is converted by ``VoiceDataGenerator.parse_data`` into a feature
    tensor and a one-hot label. Only the training pipeline is shuffled.

    Args:
        batch_size: Number of rows per batch for both pipelines.
        cpu_cores: Passed as ``num_parallel_calls`` to the row-parsing
            ``map`` step.
        dataset_path: Prefix prepended verbatim to the file names, so it
            must already end with a path separator.

    Returns:
        A tuple ``(dataset_train, dataset_val, total_batches_train,
        n_sample_train, n_sample_val)``.
    """
    # Corpus statistics are hard-coded rather than counted from the files.
    n_sample_train = 1019915
    n_sample_val = 57909
    n_classes = 1928

    # One float32 entry per CSV column.
    record_defaults = [tf.float32] * 430

    def parse_row(*x):
        # CsvDataset yields one tensor per column; forward them as a tuple.
        return VoiceDataGenerator.parse_data(x, n_classes)

    dataset_train = tf.data.experimental.CsvDataset(
        dataset_path + 'train.csv', record_defaults, header=False, field_delim=' ')
    dataset_val = tf.data.experimental.CsvDataset(
        dataset_path + 'test.csv', record_defaults, header=False, field_delim=' ')

    # Ceiling division: the last partial batch counts as a batch, but an
    # exact multiple of batch_size must not produce an extra (empty) one.
    # The previous "n // batch_size + 1" over-counted in that case.
    total_batches_train = (n_sample_train + batch_size - 1) // batch_size

    dataset_train = dataset_train.shuffle(buffer_size=100000)
    dataset_train = dataset_train.map(map_func=parse_row, num_parallel_calls=cpu_cores)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(buffer_size=1)

    dataset_val = dataset_val.map(map_func=parse_row, num_parallel_calls=cpu_cores)
    dataset_val = dataset_val.batch(batch_size)
    dataset_val = dataset_val.prefetch(buffer_size=1)

    return dataset_train, dataset_val, total_batches_train, n_sample_train, n_sample_val
中的gpu可以正常工作),所以我怀疑读取csv文件的代码是否有错误。这是运行我的代码时的日志:
tensorflow-gpu
我想知道我的代码中是否有错误以及如何处理?
我的环境是: