我有一个包含9列的数据集,最后一个是目标变量,作为带有标题的csv。我正在尝试编写一个生成器来训练keras
中的模型。代码如下。训练在第一个时期进行,但在完成之前停止/挂起。
from sklearn.datasets import california_housing
import pandas as pd
import numpy as np
data=california_housing.fetch_california_housing()
cols=data.feature_names
cols.append('y')
data=pd.DataFrame(np.column_stack([data.data,data.target.reshape((data.target.shape[0],1))]),columns=cols)
data.to_csv('/media/jma/DATA/calhousing.csv',index=False)
发电机代码:
import csv
import numpy as np
def generate_arrays_from_file(file_name,batchsz):
csvfile = open(file_name)
reader = csv.reader(csvfile)
batchCount = 0
inputs = []
targets = []
while True: #infinite loop
linecounter=0 #which line the reader is reading
for line in reader:
if linecounter >0: #is not the header
inputs.append(line[0:8])
targets.append(line[8])
batchCount += 1 # we added
if batchCount >= batchsz: # we have our mini batch
batchCount = 0 #reset batch counter
X = np.array(inputs,dtype="float32")
y = np.array(targets,dtype="float32")
yield (X, y)
#reset the lists to hold the batches
inputs = []
targets = []
linecounter += 1 #increment the line read
linecounter = 0 #reset
像这样跑:
from keras.models import Sequential
from keras.layers import Dense
batch_size =100
train_gen=generate_arrays_from_file('/media/jma/DATA/calhousing.csv',batchsz=batch_size)
model = Sequential()
model.add(Dense(32, input_shape=(8,)))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='rmsprop',
loss='mse', metrics=['mse'])
model.fit_generator(train_gen,steps_per_epoch=data.shape[0] / batch_size, epochs=5, verbose=1)
大纪元1/5 194/206 [===========================> ..] - ETA:0s - 损失:67100.1775 - mean_squared_error:67100.1775
答案 0 :(得分:1)
我发现了这个问题,并且正在为寻找示例的其他人发布此信息:
import csv
import numpy as np
def generate_arrays_from_file(file_name,batchsz):
batchCount = 0
inputs = []
targets = []
linecounter=0 #which line the reader is reading
while True: #infinite loop
with open(file_name, "r") as csvfile:
for line in csv.reader(csvfile):
if linecounter >0: #is not the header
#could procress data as well
inputs.append(line[0:8])
targets.append(line[8])
batchCount += 1 # we added
if batchCount >= batchsz: # we have our mini batch
batchCount = 0 #reset batch counter
X = np.array(inputs,dtype="float32")
y = np.array(targets,dtype="float32")
yield (X, y)
#reset the lists to hold the batches
inputs = []
targets = []
linecounter += 1 #increment the line read
linecounter = 0
答案 1 :(得分:1)
OP改变了什么:
import csv
import numpy as np
def generate_arrays_from_file(file_name,batchsz):
###################
### Moved this: ###
###################
# csvfile = open(file_name)
# reader = csv.reader(csvfile)
### End ###########
batchCount = 0
inputs = []
targets = []
linecounter=0 #which line the reader is reading
while True: #infinite loop
################
### to here: ###
################
with open(file_name, "r") as csvfile:
for line in csv.reader(csvfile):
### End ###########
if linecounter >0: #is not the header
#could procress data as well
inputs.append(line[0:8])
targets.append(line[8])
batchCount += 1 # we added
if batchCount >= batchsz: # we have our mini batch
batchCount = 0 #reset batch counter
X = np.array(inputs,dtype="float32")
y = np.array(targets,dtype="float32")
yield (X, y)
#reset the lists to hold the batches
inputs = []
targets = []
linecounter += 1 #increment the line read
linecounter = 0