Program for applying linear regression to a dataset using TensorFlow
# About housing_data:
# Dataset characteristics: multivariate
# Associated task: regression
# Number of instances: 506
# Number of attributes: 14
#Attribute Information:
#1. CRIM: per capita crime rate by town
#2. ZN: proportion of residential land zoned for lots over 25,000 sq.ft.
#3. INDUS: proportion of non-retail business acres per town
#4. CHAS: Charles River dummy variable
#5. NOX: nitric oxides concentration (parts per 10 million)
#6. RM: average number of rooms per dwelling
#7. AGE: proportion of owner-occupied units built prior to 1940
#8. DIS: weighted distances to five Boston employment centres
#9. RAD: index of accessibility to radial highways
#10. TAX: full-value property-tax rate per $10,000
#11. PTRATIO: pupil-teacher ratio by town
#12. B: 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
#13. LSTAT: % lower status of the population
#14. MEDV: Median value of owner-occupied homes in $1000's
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Number of row indices sampled for each mini-batch in the training loop.
batch_size=50
# Build a queue of the file names matching "housing_data.csv" (shuffle enabled).
filename_queue = tf.train.string_input_producer(
tf.train.match_filenames_once("housing_data.csv"),
shuffle=True)
# Line-oriented reader configured to skip one header line.
line_reader = tf.TextLineReader(skip_header_lines=1)
# read() returns a (key, value) pair; the key is discarded and the value
# is kept as the raw CSV line.
_, csv_row = line_reader.read(filename_queue)
# Default value 0.0 for each of the 14 CSV columns.
record_defaults = [[0.0], [0.0], [0.0], [0.0], [0.0],[0.0],[0.0],[0.0],[0.0],
[0.0], [0.0], [0.0], [0.0], [0.0]]
# Split the CSV line into one tensor per column.
CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV = tf.decode_csv(
csv_row, record_defaults=record_defaults)
# Pack the 14 decoded columns back into a single tensor.
# NOTE(review): MEDV, which is also used as the target below, is packed into
# `features` as well -- confirm whether that is intended.
features = tf.pack([CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV])
# Placeholders that are filled through feed_dict when the graph is run.
x=tf.placeholder(tf.float32)
y=tf.placeholder(tf.float32)
# Weight and bias variables, each created from tf.random_normal([1]).
W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
# Linear model: y_pred = x * W + b.
y_pred = tf.add(tf.mul(x, W), b)
# Loss: squared residuals, each divided by 506 (the instance count listed in
# the dataset header), then summed.
error=tf.reduce_sum((y-y_pred)**2/506)
learning_rate = 0.01
# Training op: one gradient-descent step that minimises `error`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
# Create the session used to run the graph.
# NOTE(review): indentation was lost in this paste; the statements that follow
# presumably belong inside this `with` block -- confirm against the original.
with tf.Session() as sess:
# Initialise all variables in the graph before they are used.
sess.run(tf.initialize_all_variables())
# NOTE(review): this fetch happens before the queue runners are started
# below -- confirm that it does not block.
sess.run([features,MEDV])
# Coordinator and queue-runner threads for the input queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# The fetched values are not stored.
sess.run([features,MEDV])
# Gradient-descent loop: 500 iterations.
for _ in range(500):
# Draw batch_size random row indices from range(506).
indices=np.random.choice(506,batch_size)
# NOTE: this is the statement reported in the traceback quoted after this
# script: indexing the tensors with a NumPy index array raises
# "TypeError: Bad slice index".
x_batch,y_batch=features[indices],MEDV[indices]
# Run one optimiser step and fetch the loss for this batch.
_,loss_val=sess.run([optimizer,error],feed_dict={x:x_batch,y:y_batch})
print _,loss_val
# Ask the queue-runner threads to stop and wait for them to finish.
coord.request_stop()
coord.join(threads)
---------------------------错误------------------- ---------------------
When I run this code, I get the following error.
error:
Traceback (most recent call last):
File "linear_reg_tf.py", line 92, in <module>
x_batch,y_batch=features[indices],MEDV[indices]
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 161, in _SliceHelper
raise TypeError("Bad slice index %s of type %s" % (s, type(s)))
TypeError: Bad slice index [468 105 218 124 492 428 464 194 110 76 165 127 480 414 88 332 54 68
195 162 223 65 340 279 390 390 1 127 290 82 430 154 202 389 478 42 410 25 161 42 143 256 481 417 263 92 335 101 430 334] of type <type 'numpy.ndarray'>
答案 0 :(得分:0)
也许代替x_batch,y_batch=features[indices],MEDV[indices]
你可以尝试
# Build the batch one index at a time instead of indexing the tensors with
# the whole NumPy index array at once.
x_batch, y_batch = [], []
for index in indices:
    x_batch.append(features[index])
    y_batch.append(MEDV[index])
答案 1 :(得分:0)
此代码正在运行,但我想知道我们是否要测试一些新数据,然后我们将如何测试或如何将数据拆分为训练集和测试集
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Kept from the original script; nothing below reads it.
batch_size = 50

# Queue of the file names matching "housing_data.csv" (shuffle enabled).
filename_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once("housing_data.csv"),
    shuffle=True)

# Line-oriented reader configured to skip one header line.
line_reader = tf.TextLineReader(skip_header_lines=1)
# read() returns a (key, value) pair; only the raw CSV line is needed.
_, csv_row = line_reader.read(filename_queue)

# Type information for the decoded CSV: a 0.0 default for each of the
# 14 columns.
record_defaults = [[0.0]] * 14
(CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT,
 MEDV) = tf.decode_csv(csv_row, record_defaults=record_defaults)

# Turn the decoded columns back into a single tensor.
# NOTE(review): MEDV (the target) is packed into `features` too, so it is also
# fed as an input below -- confirm whether it should be left out.
features = tf.pack([CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX,
                    PTRATIO, B, LSTAT, MEDV])

# Placeholders that are filled through feed_dict when the graph is run.
x = tf.placeholder(dtype=features.dtype)
y = tf.placeholder(dtype=MEDV.dtype)

# Weight and bias variables, each created from tf.random_normal([1]).
W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
# Linear model: y_pred = x * W + b.
y_pred = tf.add(tf.mul(x, W), b)

# Loss: squared distance between observations and predictions, divided by
# 506 because the housing data has 506 instances, then summed.
error = tf.reduce_sum((y - y_pred) ** 2 / 506)
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

# Create the session to use the graph. The `with` statement had been fused
# into the preceding comment, which left `sess` undefined.
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        # Fetch inputs and target in ONE run call so both come from the same
        # CSV row; two separate run calls would read two different rows.
        row_features, row_target = sess.run([features, MEDV])
        x_data = np.array(row_features)
        y_data = np.array(row_target)

        # Gradient-descent loop: 10 iterations.
        for step in range(10):
            # NOTE(review): the values fetched here are discarded, so every
            # step trains on the same x_data / y_data -- confirm whether a
            # fresh row was meant to be used per step.
            sess.run([features, MEDV])
            # Run one gradient-descent step and fetch the loss.
            train_result, loss_val = sess.run(
                [optimizer, error], feed_dict={x: x_data, y: y_data})
            # Same text as `print _,loss_val`, valid on Python 2 and 3.
            print("%s %s" % (train_result, loss_val))
    finally:
        # Always stop and join the queue-runner threads, even if a step fails.
        coord.request_stop()
        coord.join(threads)