我想根据某一列中的文本,在 pandas 数据框中添加新列。例如,这是我的数据:
>>> data
No Description
1 Extention Slack 1 Month
2 Extention Slack 1 Year
3 Slack 6 Month
4 Slack 1 Year
我需要的是
No Description M M+1 M+2 M+3 M+4 M+5 M+6 ... M+11
1 Extention Slack 1 Month 1 0 0 0 0 0 0 0
2 Extention Slack 1 Year 1 1 1 1 1 1 1 1
3 Slack 6 Month 1 1 1 1 1 1 0 0
4 Slack 3 Month 1 1 1 0 0 0 0 0
我做的是
import numpy as np
data['M'] = np.where(data['Description'].str.contains('1 Year'), 1, 0)
请问应该怎么做?
答案 0 :(得分:1)
# Lead-in text of the answer (translated): "In the 'Description' column, you want to infer based on"
def train(X_train, y_train, X_val, y_val, batch_size, iterations=1000, learning_rate=0.5):
    """Train a 784-100-11 fully connected ReLU network with mini-batch SGD.

    Args:
        X_train: 2-D array of training inputs; fed to a placeholder of shape
            [None, 784], so each row must have 784 features.
        y_train: 2-D array of training targets; fed to a placeholder of shape
            [None, 11] and compared via argmax, so rows are presumably
            one-hot labels over 11 classes.
        X_val: Validation inputs, same column layout as ``X_train``.
        y_val: Validation targets, same column layout as ``y_train``.
        batch_size: Number of rows taken from the training set per step.
        iterations: Number of gradient-descent steps to run.
        learning_rate: Step size passed to ``GradientDescentOptimizer``.

    Returns:
        dict mapping "w1", "b1", "w2", "b2" to the corresponding
        ``tf.Variable`` objects. The ``InteractiveSession`` created here is
        left open, so the variables can still be evaluated by the caller.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 11])

    # Weights must not start at zero: with all-zero w1/w2 the ReLU output and
    # the back-propagated signal are both zero, so only b2 would ever change.
    # Small random values break the symmetry; biases can stay at zero.
    w1 = tf.Variable(tf.truncated_normal([784, 100], stddev=0.1))
    b1 = tf.Variable(tf.zeros([100]))
    w2 = tf.Variable(tf.truncated_normal([100, 11], stddev=0.1))
    b2 = tf.Variable(tf.zeros([11]))

    # Forward pass: hidden ReLU layer followed by a linear layer of logits.
    z1 = tf.matmul(x, w1) + b1
    a1 = tf.nn.relu(z1)
    y = tf.matmul(a1, w2) + b2

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    num_examples = X_train.shape[0]
    start = 0
    for k in range(iterations):
        stop = start + batch_size
        # Slicing clips at the end of the array, so the last batch of a pass
        # may be shorter than batch_size but is never empty.
        batch_x = X_train[start:stop, :]
        batch_y = y_train[start:stop, :]
        print(batch_x.shape)
        print(batch_y.shape)

        sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})

        # Report validation accuracy every 100 steps.
        if k % 100 == 0:
            print(sess.run(accuracy, feed_dict={x: X_val, y_: y_val}))

        # Wrap back to the first row once the training set is exhausted;
        # otherwise every later slice would be empty.
        start = stop if stop < num_examples else 0

    return {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
    }
即根据“说明”列末尾的 `{time} {time_label}` 部分(例如 `1 Year` 或 `1 Month`)进行推断,
并在 12 个月对应的各列中填入 1 或 0。
这是一种做你想做的事情的方法:
1 Year
完整的可复现示例:
1 Month
输出:
# Create two helper columns from the last two words of Description:
#   time:       the count preceding the unit, kept as a string (e.g. "6")
#   time_label: the unit word ("Month" or "Year")
tokens = df["Description"].str.split()
df["time"] = tokens.str[-2]
df["time_label"] = tokens.str[-1]

# Number of months represented by one unit of each label.
mapping = {"Month": 1, "Year": 12}

# Total duration of each row in months. It does not depend on the loop
# variable below, so it is computed once instead of on every iteration.
duration_months = df["time"].astype(int) * df["time_label"].map(mapping)

for month in range(12):
    # The first column is called "M" rather than "M+0" to match the
    # requested header; the rest are "M+1" ... "M+11".
    column = "M" if month == 0 else "M+" + str(month)
    # 1 while the duration still covers this month, 0 afterwards.
    df[column] = np.where(duration_months >= month + 1, 1, 0)