假设我有一个 data.table,其中有 C 列,每列保存取自 N 个可能值的离散值:
# Reproducible sample data: 4 rows x 3 columns, each cell drawn from 0:5
# with replacement.
set.seed(123)
datapoints <- data.table(replicate(3, sample(0:5, 4, replace = TRUE)))
print(datapoints)
V1 V2 V3
1: 1 5 3
2: 4 0 2
3: 2 3 5
4: 5 5 2
(这里C = 3,N = 5)
我想添加 N 列:对于每一行,如果这 C 列中的任意一列包含第 n 个值,则对应的新列为 TRUE,
否则为 FALSE:
V1 V2 V3 has0 has1 has2 has3 has4 has5
1: 1 5 3 FALSE TRUE FALSE TRUE FALSE TRUE
2: 4 0 2 TRUE FALSE TRUE FALSE TRUE FALSE
3: 2 3 5 FALSE FALSE TRUE TRUE FALSE TRUE
4: 5 5 2 FALSE FALSE TRUE FALSE FALSE TRUE
我已经尝试过了:
# Asker's attempt: add one "has<value>" column for each candidate value 0..5.
# NOTE: inside `j`, .SD refers to the selected columns of the whole table
# (a list of column vectors), not to the current row. `value %in% .SD`
# therefore produces a single FALSE, which is recycled down the entire new
# column -- this is why every has* column ends up all FALSE.
for (value in 0:5) {
datapoints <- datapoints[, (paste("has", value, sep="")) := (value %in% .SD), .SDcols = c("V1", "V2", "V3")]
}
列已添加,但全部被填充为 FALSE:
V1 V2 V3 has0 has1 has2 has3 has4 has5
1: 1 5 3 FALSE FALSE FALSE FALSE FALSE FALSE
2: 4 0 2 FALSE FALSE FALSE FALSE FALSE FALSE
3: 2 3 5 FALSE FALSE FALSE FALSE FALSE FALSE
4: 5 5 2 FALSE FALSE FALSE FALSE FALSE FALSE
在我看来,如果我将 .SD 替换为对当前行(而不是整个表)的引用,这段代码就能正常工作,但是我不知道该怎么做。
添加这些列的有效方法是什么?
答案 0 :(得分:1)
这是一种方法
def build_model2(dataIn, timeWindow, layerOut, outputs, activation,
                 optimizer):
    """Build and compile a small convolutional classifier.

    The architecture is fixed by the code below: two convolution layers
    (32 then 64 filters) on a ``(28, 28, 1)`` input, 2x2 max-pooling,
    dropout, a 128-unit ReLU dense layer and a 10-way softmax output. The
    model is compiled with MSE loss and the ``coeffDetermination`` and
    ``'accuracy'`` metrics.

    Args:
        dataIn: Unused; kept for interface compatibility.
        timeWindow: Unused; kept for interface compatibility.
        layerOut: Unused; kept for interface compatibility.
        outputs: Unused; kept for interface compatibility.
        activation: Unused; kept for interface compatibility.
        optimizer: Optimizer handed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.

    Note:
        The previous body overwrote ``layerOut``, ``dataIn``, ``timeWindow``
        and ``outputs`` with constants (and defined ``layerOutExpand``) that
        were never read afterwards; those dead assignments were removed.
        Earlier comments describing an input of size 42 and an output of
        size 20 did not match the layers actually built here.
    """
    num_classes = 10

    model = Sequential()
    model.add(Convolution2D(32, 3, 3, input_shape=(28, 28, 1)))
    model.add(Activation('relu'))
    # NOTE(review): how the positional arguments (64, 3, 3) are interpreted
    # (kernel rows/cols vs. kernel_size/strides) depends on the Keras
    # version in use -- confirm this is the intended layer.
    model.add(Conv2D(64,3,3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model and time how long it takes.
    start = time.time()
    # optimizer = SGD(lr=0.3, momentum=0.9)
    model.compile(loss="mse", optimizer=optimizer, metrics=[coeffDetermination, 'accuracy'])
    print("Compilation Time : ", time.time() - start)

    # Print the layer-by-layer summary of the network.
    model.summary()
    return model
要使其更加灵活,您还可以将以下代码
library(data.table)
# Sample data: 4 rows x 3 columns, each cell drawn from 0:5 with replacement.
set.seed(123)
datapoints <- data.table(replicate(3, sample(0:5, 4, replace = TRUE)))

# For each candidate value, add a logical column has<value> that is TRUE on
# the rows where at least one of the source columns equals that value.
# `%in%` is applied to each column as a whole vector and the per-column
# results are OR-ed together; this avoids apply(), which would first coerce
# .SD to a matrix and then call an R function once per row.
value_cols <- c("V1", "V2", "V3")
for (value in 0:5) {
  flag_col <- paste0("has", value)
  datapoints[
    , (flag_col) := Reduce(`|`, lapply(.SD, `%in%`, value)),
    .SDcols = value_cols
  ]
}
datapoints
#> V1 V2 V3 has0 has1 has2 has3 has4 has5
#> 1: 1 5 3 FALSE TRUE FALSE TRUE FALSE TRUE
#> 2: 4 0 2 TRUE FALSE TRUE FALSE TRUE FALSE
#> 3: 2 3 5 FALSE FALSE TRUE TRUE FALSE TRUE
#> 4: 5 5 2 FALSE FALSE TRUE FALSE FALSE TRUE
中的 any(x %in% value) 替换为 sum(x %in% value),以获取该值在每行中出现的次数。
对于同一示例(当然,如果您只想要列的子集,仍然可以使用 .SDcols):
# find how many times a value exists
# For each candidate value, store in has<value> how many of the source
# columns equal that value on each row. Column-wise `%in%` results are
# summed; starting the sum from 0L keeps the result an integer vector, as
# the row-wise sum() did, without the matrix coercion of apply().
value_cols <- c("V1", "V2", "V3")
for (value in 0:5) {
  count_col <- paste0("has", value)
  datapoints[
    , (count_col) := Reduce(`+`, lapply(.SD, `%in%`, value), 0L),
    .SDcols = value_cols
  ]
}
datapoints
#> V1 V2 V3 has0 has1 has2 has3 has4 has5
#> 1: 1 5 3 0 1 0 1 0 1
#> 2: 4 0 2 1 0 1 0 1 0
#> 3: 2 3 5 0 0 1 1 0 1
#> 4: 5 5 2 0 0 1 0 0 2
。