我是 R 的新手,我正在尝试根据给定的表计算一个索引。我已经计算了下表中各列的最小值、最大值和中值。列中有一些 NaN。第一列和第二列是纬度和经度,从第 3 列开始是土壤湿度。
1 2 3 4 5 6 7 8 9 10 11
28.188 -111.438 NaN NaN NaN NaN NaN NaN NaN NaN NaN
28.188 -111.312 362.74 360.723 361.645 356.894 354.188 354.166 355.16 350.148 339.705
28.188 -111.188 189.991 188.153 188.96 184.49 182.028 181.776 182.322 175.402 161.836
28.188 -111.062 312.743 310.278 310.867 306.436 304.178 304.157 306.149 302.238 293.318
28.188 -110.938 146.97 144.582 145.274 141.367 139.66 140.137 142.949 141.15 135.776
28.188 -110.812 149.095 146.86 147.706 143.991 142.258 143.005 146.508 144.388 138.771
28.188 -110.688 147.06 144.825 145.479 141.687 139.826 140.714 144.882 142.481 136.582
28.188 -110.562 145.857 143.75 144.493 140.791 138.866 139.933 144.882 142.148 135.532
28.188 -110.438 135.207 133.254 134.163 130.436 128.345 129.526 135.307 132.01 123.974
我使用以下代码来计算最小值,中位数和最大值;
# Read the comma-separated table, treating the first row as column names.
data <- read.table("NLDAS_MOISTURE_200CM.csv", header = TRUE, sep = ",")
# Keep columns 3 to 11 only.
df <- data.frame(data[3:11])
# Column-wise (MARGIN = 2) statistics with missing values removed.
Minimum <- apply(df, 2, min, na.rm = TRUE)
Maximum <- apply(df, 2, max, na.rm = TRUE)
Median <- apply(df, 2, median, na.rm = TRUE)
现在,我想使用以下条件计算每个单元格列的索引:
1)如果单元格值小于所在列的中值(单元格值 < 中值):索引 =(单元格值 - 中值)/(中值 - 最小值)× 100
2)如果单元格值大于所在列的中值(单元格值 > 中值):索引 =(单元格值 - 中值)/(最大值 - 中值)× 100
如果有人可以使用 if 或 for 循环完成这段代码,我将不胜感激。谢谢
加文
答案 0 :(得分:1)
你可以尝试
# Replace every column except the first two with its median-centred index.
df[-(1:2)] <- lapply(df[, -(1:2)], function(col) {
  col_median <- median(col, na.rm = TRUE)
  col_min <- min(col, na.rm = TRUE)
  col_max <- max(col, na.rm = TRUE)
  # Values at or below the median are scaled by (median - min),
  # the remaining values by (max - median).
  ifelse(
    col <= col_median,
    100 * (col - col_median) / (col_median - col_min),
    100 * (col - col_median) / (col_max - col_median)
  )
})
# Show the first five columns of the result.
df[1:5]
# X1 X2 X3 X4 X5
#1 28.188 -111.438 NA NA NA
#2 28.188 -111.312 100.0000000 100.000000 100.0000000
#3 28.188 -111.188 19.5253013 19.690246 19.7010032
#4 28.188 -111.062 76.7090200 76.524161 76.3880913
#5 28.188 -110.938 -8.6049493 -10.013107 -10.6078282
#6 28.188 -110.812 0.4739999 0.473519 0.5177805
#7 28.188 -110.688 -7.9056758 -8.082774 -8.9585261
#8 28.188 -110.562 -17.2526320 -16.622314 -16.8912667
#9 28.188 -110.438 -100.0000000 -100.000000 -100.0000000
或基于已计算的Minimum
,Maximum
,Median
值
# Same index, reusing the precomputed Median, Minimum and Maximum vectors;
# Map() walks the columns and the three statistics in parallel.
df[-(1:2)] <- Map(
  function(value, med, lo, hi) {
    ifelse(
      value <= med,
      100 * (value - med) / (med - lo),
      100 * (value - med) / (hi - med)
    )
  },
  df[-(1:2)], Median, Minimum, Maximum
)
要计算min
,max
,median
,其他选项是使用dplyr
library(dplyr)
# For each of columns 3 to 11, append its min, max and median (missing values
# removed) as new columns named <column>_min, <column>_max and <column>_median.
# across() replaces the deprecated mutate_each()/funs() interface.
df %>%
  mutate(across(
    3:11,
    list(
      min = function(x) min(x, na.rm = TRUE),
      max = function(x) max(x, na.rm = TRUE),
      median = function(x) median(x, na.rm = TRUE)
    )
  ))
# Example data: nine rows and eleven numeric columns (X1 to X11);
# the first row of X3 to X11 is NaN.
df <- data.frame(
  X1 = rep(28.188, 9),
  X2 = c(-111.438, -111.312, -111.188, -111.062, -110.938, -110.812,
         -110.688, -110.562, -110.438),
  X3 = c(NaN, 362.74, 189.991, 312.743, 146.97, 149.095, 147.06,
         145.857, 135.207),
  X4 = c(NaN, 360.723, 188.153, 310.278, 144.582, 146.86, 144.825,
         143.75, 133.254),
  X5 = c(NaN, 361.645, 188.96, 310.867, 145.274, 147.706, 145.479,
         144.493, 134.163),
  X6 = c(NaN, 356.894, 184.49, 306.436, 141.367, 143.991, 141.687,
         140.791, 130.436),
  X7 = c(NaN, 354.188, 182.028, 304.178, 139.66, 142.258, 139.826,
         138.866, 128.345),
  X8 = c(NaN, 354.166, 181.776, 304.157, 140.137, 143.005, 140.714,
         139.933, 129.526),
  X9 = c(NaN, 355.16, 182.322, 306.149, 142.949, 146.508, 144.882,
         144.882, 135.307),
  X10 = c(NaN, 350.148, 175.402, 302.238, 141.15, 144.388, 142.481,
          142.148, 132.01),
  X11 = c(NaN, 339.705, 161.836, 293.318, 135.776, 138.771, 136.582,
          135.532, 123.974)
)
答案 1 :(得分:1)
以下是一次性完成整个操作的方法。这里假设您不需要保存中值、最小值和最大值以供日后使用。如果需要保存,请留言说明。
此函数对向量 x 依次应用 median、min 和 max 三个函数,然后将得到的变量发送到函数环境中,使它们可以在 ifelse 及后续计算中按名称使用。
#' Median-centred index of a vector
#'
#' Values below the median are divided by (median - min); all other values
#' are divided by (max - median). The result is multiplied by 100.
#'
#' @param x Vector to transform.
#' @param ... Extra arguments passed on to `median()`, `min()` and `max()`
#'   (for example `na.rm = TRUE`).
#' @return A vector the same length as `x`.
index <- function(x, ...) {
  summaries <- lapply(
    c(med = median, min = min, max = max),
    function(stat) stat(x, ...)
  )
  # Creates the local variables `med`, `min` and `max` in this function's
  # own environment so the lines below can refer to them by name.
  list2env(summaries, envir = environment())
  denom <- ifelse(x < med, med - min, max - med)
  (x - med) / denom * 100
}
然后我们可以使用sapply
或lapply
或其他来获取索引值。在此之前,我使用cbind
来避免新的分配。
# Apply index() to every column except the first two (na.rm = TRUE is passed
# along to index()), then bind the untouched first two columns on the left.
indVals <- cbind(df[1:2], sapply(df[-c(1:2)], index, na.rm=TRUE))
# Show the first five columns of the result.
indVals[,1:5]
# X1 X2 X3 X4 X5
# 1 28.188 -111.438 NaN NaN NaN
# 2 28.188 -111.312 100.0000000 100.000000 100.0000000
# 3 28.188 -111.188 19.5253013 19.690246 19.7010032
# 4 28.188 -111.062 76.7090200 76.524161 76.3880913
# 5 28.188 -110.938 -8.6049493 -10.013107 -10.6078282
# 6 28.188 -110.812 0.4739999 0.473519 0.5177805
# 7 28.188 -110.688 -7.9056758 -8.082774 -8.9585261
# 8 28.188 -110.562 -17.2526320 -16.622314 -16.8912667
# 9 28.188 -110.438 -100.0000000 -100.000000 -100.0000000