我有这个NDVI时间序列数据集,其中第一列是日期,接下来的三列是三种不同ID(59231、158157、282302)的NDVI数据
Date X59231 X158157 X282302
1 13149 NA 0.398 NA
2 13157 0.344 0.267 0.327
3 13165 NA 0.431 NA
. ..... ..... ..... .....
这是赔率:
structure(list(Date = c(13149L, 13157L, 13165L, 13173L, 13181L,
13189L, 13197L, 13205L, 13213L, 13221L, 13229L, 13237L, 13245L,
13253L, 13261L, 13269L, 13277L, 13285L, 13293L, 13301L, 13309L,
13317L, 13325L, 13333L, 13341L, 13349L, 13357L, 13365L, 13373L,
13381L, 13389L, 13397L, 13405L, 13413L, 13421L, 13429L, 13437L,
13445L, 13453L, 13461L, 13469L, 13477L, 13485L, 13493L, 13501L,
13509L), X59231 = c(NA, 0.344, NA, 0.398, NA, 0.587, NA, NA,
0.451, 0.597, 0.593, 0.556, 0.559, 0.375, 0.374, 0.386, 0.425,
0.383, 0.349, 0.315, 0.282, 0.323, 0.315, 0.359, 0.292, 0.271,
0.297, 0.307, 0.322, 0.344, 0.297, 0.285, 0.273, 0.282, 0.281,
0.304, 0.314, NA, 0.391, 0.601, 0.65, NA, 0.653, 0.666, 0.519,
0.625), X158157 = c(0.398, 0.267, 0.431, NA, 0.36, 0.434, 0.434,
0.465, 0.447, 0.521, 0.539, 0.563, 0.595, 0.541, 0.553, 0.381,
0.533, 0.505, 0.551, NA, 0.546, 0.535, 0.523, 0.501, 0.508, 0.51,
0.506, 0.51, 0.514, 0.526, 0.555, 0.545, 0.53, 0.539, 0.531,
0.53, NA, 0.585, 0.597, 0.32, 0.569, 0.601, NA, NA, 0.52, 0.532
), X282302 = c(NA, 0.327, NA, 0.282, 0.26, 0.293, 0.25, 0.288,
0.336, 0.299, 0.29, 0.28, NA, 0.305, 0.319, NA, 0.255, 0.292,
0.294, NA, NA, 0.367, 0.331, 0.344, 0.283, 0.284, 0.291, 0.273,
0.239, 0.285, 0.249, 0.285, 0.247, 0.288, 0.276, NA, 0.317, 0.375,
0.38, 0.417, 0.374, 0.491, NA, NA, NA, 0.471)), class = "data.frame", row.names = c(NA,
-46L))
我运行以下代码来平滑时间序列(消除噪声),并找到每个ID的NDVI时间序列的多个最大值和最小值。
rm(list=ls())
#Read in csv data
df=read.csv("Data.csv", header = TRUE)
date_col = df[,1]
num_cols = length(df[1,]) #count number of columns there are
num_Dcols = num_cols-1 #count the number of columns there are minus the index (first) column
#Function to append columns to a dataframe
cbind.fill <- function(...){
nm <- list(...)
nm <- lapply(nm, as.matrix)
n <- max(sapply(nm, nrow))
do.call(cbind, lapply(nm, function (x)
rbind(x, matrix(, n-nrow(x), ncol(x)))))
}
#Create an empty data frame
finalDF = data.frame(matrix(ncol=(0),nrow=0)) #create empty dataframe
#Create an empty vector for column names
CNames = c()
for (i in c(1:num_Dcols)){
df_sub = df[,c(1,i+1)] #create a data frame of the date column and the i+1 column
df_removeNA = na.omit(df_sub)
#Append the date column to the final data frame
df_date = df_removeNA[,1]
finalDF = cbind.fill(finalDF, df_date)
#Append the NDVI timeseries column to the final data frame
df_data = df_removeNA[,2]
finalDF = cbind.fill(finalDF, df_data)
stl_1=stl(ts(df_data, frequency=4), "periodic")
#Function to calculate all the maximums
ts_max<-function(signal)
{
points_max=which(diff(sign(diff(signal)))==-2)+1
return(points_max)
}
#Function to calculate all the minimums
ts_min<-function(signal)
{
points_min=which(diff(sign(diff(-signal)))==-2)+1
return(points_min)
}
#Smooth the timeseries
trend_1=as.numeric(stl_1$time.series[,2])
#Find max and mins of the smoothed timeseries
max_1=ts_max(trend_1)
min_1=ts_min(trend_1)
#Append max and mins to the final data frame
finalDF = cbind.fill(finalDF, df_data[max_1])
finalDF = cbind.fill(finalDF, df_data[min_1])
#Append column names to the column names vector
CNames = c(CNames, toString(colnames(df_sub[1])))
CNames = c(CNames, toString(colnames(df_sub[2])))
CNames = c(CNames, paste(c(toString(colnames(df_sub[2])), "_Max"), collapse=''))
CNames = c(CNames, paste(c(toString(colnames(df_sub[2])), "_Min"), collapse=''))
#Plot final results
plot(df_date, trend_1, type = 'l')
abline(v=df_date[max_1], col="red")
abline(v=df_date[min_1], col="blue")
}
#Rename final data frame's column names
colnames(finalDF) = CNames
#Export final data frame to CSV
write.csv(finalDF, file = "finalDF_smooth.csv")
这是NDVI时间序列数据的第一列的所有最大值和最小值的图像。 我要弄清楚的是如何在每个ID列旁边的原始(或新)数据框中添加两个新列,以便在其中存储最大值和最小值。最大值和最小值需要放在与其相应日期匹配的单元格中。换句话说,每个ID列都需要两个重复的列。插入每个ID列旁边,所有值均用NA替换,最大值和最小值除外。两者都是在上面的平滑代码中计算的。例如,这就是我需要的最终数据框看起来像这样:
Date 59231 59231_Max 59231_Min 158157 158157_Max 158157_Min 282302 282302_Max 282302_Min
13149 NA NA NA 0.398 NA NA NA NA NA
13157 0.344 NA NA 0.267 NA NA 0.327 NA NA
13165 NA NA NA 0.431 NA NA NA NA NA
13173 0.398 NA NA NA NA NA 0.282 NA NA
13181 NA NA NA 0.360 NA NA 0.260 NA NA
13189 0.587 NA NA 0.434 NA NA 0.293 NA 0.293
13197 NA NA NA 0.434 NA NA 0.25 NA NA
13205 NA NA NA 0.465 NA NA 0.288 NA NA
13213 0.451 NA NA 0.447 NA NA 0.336 NA NA
13221 0.597 NA NA 0.521 NA NA 0.299 0.299 NA
... ... .. .. ... .. .. ... ... ..
这就是现在的样子。
Date 59231 59231_Max 59231_Min Date 158157 158157_Max 158157_Min Date 282302 282302_Max 282302_Min
13157 0.344 0.593 0.386 13149 0.398 0.595 0.533 13157 0.327 0.299 0.293
13173 0.398 0.425 0.282 13157 0.267 0.546 0.508 13173 0.282 0.331 0.255
13189 0.587 0.315 0.297 13165 0.431 0.545 0.539 13181 0.260 NA 0.285
13213 0.451 0.322 0.273 13181 0.360 0.530 0.320 13189 0.293 NA NA
13221 0.597 0.653 NA 13189 0.434 NA NA 13197 0.250 NA NA
13229 0.593 NA NA 13197 0.434 NA NA 13205 0.288 NA NA
13237 0.556 NA NA 13205 0.465 NA NA 13213 0.336 NA NA
13245 0.559 NA NA 13213 0.447 NA NA 13221 0.299 NA NA
13253 0.375 NA NA 13221 0.521 NA NA 13229 0.290 NA NA
13261 0.374 NA NA 13229 0.539 NA NA 13237 0.280 NA NA
..... ... .. .. ..... ..... .. .. ..... ..... ... ..
注意:我必须在每个循环中省略NA,因此代码会生成一个CSV文件,其中每个ID都有一个唯一的子集日期列。我只想像上面理想表中的一个日期列即可。
在我的代码中,我开始创建一个新的数据框,并在每个循环后追加每一列,但是我不知道如何在正确的单元格中匹配最大值和最小值。现在,所有的最大值和最小值都堆积在其列的顶部。有任何想法吗?谢谢。
答案 0 :(得分:0)
这个怎么样?它将添加最小和最大列。
df
df$max <- apply(df[2:4], 1, max, na.rm = TRUE)
df$min <- apply(df[2:4], 1, min, na.rm = TRUE)
head(df)
哪个会产生:
ID X59231 X158157 X282302 max min
1 13149 NA 0.398 NA 0.398 0.398
2 13157 0.344 0.267 0.327 0.344 0.267
3 13165 NA 0.431 NA 0.431 0.431
4 13173 0.398 NA 0.282 0.398 0.282
5 13181 NA 0.360 0.260 0.360 0.260
6 13189 0.587 0.434 0.293 0.587 0.293
我已根据您提供的说明添加了此内容。您可以忽略以上内容:
这将产生您想要的。我只为第一列做过,但是您可以更改变量以获取其他列。
library(dplyr)
df2 <- as_tibble(df)
df2 <- df2 %>%
mutate(X59231_min = min(X59231, na.rm = TRUE))%>%
mutate(X59231_min = ifelse(X59231 == X59231_min, X59231_min, NA)) %>%
mutate(X59231_max = max(X59231, na.rm = TRUE))%>%
mutate(X59231_max = ifelse(X59231 == X59231_max, X59231_max, NA))
所以:
df2 %>% filter(!is.na(X59231_min))
给予我们
# A tibble: 1 x 6
ID X59231 X158157 X282302 X59231_min X59231_max
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 13349 0.271 0.51 0.284 0.271 NA
并且:
df2 %>% filter(!is.na(X59231_max))
显示:
# A tibble: 1 x 6
ID X59231 X158157 X282302 X59231_min X59231_max
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 13493 0.666 NA NA NA 0.666
您应该能够在其他列中做到这一点。