我有一个数据框,其中包含不同季节的样本。我想按年份汇总各站点是否有春季(3月至5月)和秋季(9月至11月)的样本。例如,如果站点A有一个来自2007年春季的样本,那么对应的单元格将显示为“TRUE”。这是一个示例数据集:
# Example data: a single autumn-2007 sample for SiteA, followed by one sample
# per site (A, B, C) in spring 2008 and again in autumn 2008 -- 7 rows in all.
sampling_days <- as.Date(c("2007-9-1", "2008-3-1", "2008-9-1"))
Dates <- data.frame(rep(sampling_days, times = c(1, 3, 3)))
Sites <- data.frame(
  factor(c("SiteA", rep(c("SiteA", "SiteB", "SiteC"), times = 2)))
)
# One random integer in 0..50 per row (drawn with replacement).
Values <- data.frame(sample(0:50, size = 7, replace = TRUE))
# Bind the three single-column frames and give the columns their final names.
Dataframe <- setNames(cbind(Dates, Sites, Values), c("date", "site", "value"))
我设法基于以下函数在此数据框中创建了因子“Season”(季节)。
# Calendar month (1-12) of each sample.
Dataframe$Months <- as.numeric(format(Dataframe$date, "%m"))
# Map each month to its season using the definition from the question:
# spring = March-May, autumn = September-November. A 12-entry lookup is used
# instead of cut() because winter wraps around the year end (December,
# January, February); the previous breaks put February in spring, May in
# summer, August in autumn, November in winter and left December as NA.
season_of_month <- c(
  "Winter", "Winter",            # Jan, Feb
  "Spring", "Spring", "Spring",  # Mar, Apr, May
  "Summer", "Summer", "Summer",  # Jun, Jul, Aug
  "Autumn", "Autumn", "Autumn",  # Sep, Oct, Nov
  "Winter"                       # Dec
)
# Fixed level order so that downstream tables always list the seasons the
# same way, even when a season has no samples.
Dataframe$Season <- factor(
  season_of_month[Dataframe$Months],
  levels = c("Winter", "Spring", "Summer", "Autumn")
)
但我不确定接下来该怎么做。输出应该如下所示。
# Expected result for the example data: one row per site (A, B, C) and one
# column per sampled season/year. Cells hold the strings "TRUE"/"FALSE".
A <- rep("TRUE", times = 3)
B <- c("FALSE", rep("TRUE", times = 2))
C <- c("FALSE", rep("TRUE", times = 2))
Output <- as.data.frame(rbind(A, B, C))
# The example samples were taken in autumn 2007, spring 2008 and autumn 2008,
# so the middle column is spring 2008 (the data has no spring 2007 sample).
colnames(Output) <- c("Autumn.07", "Spring.08", "Autumn.08")
答案 0 :(得分:1)
这是一个建议的方案:
# Label every sample with its season and four-digit year, e.g. "Autumn.2007".
# interaction() keeps every season x year combination as a level, including
# combinations that were never sampled.
Dataframe$Samplings <- interaction(
  Dataframe$Season,
  format(Dataframe$date, "%Y")
)
# Cross-tabulate site against sampling period. table() derives the row and
# column labels from the data itself, so the labels cannot get out of step
# with the cell order (labelling rows with unique(site) mislabels them
# whenever sites do not first appear in sorted order).
counts <- table(Dataframe$site, Dataframe$Samplings)
# A cell is TRUE when the site has at least one sample in that period.
output <- matrix(
  as.vector(counts) > 0,
  nrow = nrow(counts),
  dimnames = unname(dimnames(counts))
)
output # with all time interactions
# you can clear it with
# (drop = FALSE keeps a matrix even if only one period was ever sampled)
output[, colSums(output) != 0, drop = FALSE]
答案 1 :(得分:1)
使用 reshape2::dcast:
# Shorten the site labels ("SiteA" -> "A") and add a two-digit year column.
Dataframe$site <- gsub("Site", "", Dataframe$site)
Dataframe$year <- format(Dataframe$date, "%y")
# Count the samples per site (rows) and season/year (columns). value.var is
# named explicitly so dcast() does not have to guess which column to
# aggregate; with length() the counts are the same whichever column is used.
temp <- reshape2::dcast(
  Dataframe, site ~ Season + year,
  fun.aggregate = length, value.var = "value"
)
# Turn counts into presence/absence. drop = FALSE keeps the result a matrix
# even when there is only a single season/year column.
ans <- as.matrix(temp[, -1, drop = FALSE]) > 0
rownames(ans) <- temp[[1]]
ans
由于标签重复(“Winter”出现了两次),您的 Dataframe$Season 会产生警告。您可能需要修复这个问题。
答案 2 :(得分:1)
我认为这就是你正在寻找的东西。时间标签与问题中的不完全相同,但我希望它仍然可以理解。
library(reshape2)
# prepare the input, to have a handy label for the columns
Dataframe$Year <- as.numeric(format(Dataframe$date, "%Y"))
Dataframe$TimeLabel <- paste0(Dataframe$Season, ".", Dataframe$Year)
# This is in stages, to make it clear what's happening.
# Create a data frame with one row per site and one column per time label,
# each cell holding the number of samples. Aggregating with length() is
# requested explicitly: if dcast() is left to fall back to it (which happens
# as soon as a site has two samples in one period) empty cells become 0
# rather than NA, and an NA-based presence test would be TRUE everywhere.
df1 <- dcast(
  Dataframe, site ~ TimeLabel,
  fun.aggregate = length, value.var = "value"
)
# turn the counts into false/true, while ignoring the site column
df2 <- as.matrix(df1[, -1, drop = FALSE]) > 0
# add back the site labels for rows, under a proper column name ("site")
df3 <- data.frame(site = df1$site, df2, check.names = FALSE)