我有两个数据集: 首先是水分数据:按日期/地点/处理/树木进行组织,并包含百分比(水分百分比)
我还有另一个数据集(SCD),其中包含日期/地点/处理/树,然后有采集量。 首先,日期不排队(按天关闭)。每个处理方法有8棵树,每个站点有2种处理方法
我想使用水分数据在给定的采样日内对每棵树的数据进行插值(我已经使用平均值进行了此操作)。
我希望根据每个给定地点/处理/树木的水分来回归日期,然后从样本收集数据中获取日期以获取水分百分比。
水分数据片段:
structure(list(date = structure(c(17681, 17681,
17681, 17681,
17681, 17681, 17681, 17681, 17681, 17681, 17681,
17681, 17681,
17681, 17681, 17681, 17681, 17681, 17681, 17681,
17681, 17681,
17681, 17681, 17681, 17681, 17681, 17681, 17681,
17681, 17681,
17681, 17681, 17685, 17685, 17685, 17685, 17685,
17685, 17685,
17685, 17685, 17685, 17685, 17685, 17685, 17685,
17685, 17685,
17685), class = "Date"), site = structure(c(2L, 4L,
2L, 4L, 6L,
2L, 4L, 4L, 2L, 4L, 4L, 6L, 4L, 6L, 4L, 6L, 4L, 6L,
4L, 2L, 4L,
6L, 4L, 6L, 4L, 6L, 4L, 6L, 4L, 6L, 2L, 4L, 6L, 2L,
4L, 2L, 4L,
6L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 6L, 2L, 4L, 6L, 4L),
.Label = c("hydric",
"Hydric", "mesic", "Mesic", "xeric", "Xeric"), class
= "factor"),
trt = structure(c(1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L), .Label =
c("c",
"s"), class = "factor"), tree = c(1L, 1L, 1L, 1L, 1L,
2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
6L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 1L,
1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L),
percent = c(55.19, 54.22, 56.8, 42.05, 45, 67.53, 50,
41.85,
58.39, 51.76, 38.2, 43.6, 60.27, 32.25, 42.1, 49.25,
47.85,
25.25, 53.1, 44.25, 46.15, 42.65, 52.18, 40.4, 37.8,
47,
44.25, 43.75, 31.8, 37.05, 47, 62.34, 48.55, 64.86,
44.45,
61.9, 47.1, 47.25, 68.05, 46.77, 59.93, 33.5, 68.11,
52.61,
41.05, 44.2, 63.76, 58.42, 34.8, 62.92), year =
c(2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L,
2018L, 2018L, 2018L, 2018L)), .Names = c("date",
"site",
"trt", "tree", "percent", "year"), row.names = c(NA,
50L), class = "data.frame")
样品采集数据:
structure(list(Date = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L,
3L), .Label = c("43333", "5/31/2018", "6/1/2018",
"6/10/2018",
"6/11/2018", "6/14/2018", "6/15/2018", "6/16/2018",
"6/2/2018",
"6/20/2018", "6/21/2018", "6/24/2018", "6/25/2018",
"6/26/2018",
"6/27/2018", "6/28/2018", "6/29/2018", "6/9/2018",
"7/14/2018",
"7/15/2018", "7/16/2018", "7/20/2018", "7/21/2018",
"7/23/2018",
"7/24/2018", "7/25/2018", "7/28/2018", "7/29/2018",
"7/30/2018",
"7/6/2018", "7/7/2018", "7/9/2018", "8/11/2018",
"8/12/2018",
"8/16/2018", "8/17/2018", "8/18/2018", "8/2/2018",
"8/20/2018",
"8/21/2018", "8/22/2018", "8/24/2018", "8/26/2018",
"8/27/2018",
"8/3/2018", "8/4/2018", "8/9/2018"), class =
"factor"), Site = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Hydric",
"Mesic", "Xeric"
), class = "factor"), treatment = structure(c(5L, 5L,
5L, 5L,
5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L,
5L, 5L, 5L,
5L, 5L, 5L, 5L), .Label = c("Ancillary", "Control",
"Fertalized",
"Lysim", "Snowfence"), class = "factor"), Plot =
c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L), Fill = structure(c(3L, 7L, 7L,
3L, 5L, 5L,
5L, 4L, 5L, 5L, 3L, 5L, 5L, 5L, 4L, 7L, 3L, 7L, 7L,
4L, 5L, 4L,
7L, 5L), .Label = c("", "Broken", "D", "F", "M",
"reinstall",
"S", "Sp", "SP", "VF"), class = "factor"), Mass =
c(12.2, 7.2,
12, 8.1, 11.4, 8.3, 12.5, 12.8, 12.6, 12.9, 7.4,
12.6, 12.8,
9.8, 12.8, 9.3, 8, 9.8, 11.6, 12.8, 10.4, 13.1, 10.6,
12.9)), .Names = c("Date",
"Site", "treatment", "Plot", "Fill", "Mass"),
row.names = c(14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L,
28L, 29L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L),
class = "data.frame")
Moisture$date<-as.Date(Moisture$date,
format="%m/%d/%Y")
SCD$Date<-as.Date(SCD$Date, format="%m/%d/%Y")
因此,目标是获取与SCD数据集一致的水分百分比数据,以进行进一步分析。我想对每棵树进行回归,然后输入SCD日期/树/站点并返回百分比,并将其添加为SCD中的列。
我知道这远非正确,但这是我目前的尝试。
library(dplyr)
MoistForSCD <- Moisture %>%
group_by(site,trt,tree) %>%
arrange(date,tree,site,trt) %>%
mutate(Loess = predict(loess(percent ~ date, span =
.5, data=.),SCD))
SCD$M<-MoistForSCD
答案 0 :(得分:1)
您可以这样做:
library(tidyverse)
library(lubridate)
Moisture %>%
mutate(set = 'train') %>%
select(-year) %>%
bind_rows(
SCD %>%
select(Date, Site, treatment, Plot, Mass) %>%
rename_at(1:4, ~colnames(Moisture)[1:4]) %>%
mutate(
trt = trt %>% str_sub(1, 1) %>% str_to_lower(),
set = 'test'
)
) %>%
mutate(month = date %>% month()) %>%
group_by(tree) %>%
nest() %>%
mutate(
train = map(data, ~filter(.x, set == 'train')),
test = map(data, ~filter(.x, set == 'test') %>% select(-percent)),
model = map(train, ~lm(percent ~ date + trt, data = .x)),
pred = map2(model, test, ~predict(.x, newdata = .y))
) %>%
select(test, pred, tree) %>%
unnest() %>%
select(-set)
如果您有更多数据,还可以在公式中包含site
变量:
percent ~ date + site + trt