我想用一列(`stock`)自身的滞后值来替换该列中的缺失值,同时还要结合其他列(`formation`、`sales`)进行计算。下面的循环在我的原始数据集上运行非常耗时,有没有不使用循环的其他方法?
# Walk through the years one at a time so that the values written for one
# year are available as lag() inputs when the next year is processed.
for (i in 1990:1993) {
  df <- df %>%
    group_by(fuel) %>%
    mutate(
      # Only rows of the current loop year, and only years after 1991, are
      # rewritten; every other row keeps its existing value.
      stock = ifelse(
        year > 1991 & year == i,
        lag(stock) + formation + lag(sales),
        stock
      ),
      # sales is derived from the stock value that was just updated above.
      sales = ifelse(
        year > 1991 & year == i,
        stock - lag(stock),
        sales
      )
    )
}
数据集示例:
# Example data (looks like dput() output): one row per fuel and year, carrying
# the grouped_df class and grouping attributes (vars = "fuel") exactly as dumped.
df <- structure(
  list(
    year = c(1990L, 1991L, 1992L, 1993L, 1990L, 1991L, 1992L, 1993L),
    fuel = structure(
      c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
      .Label = c("a", "b"),
      class = "factor"
    ),
    stock = c(10, 11, NA, NA, 10, 11, NA, NA),
    sales = c(NA, 1, NA, NA, NA, 1, NA, NA),
    formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -8L),
  vars = "fuel",
  labels = structure(
    list(fuel = structure(1:2, .Label = c("a", "b"), class = "factor")),
    class = "data.frame",
    row.names = c(NA, -2L),
    vars = "fuel",
    drop = TRUE
  ),
  indices = list(0:3, 4:7),
  drop = TRUE,
  group_sizes = c(4L, 4L),
  biggest_group_size = 4L
)
答案 0 :(得分:0)
这是您要找的吗?在这种情况下,`TRUE` 基本上相当于 `ELSE` 语句。我相信您也可以使用其他类似的方法,但结果是相同的。
# Input for the reprex (looks like dput() output): four years for each of the
# two fuels, with the grouped_df class and grouping attributes kept as dumped.
df <- structure(
  list(
    year = c(1990L, 1991L, 1992L, 1993L, 1990L, 1991L, 1992L, 1993L),
    fuel = structure(
      c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
      .Label = c("a", "b"), class = "factor"
    ),
    stock = c(10, 11, NA, NA, 10, 11, NA, NA),
    sales = c(NA, 1, NA, NA, NA, 1, NA, NA),
    formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -8L),
  vars = "fuel",
  labels = structure(
    list(fuel = structure(1:2, .Label = c("a", "b"), class = "factor")),
    class = "data.frame", row.names = c(NA, -2L), vars = "fuel", drop = TRUE
  ),
  indices = list(0:3, 4:7),
  drop = TRUE,
  group_sizes = c(4L, 4L),
  biggest_group_size = 4L
)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Show the input data before the transformation.
cat("Before")
#> Before
print(df)
#> # A tibble: 8 x 5
#> # Groups: fuel [2]
#> year fuel stock sales formation
#> <int> <fct> <dbl> <dbl> <dbl>
#> 1 1990 a 10 NA 0.3
#> 2 1991 a 11 1 0.4
#> 3 1992 a NA NA 0.5
#> 4 1993 a NA NA 0.3
#> 5 1990 b 10 NA 0.7
#> 6 1991 b 11 1 0.4
#> 7 1992 b NA NA 0.5
#> 8 1993 b NA NA 0.7
# Recompute stock and sales in one vectorised mutate() instead of a loop.
# df is already grouped by fuel (see "Groups: fuel [2]" above), so the lag()
# values do not cross from one fuel into the other.
df <- df %>%
mutate(
stock = case_when(
# 1991 rows keep their observed stock.
year == 1991 ~ stock,
# TRUE is the catch-all branch: every other year is rebuilt from the previous
# row's stock and sales as they were before this mutate().
# NOTE(review): this branch also rewrites 1990, whose lag() is NA, so the known
# 1990 stock of 10 becomes NA; and 1993 stays NA because the 1992 values it
# depends on were still NA in the input. See rows 1, 4, 5 and 8 of the "After"
# output below. Only 1992 is actually filled.
TRUE ~ lag(stock) + formation + lag(sales)
),
sales = case_when(
# 1991 rows keep their observed sales.
year == 1991 ~ sales,
# Other years: change in stock, using the stock column computed just above.
TRUE ~ stock - lag(stock)
)
)
# Show the result of the transformation.
cat("After")
#> After
print(df)
#> # A tibble: 8 x 5
#> # Groups: fuel [2]
#> year fuel stock sales formation
#> <int> <fct> <dbl> <dbl> <dbl>
#> 1 1990 a NA NA 0.3
#> 2 1991 a 11 1 0.4
#> 3 1992 a 12.5 1.5 0.5
#> 4 1993 a NA NA 0.3
#> 5 1990 b NA NA 0.7
#> 6 1991 b 11 1 0.4
#> 7 1992 b 12.5 1.5 0.5
#> 8 1993 b NA NA 0.7
由reprex package(v0.2.1)于2019-01-04创建
答案 1 :(得分:0)
这是使用临时列的代数解决方案。也许有一种更优雅的方式,但我认为这可行。
df2 <- df %>%
  # All steps below are evaluated separately for each fuel.
  group_by(fuel) %>%
  # Helper copies of stock and sales; the originals keep their NAs so the
  # rows that need filling can still be identified later.
  mutate(
    stock_filled = stock,
    sales_filled = sales
  ) %>%
  # Carry the last observed stock and sales down over the missing rows.
  fill(stock_filled, sales_filled) %>%
  mutate(
    # Flag the rows whose stock was missing in the input.
    stock_missing = is.na(stock),
    # Running total of formation, counted only on rows with missing stock.
    formation_run = cumsum(if_else(stock_missing, formation, 0)),
    # Amount added on top of the carried-forward stock for missing rows.
    increment = if_else(stock_missing, lag(sales_filled) + formation_run, 0),
    stock = if_else(stock_missing, stock_filled + increment, stock),
    # Missing sales are defined as the change in the completed stock.
    sales = if_else(is.na(sales), stock - lag(stock), sales)
  ) %>%
  # Keep only the original columns; the helper columns are dropped.
  select(year:formation)
> df2
## A tibble: 8 x 5
## Groups: fuel [2]
# year fuel stock sales formation
# <int> <fct> <dbl> <dbl> <dbl>
#1 1990 a 10 NA 0.3
#2 1991 a 11 1 0.4
#3 1992 a 12.5 1.5 0.5
#4 1993 a 12.8 0.3 0.3
#5 1990 b 10 NA 0.7
#6 1991 b 11 1 0.4
#7 1992 b 12.5 1.5 0.5
#8 1993 b 13.2 0.700 0.7