
时间:2019-10-09 00:26:48

标签: r spread



# Input table: one row per location with the year it was conquered.
df1 <- data.frame(
  locationID = c(1, 2, 3),
  conquered_in = c(1931, 1932, 1929)
)

  locationID conquered_in
1          1         1931
2          2         1932
3          3         1929


# Desired result: one row per location and year, with a 0/1 conquered flag.
# NOTE(review): df1 lists location 2 as conquered in 1932, yet its 1932 flag
# here is 0 -- confirm which of the two is intended.
df2 <- data.frame(
  locationID = rep(c(1, 2, 3), each = 4),
  year = rep(c(1929, 1930, 1931, 1932), times = 3),
  conquered = c(
    0, 0, 1, 1,
    0, 0, 0, 0,
    1, 1, 1, 1
  )
)

   locationID year conquered
1           1 1929         0
2           1 1930         0
3           1 1931         1
4           1 1932         1
5           2 1929         0
6           2 1930         0
7           2 1931         0
8           2 1932         0
9           3 1929         1
10          3 1930         1
11          3 1931         1
12          3 1932         1

我最初的策略是先对 `conquered` 列使用 `spread`,然后再尝试 `gather`。另一个回答(原文链接标题为 "This answer")似乎很接近,但我无法用 `fill` 解决这个问题,因为我还需要把被征服之后的年份也填充为 1。

2 个答案:

答案 0 :(得分:1)



# Flag each location's conquest year, expand to every location/year pair in
# the observed year range (pairs not in df1 get conquered = 0), then take a
# running count of the flag within each location so it is 1 from the conquest
# year onward.
df1 %>%
  mutate(conquered = 1) %>%
  complete(
    locationID,
    conquered_in = min(conquered_in):max(conquered_in),
    fill = list(conquered = 0)
  ) %>%
  group_by(locationID) %>%
  mutate(conquered = cumsum(conquered == 1))

# A tibble: 12 x 3
# Groups:   locationID [3]
   locationID conquered_in conquered
        <dbl>        <dbl>     <int>
 1          1         1929         0
 2          1         1930         0
 3          1         1931         1
 4          1         1932         1
 5          2         1929         0
 6          2         1930         0
 7          2         1931         0
 8          2         1932         1
 9          3         1929         1
10          3         1930         1
11          3         1931         1
12          3         1932         1

答案 1 :(得分:0)



df1 <- data.frame(locationID = c(1, 2, 3), conquered_in = c(1931, 1932, 1929))

# Every year the result should cover.
df2 <- data.frame(year = seq(1929, 1940, by = 1))

# Build the full year x location grid. Rows that originate from df1 mark the
# conquest year (conquered = 1); grid rows added by complete() get NA.
df3 <- full_join(df2, df1, by = c("year" = "conquered_in")) %>%
  mutate(conquered = if_else(!is.na(locationID), 1, 0)) %>%
  complete(year, locationID) %>%
  # Years with no conquest come out of the join with locationID = NA; drop
  # those rows so NA is never treated as a location.
  filter(!is.na(locationID)) %>%
  arrange(locationID)

# A location counts as conquered from its first conquest year onward.
df3 <- df3 %>%
  group_by(locationID) %>%
  # 2000 is a sentinel later than every covered year, so a location that is
  # never conquered stays 0 throughout.
  mutate(
    conquered = if_else(
      year >= min(2000, year[conquered == 1], na.rm = TRUE), 1, 0
    )
  ) %>%
  ungroup()

df3 %>% filter(year <= 1932)
# A tibble: 12 x 3
    year locationID conquered
   <dbl>      <dbl>     <dbl>
 1  1929          1         0
 2  1930          1         0
 3  1931          1         1
 4  1932          1         1
 5  1929          2         0
 6  1930          2         0
 7  1931          2         0
 8  1932          2         1
 9  1929          3         1
10  1930          3         1
11  1931          3         1
12  1932          3         1