我有一个数据集,其中每一行对应一位部长(以部长 ID 标识)。每一行都有若干日期列(日、月、年),分别记录该部长进入政府(begin)和离开政府(exit)的日期。下面是当前数据的示例:
# Example input: one row per minister, with the entry ("begin") and exit
# dates stored as separate day / month / year columns, all kept as text.
data <- data.frame(
  id_min = c("1030015", "1030028"),
  begin_day = c("29", "4"),
  begin_month = c("12", "1"),
  begin_year = c("2019", "2020"),
  exit_day = c("3", "10"),
  exit_month = c("1", "1"),
  exit_year = c("2020", "2020"),
  stringsAsFactors = FALSE
)
我想把每一行扩展为“开始日期”与“退出日期”之间每天一行,并创建一个新列(number_days)来计算部长在政府任职的天数。数据集中的其他变量(此处省略)只需在扩展后的数据中原样重复即可。这是我期望得到的数据框输出:
我已经能够把日期拼接成 %Y-%m-%d 格式,并用以下代码创建新列“number_days”:
# Assemble proper Date columns from the separate year / month / day text
# columns. The format is passed by name so the parsing rule is explicit.
data$begin_date <- as.Date(
  with(data, paste(begin_year, begin_month, begin_day, sep = "-")),
  format = "%Y-%m-%d"
)
data$exit_date <- as.Date(
  with(data, paste(exit_year, exit_month, exit_day, sep = "-")),
  format = "%Y-%m-%d"
)
# Days in office, counting both the first and the last day (hence the + 1L).
# Stored as a plain integer rather than a difftime so it can be used directly
# as the number of rows each minister expands to.
data$number_days <- as.integer(data$exit_date - data$begin_date) + 1L
但是我没能成功扩展两个日期(begin_date 和 exit_date)之间的行。
我曾尝试使用 tidyr::complete() 来实现这一点。
答案 0 :(得分:3)
一种使用 <string.h> 的
解决方案:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Build a newly allocated string in which every character of string1 is
 * followed by a complete copy of string2, e.g. ("abc", "12") -> "a12b12c12".
 *
 * string1, string2: NUL-terminated strings; both must be non-NULL because
 *                   strlen() is called on them directly.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL when the required size does not fit in size_t or the allocation
 * fails. An empty string1 yields an empty (but valid) string.
 */
char *CreateString(const char *string1, const char *string2) {
    size_t size1 = strlen(string1);
    size_t size2 = strlen(string2);
    size_t chunk = size2 + 1; /* one char of string1 plus all of string2 */

    /* Reject sizes for which size1 * chunk + 1 would wrap around size_t. */
    if (chunk == 0 || size1 > ((size_t)-1 - 1) / chunk) {
        return NULL;
    }

    size_t size = size1 * chunk;
    char *stringptr = malloc(size + 1);
    if (stringptr) {
        char *out = stringptr;
        for (size_t i = 0; i < size1; i++) {
            *out++ = string1[i];
            memcpy(out, string2, size2);
            out += size2;
        }
        /* Terminate explicitly so the result is valid even when the loop
         * body never runs (empty string1). */
        *out = '\0';
    }
    return stringptr;
}
/*
 * Demo driver: interleaves "123" after every character of "chocolate" and
 * prints the result, or a diagnostic if no memory could be obtained.
 * Always exits with status 0.
 */
int main() {
    const char base[] = "chocolate";
    const char filler[] = "123";
    char *combined = CreateString(base, filler);

    /* Handle the failure case first so the happy path reads straight down. */
    if (combined == NULL) {
        printf("cannot allocate memory\n");
        return 0;
    }

    printf("%s\n", combined);
    free(combined);
    return 0;
}
答案 1 :(得分:2)
您也可以使用 tsibble
软件包中的 fill_gaps()
函数。
library(tsibble)
library(dplyr)
library(tidyr)
# Expand each minister's row into one row per calendar day between the begin
# and exit dates, reusing begin_day / begin_month / begin_year to hold the
# date that each expanded row represents. Expects `data` to have id_min plus
# the begin_* and exit_* day/month/year columns.
data %>%
mutate(
# Build Date columns from the year-month-day parts.
exit_date = as.Date(paste(exit_year, exit_month, exit_day, sep = "-")),
begin_date = as.Date(paste(begin_year, begin_month, begin_day, sep = "-")),
# + 1 so that both the begin day and the exit day are counted.
number_days = as.numeric(exit_date - begin_date) + 1
) %>%
# Stack the two *_date columns into a single `date` column; `event` records
# which of the two columns each value came from.
pivot_longer(cols = ends_with("date"), names_to = "event",values_to = "date") %>%
# Treat the data as one time series per minister, indexed by date.
# NOTE(review): a one-day tenure (begin == exit) would give two rows with the
# same id_min and date; as_tsibble() presumably rejects that -- confirm.
as_tsibble(key = id_min, index = date) %>%
# Insert a row for every date missing between the begin and exit rows of each
# key; presumably the other columns of the inserted rows are NA until the
# final fill() -- confirm against the tsibble documentation.
fill_gaps() %>%
# Overwrite the begin_* columns with the row's own date. format() returns
# character strings, so day and month come out as "01" rather than "1".
mutate(begin_day = format(date, "%d"),
begin_month = format(date, "%m"),
begin_year = format(date, "%Y")) %>%
as_tibble() %>%
# Drop the helper columns, then carry the last non-missing value of every
# remaining column downward into the inserted rows.
# NOTE(review): fill() runs without grouping here; this looks safe only
# because each minister's first row is a complete (non-inserted) row.
select(-event, -date) %>%
fill(everything())
# A tibble: 13 x 8
id_min begin_day begin_month begin_year exit_day exit_month exit_year number_days
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1030015 29 12 2019 3 1 2020 6
2 1030015 30 12 2019 3 1 2020 6
3 1030015 31 12 2019 3 1 2020 6
4 1030015 01 01 2020 3 1 2020 6
5 1030015 02 01 2020 3 1 2020 6
6 1030015 03 01 2020 3 1 2020 6
7 1030028 04 01 2020 10 1 2020 7
8 1030028 05 01 2020 10 1 2020 7
9 1030028 06 01 2020 10 1 2020 7
10 1030028 07 01 2020 10 1 2020 7
11 1030028 08 01 2020 10 1 2020 7
12 1030028 09 01 2020 10 1 2020 7
13 1030028 10 01 2020 10 1 2020 7
答案 2 :(得分:1)
一种方法是使用 tidyr 的 uncount()
函数:
library(dplyr)
library(tidyr)
library(lubridate)
# Expand every row of `data` into one row per day in office (begin and exit
# days both included) and overwrite begin_day / begin_month / begin_year with
# the calendar date that each expanded row stands for. All other columns are
# repeated unchanged. Returns an ungrouped tibble.
data %>%
  mutate(
    start_date = mdy(paste(begin_month, begin_day, begin_year, sep = "-")),
    end_date = mdy(paste(exit_month, exit_day, exit_year, sep = "-")),
    # + 1L so that both endpoints are counted.
    number_days = as.integer(end_date - start_date) + 1L
  ) %>%
  # Repeat each row number_days times. .remove = FALSE keeps number_days in
  # the result; .id numbers the copies of each original row 1, 2, ...
  uncount(number_days, .remove = FALSE, .id = "day_index") %>%
  mutate(
    # Offset from the row's own start_date instead of rebuilding a sequence
    # per id_min group: this stays correct when the same id_min occurs on
    # several rows (e.g. a minister with more than one spell in office).
    current_date = start_date + (day_index - 1L),
    begin_day = day(current_date),
    begin_month = month(current_date),
    begin_year = year(current_date)
  ) %>%
  dplyr::select(-start_date, -end_date, -day_index, -current_date) %>%
  as_tibble()
# A tibble: 13 x 8
# Groups: id_min [2]
id_min begin_day begin_month begin_year exit_day exit_month exit_year number_days
<chr> <int> <int> <int> <chr> <chr> <chr> <int>
1 1030015 29 12 2019 3 1 2020 6
2 1030015 30 12 2019 3 1 2020 6
3 1030015 31 12 2019 3 1 2020 6
4 1030015 1 1 2020 3 1 2020 6
5 1030015 2 1 2020 3 1 2020 6
6 1030015 3 1 2020 3 1 2020 6
7 1030028 4 1 2020 10 1 2020 7
8 1030028 5 1 2020 10 1 2020 7
9 1030028 6 1 2020 10 1 2020 7
10 1030028 7 1 2020 10 1 2020 7
11 1030028 8 1 2020 10 1 2020 7
12 1030028 9 1 2020 10 1 2020 7
13 1030028 10 1 2020 10 1 2020 7