我有一个由出生、死亡和结婚日期组成的数据框。我想创建一个表,统计每个出生群组(cohort)和每个结婚时期内的结婚事件数(不把 NA 计算在内)。我还想把它绘制成图。
这是数据:
# Example data in dput() form: a data.frame with 30 rows and eight columns
# (born, dead, cohort, marr_date, birth_event, death_event, marriage_event,
# Period_Marriage).
#   - born / dead / marr_date: numeric dates (they look like decimal years --
#     TODO confirm); dead and marr_date contain NA.
#   - cohort / Period_Marriage: factors whose labels are decades
#     ("1710".."1910" and "1810".."1900"); Period_Marriage is NA wherever
#     marr_date is NA.
#   - *_event: numeric 0/1 indicator columns.
# The expression is not assigned to a name here; assign it before use,
# e.g. `data <- structure(...)`.
structure(list(born = c(1795.63287671233, 1796.8606557377, 1769.47671232877,
1800.32328767123, 1799.98904109589, 1784.53278688525, 1791.6602739726,
1797.40547945205, 1797.01643835616, 1790.2, 1799.88767123288,
1798.56438356164, 1798.91506849315, 1797.26575342466, 1795.6904109589,
1790.2904109589, 1786.57534246575, 1783.97260273973, 1796.4262295082,
1798.49315068493, 1797.95342465753, 1796.61202185792, 1791.99178082192,
1793.53424657534, 1793.77808219178, 1789.06575342466, 1794.06301369863,
1794.73698630137, 1792.03278688525, 1788.45628415301), dead = c(1878.04383561644,
NA, 1846.00273972603, 1853.66575342466, 1875.04931506849, 1842.76164383562,
1844.48633879781, 1870.34246575342, 1880.24863387978, 1858.39178082192,
1861.77260273973, 1832.35245901639, 1821.50136986301, NA, 1873.25205479452,
NA, NA, 1867.66301369863, 1848.45628415301, 1843.82465753425,
1854.81095890411, 1858.34794520548, 1854.20821917808, 1873.32876712329,
1847.61095890411, 1868.87158469945, 1863.57260273973, 1865.45205479452,
1845.83835616438, 1860.96994535519), cohort = structure(c(9L,
9L, 6L, 10L, 9L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 9L, 9L, 9L, 8L), .Label = c("1710",
"1720", "1730", "1740", "1750", "1760", "1770", "1780", "1790",
"1800", "1810", "1820", "1830", "1840", "1850", "1860", "1870",
"1880", "1890", "1900", "1910"), class = "factor"), marr_date = c(1829.87945205479,
1824.03825136612, NA, 1824.03825136612, 1837.8602739726, NA,
1821.37808219178, 1821.37808219178, 1826.24657534247, NA, NA,
NA, NA, 1827.32602739726, 1828.71857923497, 1820.77322404372,
1820.77322404372, 1820.05464480874, 1825.51780821918, NA, 1828.87704918033,
1828.87704918033, 1823.27671232877, 1823.27671232877, 1833.44931506849,
NA, 1821.35068493151, 1821.35068493151, 1841.04109589041, NA),
birth_event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), death_event = c(1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), marriage_event = c(1, 1, 0,
1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 1, 0), Period_Marriage = structure(c(2L,
2L, NA, 2L, 3L, NA, 2L, 2L, 2L, NA, NA, NA, NA, 2L, 2L, 2L,
2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, 3L, NA, 2L, 2L, 4L, NA), .Label = c("1810",
"1820", "1830", "1840", "1850", "1860", "1870", "1880", "1890",
"1900"), class = "factor")), .Names = c("born", "dead", "cohort",
"marr_date", "birth_event", "death_event", "marriage_event",
"Period_Marriage"), class = "data.frame", row.names = c(NA, 30L
))
我另外创建了几列:如果事件发生则为 1,否则为 0。时期变量(Period_Marriage)是根据结婚日期创建的。下面是我创建该表的失败尝试……
# Failed attempt: no group_by() is applied, and `cohort` / `Period_Marriage`
# are passed to summarise() as bare, unnamed expressions instead of grouping
# keys, so each of them yields a full-length column rather than one value.
data_new <- dplyr::summarise(data,cohort,Period_Marriage,sum = sum(marriage_event))
……报错信息为:“Error in summarise_impl(.data, dots) : Column `cohort` must be length 1 (a summary value), not 30”(即 `cohort` 列的长度必须为 1(一个汇总值),而不是 30)。
那么,我怎样才能得到我想要的(列联?)表呢?
答案 0 :(得分:1)
你的意思是这样吗?
library(dplyr) # or library(tidyverse)

# Count marriage events per birth cohort and marriage period.
# `data` is the data frame from the question; group_by() supplies the grouping
# that the bare summarise() call was missing.
# Rows whose Period_Marriage is NA have marriage_event == 0 and therefore sum
# to 0; add `filter(!is.na(Period_Marriage))` before group_by() to drop them.
data %>%
  group_by(cohort, Period_Marriage) %>%
  summarise(sum = sum(marriage_event, na.rm = TRUE)) %>%
  ungroup() # drop the leftover `cohort` grouping so later verbs act on all rows
#   cohort Period_Marriage   sum
#   <fct>  <fct>           <dbl>
# 1 1760   NA                  0
# 2 1780   1820                2
# 3 1780   NA                  0
# 4 1790   1820               15
# 5 1790   1830                2
# 6 1790   1840                1
# 7 1790   NA                  0
# 8 1800   1820                1