我一直在尝试使用 imputeTS 中的各种时间序列插补模型来填补时间序列中的缺失值,但测试结果相当差。
由于数据看起来是季节性的时间序列,因此我考虑简单地用各月份的中位数或平均值来估算缺失值。
数据如下:
> head(data, 10)
v1 v2 v3 v4
1 1908 1 118 10
2 1908 2 138 33
3 1908 3 128 17
4 1908 4 NA 60
5 1908 5 NA 114
6 1908 6 72 124
7 1908 7 NA 44
8 1908 8 70 134
9 1908 9 58 121
10 1908 10 42 117
我计算了中位数:
# Median of v3 and v4 for each month (v2), ignoring missing values.
as_tibble(data) %>%
  group_by(v2) %>%
  summarise(
    imp_v3 = median(v3, na.rm = TRUE),
    imp_v4 = median(v4, na.rm = TRUE)
  )
结果如下:
# A tibble: 12 x 3
v2 imp_v3 imp_v4
<int> <dbl> <dbl>
1 1 126 23
2 2 132 27.5
3 3 138 33
4 4 22.5 76
5 5 42.5 102.
6 6 72 127
7 7 82 50
8 8 80 51
9 9 57 121
10 10 24 98
11 11 145 71
12 12 130. 31
我现在要做的就是用相应的中位数(medians)来估算 v3 和 v4 中的缺失值。
v3 中所有缺失值,若其对应的 v2 值为 1,都将替换为 126;v4 中所有缺失值,若其对应的 v2 列的值为 5,都将替换为 102.(即上表中 v2 = 5 对应的 imp_v4)。
v1 中的数字是年份,v2 中的数字是月份。
数据:
data <- structure(list(v1 = c(1908L, 1908L, 1908L, 1908L, 1908L, 1908L,
1908L, 1908L, 1908L, 1908L, 1908L, 1908L, 1909L, 1909L, 1909L,
1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L, 1909L,
1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L, 1910L,
1910L, 1910L, 1910L, 1911L, 1911L, 1911L, 1911L, 1911L, 1911L,
1911L, 1911L, 1911L, 1911L, 1911L, 1911L, 1912L, 1912L, 1912L,
1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L, 1912L,
1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L, 1913L,
1913L, 1913L, 1913L, 1914L, 1914L, 1914L, 1914L, 1914L, 1914L,
1914L, 1914L, 1914L, 1914L, 1914L, 1914L, 1915L, 1915L, 1915L,
1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L, 1915L,
1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L, 1916L,
1916L, 1916L, 1916L, 1917L, 1917L, 1917L, 1917L, 1917L, 1917L,
1917L, 1917L, 1917L, 1917L, 1917L, 1917L, 1918L, 1918L, 1918L,
1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L, 1918L,
1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L, 1919L,
1919L, 1919L, 1919L, 1920L, 1920L, 1920L, 1920L, 1920L, 1920L,
1920L, 1920L, 1920L, 1920L, 1920L, 1920L, 1921L, 1921L, 1921L,
1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L, 1921L,
1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L, 1922L,
1922L, 1922L, 1922L, 1923L, 1923L, 1923L, 1923L, 1923L, 1923L,
1923L, 1923L, 1923L, 1923L, 1923L, 1923L, 1924L, 1924L, 1924L,
1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L, 1924L,
1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L, 1925L,
1925L, 1925L, 1925L, 1926L, 1926L, 1926L, 1926L, 1926L, 1926L,
1926L, 1926L, 1926L, 1926L, 1926L, 1926L, 1927L, 1927L, 1927L,
1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L, 1927L,
1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L, 1928L,
1928L, 1928L, 1928L, 1929L, 1929L, 1929L, 1930L, 1930L, 1930L,
1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L, 1930L,
1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L, 1931L,
1931L, 1931L, 1931L, 1932L, 1932L, 1932L, 1932L, 1932L, 1932L,
1932L, 1932L, 1932L, 1932L, 1932L, 1932L, 1933L, 1933L, 1933L,
1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L, 1933L,
1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L, 1934L,
1934L, 1934L, 1934L, 1935L, 1935L, 1935L, 1935L, 1935L, 1935L,
1935L, 1935L, 1935L, 1935L, 1935L, 1935L, 1936L, 1936L, 1936L,
1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L, 1936L,
1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L, 1937L,
1937L, 1937L, 1937L, 1938L, 1938L, 1938L, 1938L, 1938L, 1938L,
1938L, 1938L, 1938L, 1938L, 1938L, 1938L, 1939L, 1939L, 1939L,
1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L, 1939L,
1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L, 1940L,
1940L, 1940L, 1940L, 1941L, 1941L, 1941L, 1941L, 1941L, 1941L,
1941L, 1941L, 1941L, 1941L, 1941L, 1941L, 1942L, 1942L, 1942L,
1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L, 1942L,
1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L, 1943L,
1943L, 1943L, 1943L, 1944L, 1944L, 1944L, 1944L, 1944L, 1944L,
1944L, 1944L, 1944L, 1944L, 1944L, 1944L, 1945L, 1945L, 1945L,
1945L, 1945L, 1945L, 1945L, 1945L), v2 = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 10L, 11L, 12L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L), v3 = c(118L, 138L, 128L, NA, NA, 72L,
NA, 70L, 58L, 42L, 159L, 125L, 118L, 122L, 123L, 22L, 43L, 45L,
68L, 82L, 41L, 29L, 140L, 120L, 119L, NA, 156L, 161L, 39L, 72L,
65L, 75L, 45L, 26L, 122L, 145L, 125L, 135L, 130L, 3L, 58L, 73L,
103L, 101L, 68L, 16L, 144L, 147L, NA, 132L, NA, 24L, NA, NA,
77L, 46L, 34L, 18L, 148L, 154L, 121L, NA, 148L, 9L, 43L, 69L,
70L, 79L, 64L, 27L, 4L, 134L, 126L, 158L, NA, 37L, 34L, 78L,
86L, 91L, 65L, NA, 156L, 130L, 121L, 129L, 144L, 12L, 36L, 80L,
NA, 77L, 63L, NA, 125L, NA, 160L, 121L, 112L, 16L, 44L, 40L,
NA, 96L, NA, NA, 153L, 112L, 105L, 107L, 122L, 148L, 58L, NA,
92L, 76L, 68L, 5L, 159L, 113L, 127L, 145L, 147L, NA, 64L, 61L,
NA, NA, NA, 20L, 144L, 153L, NA, 106L, 121L, 4L, 68L, 72L, 67L,
83L, NA, 14L, 117L, 137L, 139L, 148L, 4L, 160L, 44L, 70L, 58L,
59L, 54L, 27L, 152L, 128L, 154L, 130L, 162L, 17L, 47L, 80L, 104L,
NA, 72L, 57L, 127L, 155L, 116L, 127L, 129L, 151L, 59L, 65L, NA,
56L, 44L, NA, 153L, 140L, 147L, 128L, 155L, 161L, 17L, NA, 95L,
70L, 47L, 18L, 122L, NA, 126L, 119L, 132L, 160L, 35L, 62L, 75L,
62L, NA, 24L, 154L, 155L, 138L, 134L, 138L, 6L, 42L, 83L, 98L,
82L, 39L, 30L, 125L, 114L, 133L, 147L, 150L, 23L, NA, 67L, 97L,
87L, 66L, 8L, 142L, 130L, 133L, 132L, 158L, 5L, 36L, NA, 81L,
74L, 45L, 31L, 145L, NA, NA, 142L, 143L, 14L, 36L, 49L, 82L,
80L, 54L, 24L, 161L, 124L, 21L, NA, 137L, 141L, 109L, NA, 162L,
34L, 81L, 70L, 80L, 52L, 26L, 150L, 126L, 122L, 119L, 130L, NA,
41L, 62L, 75L, NA, 39L, 22L, 158L, 138L, 153L, 125L, 141L, 158L,
28L, 73L, 84L, 90L, 54L, 16L, 147L, 138L, 112L, NA, 4L, 23L,
51L, 89L, 100L, 99L, 83L, 23L, 141L, 111L, 138L, 136L, 138L,
10L, 48L, 77L, 104L, 80L, 79L, 26L, NA, 157L, 131L, 140L, 154L,
10L, 33L, 81L, 99L, NA, 60L, 20L, 152L, 113L, 118L, 110L, 153L,
NA, 41L, 76L, 78L, 88L, 66L, 25L, 144L, 142L, 135L, 134L, 118L,
16L, 50L, 66L, 90L, NA, 59L, 28L, 142L, 113L, 143L, 133L, 25L,
15L, 38L, 69L, 74L, 87L, 61L, 24L, 11L, 127L, 121L, 141L, 144L,
NA, 45L, 76L, 78L, 87L, 62L, 12L, 3L, 119L, 1L, NA, 154L, 7L,
64L, 102L, 78L, 79L, 53L, 19L, 151L, 126L, 2L, 115L, NA, 146L,
23L, 85L, 103L, 74L, 71L, NA, 151L, 140L, 94L, 93L, 136L, 22L,
40L, 79L, 82L, 85L, 56L, 28L, 141L, 152L, NA, 150L, 162L, 35L,
56L, 72L, 92L, 75L, 51L, 31L, 149L, 126L, 147L, NA, 146L, 26L,
40L, 55L, 82L, 88L, 47L, 13L, 149L, 131L, 108L, 159L, 17L, 32L,
49L, 72L, 95L, 83L), v4 = c(10L, 33L, 17L, 60L, 114L, 124L, 44L,
134L, 121L, 117L, 73L, NA, 15L, 3L, 3L, 72L, 86L, 112L, 42L,
41L, 118L, 106L, 31L, 18L, 5L, 29L, NA, 63L, NA, 133L, 135L,
52L, 122L, 110L, 4L, 75L, NA, 27L, 33L, 78L, 116L, 127L, 51L,
NA, 123L, 94L, 71L, 72L, 15L, NA, 74L, 73L, 103L, 136L, 51L,
129L, 114L, 81L, NA, 63L, 25L, 26L, 60L, 77L, 105L, 132L, 38L,
NA, 37L, NA, 89L, 62L, 21L, 74L, 65L, 82L, 96L, 128L, 49L, 45L,
121L, 109L, NA, 31L, 27L, 22L, 29L, 74L, 87L, 119L, 37L, 36L,
126L, 89L, 6L, 26L, 82L, 19L, NA, 70L, 100L, 114L, 45L, 54L,
126L, 98L, 80L, 6L, 5L, 12L, NA, 22L, 103L, 131L, 44L, 53L, 37L,
75L, NA, 1L, 16L, 68L, 59L, 68L, 110L, NA, 39L, 48L, 118L, 98L,
31L, 76L, 15L, 10L, NA, NA, 110L, 131L, NA, 39L, 120L, 70L, 16L,
59L, NA, NA, 70L, 79L, 107L, 127L, 38L, 134L, 122L, 105L, 83L,
31L, 77L, 33L, NA, NA, 96L, 123L, 53L, 49L, 130L, 125L, 23L,
72L, NA, 19L, 27L, 21L, NA, 123L, 133L, 136L, 121L, 87L, 72L,
72L, 69L, 33L, 67L, 71L, 85L, 120L, NA, 41L, NA, 102L, 19L, 19L,
25L, NA, 9L, NA, 107L, 131L, 43L, 34L, 36L, 104L, 83L, 86L, 61L,
28L, 32L, 73L, 109L, 129L, 54L, 51L, 112L, 101L, 2L, 8L, 29L,
74L, 70L, 87L, 84L, 120L, NA, 44L, NA, 80L, 61L, 30L, 33L, 20L,
74L, 77L, 93L, 111L, 50L, 51L, 129L, 102L, 67L, 5L, 23L, 62L,
62L, 72L, 94L, 115L, 47L, 44L, 122L, 98L, 86L, 1L, 90L, 64L,
63L, 27L, 5L, 24L, 81L, 95L, 133L, NA, NA, 136L, 107L, NA, 29L,
14L, 24L, 7L, 77L, 105L, 135L, 52L, 136L, 120L, 84L, 88L, 63L,
70L, 18L, 26L, 63L, NA, 125L, 56L, 56L, 124L, 85L, NA, 75L, 3L,
24L, 60L, 86L, 114L, 136L, 56L, 57L, NA, NA, 71L, 1L, 29L, 29L,
25L, 78L, 102L, 132L, 58L, 41L, 34L, 111L, 76L, 90L, 33L, 66L,
72L, 78L, 86L, 37L, 52L, 48L, 129L, 98L, NA, NA, 19L, 9L, 71L,
60L, 101L, 133L, 50L, NA, 46L, 96L, NA, 59L, 31L, 30L, 6L, 88L,
113L, NA, 56L, 51L, 126L, 107L, 61L, 17L, NA, 62L, 91L, 74L,
100L, 35L, 43L, 52L, 134L, 108L, 97L, 26L, 23L, 63L, 66L, 80L,
NA, 125L, 49L, 56L, 40L, 86L, 88L, 28L, 13L, 11L, 33L, 75L, 109L,
40L, 43L, 39L, 119L, 99L, 80L, 29L, 12L, NA, 23L, 65L, NA, 130L,
57L, NA, 46L, 109L, 69L, 66L, 10L, NA, 20L, 78L, 97L, 126L, 43L,
58L, 120L, 107L, 66L, 76L, 23L, 69L, 64L, 98L, 96L, 135L, 41L,
47L, 127L, 97L, 74L, 26L, 65L, 20L, 27L, 92L, 98L, 124L, 55L,
53L, 121L, 96L, NA, 27L, NA, 73L, NA, 85L, 106L, 135L, 54L, NA
)), class = "data.frame", row.names = c(NA, -443L))
答案 0 :(得分:3)
我们可以使用 dplyr 的 mutate_at 配合 replace,将 NA 元素替换为该列在对应分组(按 v2 分组)内的 median:
library(dplyr)
# Within each month (v2), fill the NAs of v3 and v4 with that month's median
# of the non-missing values. The result is still grouped by v2.
data %>%
  group_by(v2) %>%
  mutate_at(
    .vars = vars(v3, v4),
    .funs = function(x) replace(x, is.na(x), median(x, na.rm = TRUE))
  )
# A tibble: 443 x 4
# Groups: v2 [12]
# v1 v2 v3 v4
# <int> <int> <dbl> <dbl>
# 1 1908 1 118 10
# 2 1908 2 138 33
# 3 1908 3 128 17
# 4 1908 4 22.5 60
# 5 1908 5 42.5 114
# 6 1908 6 72 124
# 7 1908 7 82 44
# 8 1908 8 70 134
# 9 1908 9 58 121
#10 1908 10 42 117
# … with 433 more rows
或者,另一个选择是使用 zoo 包中的 na.aggregate(默认情况下按均值填补):
library(zoo)
# Same per-month (v2) imputation of v3 and v4, delegated to zoo's na.aggregate.
data %>%
  group_by(v2) %>%
  mutate_at(.vars = vars(v3, v4), .funs = na.aggregate)
na.aggregate 的 FUN 参数默认是 mean。如果我们想要 median,请指定 FUN,例如 mutate_at(vars(v3, v4), na.aggregate, FUN = median)。