这是我需要应用到一张大型复杂表上的转换的简化示例。这里的输入表是一个包含两个数值列的查找表。`raw` 与 `SS` 是多对一的关系:多个 `raw` 值可以对应同一个 `SS` 值,反之则不然。
library(tidyverse)
# Lookup table with two numeric columns. Several `raw` scores may share one
# `SS` value (here raw 3, 4 and 5 all map to SS 83).
input <- tibble(
  raw = as.numeric(0:9),
  SS = c(75, 78, 80, 83, 83, 83, 90, 93, 95, 98)
)
所需的输出表如下。
# Desired result: one row per SS from 100 down to 70, with `raw` as a
# character label ("-" = no raw score, "2" = one score, "3-5" = a range).
output <- tibble(
  SS = as.numeric(100:70),
  raw = c(
    "-",                                                  # SS 100
    "-", "9", "-", "-", "8", "-", "7", "-", "-", "6",     # SS 99-90
    "-", "-", "-", "-", "-", "-", "3-5", "-", "-", "2",   # SS 89-80
    "-", "1", "-", "-", "0", "-", "-", "-", "-", "-"      # SS 79-70
  )
)
要创建此输出,需要对输入做以下几种转换:
- `SS` 列中所有缺失的整数均已补全(70:100),并按降序排列。
- `raw` 列被强制转换为字符型,并用三种取值来保留它与原始 `SS` 的多对一关系:
  - 破折号 '-':该 `SS` 值没有对应的 `raw` 值(例如 SS = 70);
  - 单个值(例如 '2'):该 `SS` 值只有一个对应的 `raw` 值(例如 SS = 80);
  - 取值范围(例如 '3-5'):该 `SS` 值有多个对应的 `raw` 值(例如 SS = 83)。
这是我目前的进展:
# Partial attempt: fills in the full 70:100 SS scale and marks every SS that
# has no matching raw score with "-". Rows that share an SS (e.g. SS = 83)
# are NOT yet collapsed into a single range label.
interim <- input %>%
  select(SS, raw) %>%
  # Cast to character first so the "-" placeholder can live in the column.
  mutate(raw = as.character(raw)) %>%
  # Adds one row (raw = NA) for each SS in 70:100 that is absent from input.
  complete(SS = 70:100) %>%
  arrange(desc(SS)) %>%
  mutate(raw = replace_na(raw, "-"))
如果运行上面的代码,您会看到唯一剩下的转换是把 `SS = 83` 的三行折叠为一行,并让 `raw` 取相应的值 '3-5'。这意味着需要以某种方式识别出 3 和 5 分别是该范围的下限和上限,因为解决方案必须能够处理长度不定的连续整数范围。
在此先感谢您的帮助!
答案 0 :(得分:1)
这是一种方法-
# Build the full 100:70 scale, attach the collapsed `raw` label for every SS
# that occurs in `input`, and mark the remaining SS values with "-".
#
# The per-SS range is computed while `raw` is still numeric: range() on a
# character vector compares strings, so raw scores such as 9 and 10 would be
# ordered as "10" < "9". Summarising before the join also means replace_na()
# fills a character column with a character value, not a numeric column.
tibble(SS = 100:70) %>%
  left_join(
    input %>%
      group_by(SS) %>%
      # unique() reduces c(min, max) to one value when a single raw score
      # maps to this SS, giving "2" rather than "2-2".
      summarize(raw = paste0(unique(range(raw)), collapse = "-")),
    by = "SS"
  ) %>%
  replace_na(list(raw = "-")) %>%
  arrange(desc(SS))
# A tibble: 31 x 2
SS raw
<dbl> <chr>
1 100 -
2 99 -
3 98 9
4 97 -
5 96 -
6 95 8
7 94 -
8 93 7
9 92 -
10 91 -
11 90 6
12 89 -
13 88 -
14 87 -
15 86 -
16 85 -
17 84 -
18 83 3-5
19 82 -
20 81 -
21 80 2
22 79 -
23 78 1
24 77 -
25 76 -
26 75 0
27 74 -
28 73 -
29 72 -
30 71 -
31 70 -
答案 1 :(得分:1)
一个选项是#include <stdlib.h>
#include <stdio.h>
// Reads one gender character from standard input and stores a numeric code
// for it in matrix[0][0]: 1 for 'm', 2 for 'f', 3 for anything else.
// Prints the character that was read and the stored code.
//
// Returns EXIT_SUCCESS normally, or EXIT_FAILURE when no character could be
// read from standard input.
int main(void)
{
    // name your variables properly
    // initialize them immediately to avoid undefined values
    // respect your variables types: use '\0' instead of 0 for chars and 0 instead of NULL for integers
    char gender = '\0';
    int matrix[2][2] = {{0}};

    // display accurate messages to the user
    printf("Select a gender (m or f): ");

    // don't scan a string if you only need a char
    // always check the return of a scan: anything other than 1 means the
    // single requested item was not assigned
    if (scanf("%c", &gender) != 1)
    {
        // report failures on stderr and with a non-zero exit status so the
        // caller can tell an input error apart from a successful run
        fprintf(stderr, "Input error\n");
        return EXIT_FAILURE;
    }

    // switch is usually more efficient than else-ifs
    switch (gender)
    {
        case 'm':
            matrix[0][0] = 1;
            break;
        case 'f':
            matrix[0][0] = 2;
            break;
        default:
            // unknown input is recorded as 3 and reported, but is not fatal
            matrix[0][0] = 3;
            printf("Invalid gender\n");
            break;
    }

    printf("Selected gender: %c\n", gender);
    printf("Value on matrix[0][0]: %i\n", matrix[0][0]);
    return EXIT_SUCCESS;
}
,然后依次执行 `complete`、`group_by` 和 `paste`
-并与 OP 的 `output` 进行核对
library(tidyverse)
# Complete the 70:100 SS scale, collapse `raw` to one label per SS, then mark
# SS values without any raw score as "-".
#
# `raw` stays numeric until it has been summarised: filling "-" into the
# numeric column up front mixes types, and range() on character values
# compares strings (so "10" would sort before "9").
out1 <- input %>%
  # Missing SS values get a single row with raw = NA.
  complete(SS = 70:100) %>%
  group_by(SS) %>%
  summarise(
    raw = if (n() > 1) {
      str_c(range(raw), collapse = "-")
    } else {
      # Single row: either the lone raw score or the NA added by complete().
      as.character(raw)
    }
  ) %>%
  replace_na(list(raw = "-")) %>%
  arrange(desc(SS))
out1
# A tibble: 31 x 2
# SS raw
# <dbl> <chr>
# 1 100 -
# 2 99 -
# 3 98 9
# 4 97 -
# 5 96 -
# 6 95 8
# 7 94 -
# 8 93 7
# 9 92 -
#10 91 -
# … with 21 more rows
identical(out1, output)
#[1] TRUE
或者在 `group_by` 之后、`paste` 之前使用 `filter` 步骤
答案 2 :(得分:0)
`data.table` 解决方案:
# Collapse `raw` to one label per SS, join onto the full 70:100 scale (every
# SS in 70:100 is kept), fill unmatched SS values with "-", and sort by
# descending SS.
input_dt[
  , .(
    # The test is a scalar per group, so use if/else rather than ifelse().
    raw = if (.N == 1L) {
      as.character(raw)
    } else {
      paste(min(raw), max(raw), sep = "-")
    }
  ),
  by = SS
][
  data.table(SS = 70:100),
  on = "SS"
][
  is.na(raw),
  raw := "-"
][
  order(-SS)
]
还有 base R 的解决方案:
# Base R version: aggregate `raw` to one label per SS, left-merge onto the
# full 70:100 scale, fill unmatched SS values with "-", and print the result
# sorted by descending SS.
out_2 <- merge(
  data.frame(SS = 70:100),
  aggregate(
    raw ~ SS,
    data = input,
    FUN = function(x) {
      # length(x) is a scalar, so use if/else rather than ifelse().
      if (length(x) == 1) {
        as.character(x)
      } else {
        paste(min(x), max(x), sep = "-")
      }
    }
  ),
  by = "SS",
  all.x = TRUE
)
# SS values absent from `input` come back from the merge with raw = NA.
out_2$raw[is.na(out_2$raw)] <- "-"
out_2[order(-out_2$SS), ]
数据:
library(tibble)
# Example lookup data used by the solutions above: ten `raw` scores and the
# `SS` value each one maps to (raw 3, 4 and 5 share SS 83).
input <- tibble(
  raw = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
  SS = c(75, 78, 80, 83, 83, 83, 90, 93, 95, 98)
)
library(data.table)
# data.table copy of `input`, used by the data.table solution.
input_dt <- as.data.table(x = input)