这是 "How to append a sequential number for every element in a data frame?" 的后续问题。
File "C:\Users\<me>\AppData\Local\Programs\Python\Python37\lib\site-packages\unicodecsv\py3.py", line 51, in <genexpr>
    f = (bs.decode(encoding, errors=errors) for bs in f)
AttributeError: 'str' object has no attribute 'decode'
代码:
# Sample data: two label columns of equal length.
V1 <- c("a", "a", "b", "b", "a", "c", "b", "c")
V2 <- c("e", "d", "f", "f", "d", "d", "e", "f")
df <- data.frame(V1, V2)

# Rewrite every cell as "<column index>.<order of first appearance>.<value>".
# Each column is handled on its own; `df[] <-` keeps the data-frame shape and
# the column names while swapping in the new contents.
df[] <- Map(
  function(values, column_index) {
    first_seen <- match(values, unique(values))
    paste(column_index, first_seen, values, sep = ".")
  },
  df,
  seq_along(df)
)
当前输出(期望的输出见文末的表格):
V1 V2
1 1.1.a 2.1.e
2 1.1.a 2.2.d
3 1.2.b 2.3.f
4 1.2.b 2.3.f
5 1.1.a 2.2.d
6 1.3.c 2.2.d
7 1.2.b 2.1.e
8 1.3.c 2.3.f
答案 0 :(得分:2)
这是一个可行的尝试。我很好奇其他人能否用更优雅的方式实现这一目标。
# Make every second-level node (V2) belong to exactly one parent (V1).
# The first parent seen for a V2 label keeps that label; each later
# (V1, V2) pairing is moved to the next unused "2.<n>" prefix, keeping the
# trailing ".<name>" part of the label.
# Expects dplyr and stringr to be attached and `df` to hold character
# columns V1 and V2 whose values look like "<level>.<index>.<name>".

# Leading "<level>.<index>" part of a label. A pattern is used instead of
# fixed character positions so indexes of 10 and above are read correctly.
node_prefix_pattern <- "^[^.]*\\.[^.]*"

# Get a list of nodes that need to be renamed
dupes <- df %>%
  # collect unique combinations of V2 and V1
  distinct(V2, V1) %>%
  # For each V2, how many V1's was it tied to? Only keep dupes.
  group_by(V2) %>%
  mutate(version_of_V2 = row_number()) %>%
  ungroup() %>%
  filter(version_of_V2 > 1)

# Get a list of available node names. The candidate pool is at least
# "2.1".."2.100" and grows when needed, so it always holds one free prefix
# per row of `dupes`.
used_nodes <- unique(str_extract(df$V2, node_prefix_pattern))
pool_size <- max(100L, length(used_nodes) + nrow(dupes))
unused_nodes <- setdiff(paste0("2.", seq_len(pool_size)), used_nodes)

# Pair each duplicate with a fresh prefix. seq_len() keeps this a no-op when
# there are no duplicates (1:0 would wrongly pick one element).
dupes <- dupes %>%
  mutate(
    V2_new = paste0(
      unused_nodes[seq_len(n())],
      str_replace(V2, node_prefix_pattern, "")
    )
  )

# Bring in the renamed nodes and integrate into original
df %>%
  left_join(dupes, by = c("V1", "V2")) %>%
  # rows without a replacement keep their original V2
  mutate(V2 = coalesce(V2_new, V2)) %>%
  select(-version_of_V2, -V2_new)
V1 V2
1 1.1.a 2.1.e
2 1.1.a 2.2.d
3 1.2.b 2.3.f
4 1.2.b 2.3.f
5 1.1.a 2.2.d
6 1.3.c 2.4.d
7 1.2.b 2.5.e
8 1.3.c 2.6.f