Question

这是我编写的脚本的一部分，用于在使用merge()后更充分地合并列。如果两个数据集都有一个名称相同的列merge()，则会为您提供column.x和column.y列。
我编写了一个脚本来将这些数据放在一起，并删除不需要的列（column.y和column.x_error，我添加了一列，以便在{{1}时发出警告我希望将dat$column.x != dat$column.y)重命名为column.x，以减少数据集中不需要的手动操作。我还没有设法将column重命名为column.x ，请参阅代码了解更多信息。

column来自dat

dat = merge(data1,data2, by= "ID", all.x=TRUE)

#obtain a list of double columns dubbelkol = cbind() sorted = sort(names(dat)) for(i in as.numeric(1:length(names(dat)))) { if(grepl(".x",sorted[i])){ if (grepl(".y", sorted[i+1]) && (sub(".x","",sorted[i])==sub(".y","",sorted[i+1]))){ dubbelkol = cbind(dubbelkol,sorted[i],sorted[i+1]) } } } #Check data, fill in NA in column.x from column.y if poss temp = cbind() for (p in as.numeric(1:(length(dubbelkol)-1))){ if(grepl(".x",dubbelkol[p])){ dat[dubbelkol[p]][is.na(dat[dubbelkol[p]])] = dat[dubbelkol[p+1]][is.na(dat[dubbelkol[p]])] temp = (dat[dubbelkol[p]] != dat[dubbelkol[p+1]]) colnames(temp) = (paste(dubbelkol[p],"_error", sep="")) dat[colnames(temp)] = temp } } #If every value in "column.x_error" is TRUE or NA, delete "column.y" and "column.x_error" #Rename "column.x" to "column" #from here until next comment everything works droplist= c() for (k in as.numeric(1:length(names(dat)))) { if (grepl(".x_error",colnames(dat[k]))) { if (all(dat[k]==FALSE, na.rm = TRUE)) { droplist = c(droplist,colnames(dat[k]), sub(".x_error",".y",colnames(dat[k]))) #the next line doesnt work, it's supposed to turn the .x column back to "" before the .y en .y_error columns are dropped. colnames(dat[sub(".x_error",".x",colnames(dat[k]))])= paste(sub(".x_error","",colnames(dat[k]))) } } } dat = dat[,!names(dat) %in% droplist]会很快给我paste(sub(".x_error","",colnames(dat[k])))，但"BNR"不会更改colnames(...) = ...中的列名。

知道出了什么问题吗？

dat

Answer 1

我建议更换：

sub(".x_error",".x",colnames(dat[k]))]

使用：

sub("\\.x_error", "\\.x", colnames(dat[k]))]

如果您想要替换实际的.。您必须使用.转义\\.。正则表达式中的.表示any character。

更好的是，由于您要用.替换.，为什么不说：

sub("x_error", "x", colnames(dat[k]))]

（或）如果除了_error之外没有其他x_error，只需：

sub("_error", "", colnames(dat[k]))]

编辑：问题似乎是您的数据格式似乎在左侧和右侧加载了其他列。您可以先选择所需的列然后合并。

d1 <- read.table(textConnection("| ID | BNR   | 
|  1 | 123   | 
|  2 | 234   |
|  3 | NA    | 
|  4 | 456   | 
|  5 | 677   |
|  6 | NA    |"), sep = "|", header = TRUE, stringsAsFactors = FALSE)[,2:3]

d1$BNR <- as.numeric(d1$BNR)

d2 <- read.table(textConnection("|  1 | 123   | 
|  2 | 234   |
|  3 | 345   | 
|  4 | 456   | 
|  5 | 677   |
|  6 | NA    |"), header = FALSE, sep = "|", stringsAsFactors = FALSE)[,2:3]

names(d2) <- c("ID", "BNR")
d2$BNR <- as.numeric(d2$BNR)

# > d1
#   ID BNR
# 1  1 123
# 2  2 234
# 3  3  NA
# 4  4 456
# 5  5 677
# 6  6  NA

# > d2
#   ID BNR
# 1  1 123
# 2  2 234
# 3  3 345
# 4  4 456
# 5  5 677
# 6  6  NA

dat <- merge(d1, d2, by="ID", all=T)
> dat

#   ID BNR.x BNR.y
# 1  1   123   123
# 2  2   234   234
# 3  3    NA   345
# 4  4   456   456
# 5  5   677   677
# 6  6    NA    NA

# replace all NA values in x from y
dat$BNR.x <- ifelse(is.na(dat$BNR.x), dat$BNR.y, dat$BNR.x)

# now remove y
dat$BNR.y <- null

为什么我的列名称不能在R中更改？

1 个答案: