通过文本文件中的转换表替换文本文件中的值

时间:2015-10-20 10:51:21

标签: r text-files

我有以下文本文件(T1.txt):

"se" "su"
"1" "<{OV1}>" 0.0754101809002945
"2" "<{OV2},{OV4}>" 0.071729070256626
"3" "<{OV3},{OV5},{OV2},{OV4}>" 0.0703618005889777
"4" "<{OV1},{OV3},{OV4}>" 0.0704669751787968

另外,我在文本文件(TR.txt)中有以下转换表:

"items" "newitem"
"1" "BA31 U1" "OV1"
"2" "BECE D2" "OV2"
"3" "26D695 UPE" "OV3"
"4" "2BC5A DATA; BreE 124" "OV4"
"5" "659 INTS" "OV5"

我希望把源表(T1.txt)中的每个代码替换为转换表(TR.txt)中对应的等效项,从而得到以下结果:

"se" "su"
"1" "<{BA31 U1}>" 0.0754101809002945
"2" "<{BECE D2},{2BC5A DATA; BreE 124}>" 0.071729070256626
"3" "<{26D695 UPE},{659 INTS},{BECE D2},{2BC5A DATA; BreE 124}>" 0.0703618005889777
"4" "<{BA31 U1},{26D695 UPE},{2BC5A DATA; BreE 124}>" 0.0704669751787968

2 个答案:

答案 0 :(得分:1)

# Load the source table and the translation table as plain character data.
# `header` and `TRUE` are spelled out: `T` is an ordinary variable that can be
# reassigned, and `head` only works through partial argument matching.
dat1 <- read.table("T1.txt", header = TRUE, as.is = TRUE)
dat2 <- read.table("TR.txt", header = TRUE, as.is = TRUE)

> dat1
                         se         su
1                   <{OV1}> 0.07541018
2             <{OV2},{OV4}> 0.07172907
3 <{OV3},{OV5},{OV2},{OV4}> 0.07036180
4       <{OV1},{OV3},{OV4}> 0.07046698

> dat2
                 items newitem
1              BA31 U1     OV1
2              BECE D2     OV2
3           26D695 UPE     OV3
4 2BC5A DATA; BreE 124     OV4
5             659 INTS     OV5

# Replace every "{code}" token in dat1$se with "{label}", taking the code from
# dat2$newitem and the label from dat2$items.
# The braces are part of the pattern and the match is literal (fixed = TRUE),
# so a code such as "OV1" cannot rewrite part of a longer code like "OV10",
# and neither regex metacharacters in a code nor backslashes in a label are
# interpreted. seq_len() makes the loop a no-op when dat2 has no rows.
for (i in seq_len(nrow(dat2))) {
  dat1$se <- gsub(
    paste0("{", dat2$newitem[i], "}"),
    paste0("{", dat2$items[i], "}"),
    dat1$se,
    fixed = TRUE
  )
}

> dat1
                                                          se         su
1                                                <{BA31 U1}> 0.07541018
2                         <{BECE D2},{2BC5A DATA; BreE 124}> 0.07172907
3 <{26D695 UPE},{659 INTS},{BECE D2},{2BC5A DATA; BreE 124}> 0.07036180
4            <{BA31 U1},{26D695 UPE},{2BC5A DATA; BreE 124}> 0.07046698

答案 1 :(得分:1)

# Load the source table and the translation table; the first column of each
# file supplies the row names. `header = TRUE` / `as.is = TRUE` are written in
# full because `T` is reassignable and `head` relies on partial matching.
sour <- read.table("Source.txt", header = TRUE, as.is = TRUE, row.names = 1)
trans <- read.table(
  "Transofrmation Table.txt",
  header = TRUE, as.is = TRUE, row.names = 1
)

# Quick sanity check: dimensions and first row of each table.
dim(sour)
dim(trans)
sour[1, ]
trans[1, ]

# Named lookup vector: values are the entries of trans$items, names are the
# matching entries of trans$newitem, so indexing by a code returns its item.
trans.tab <- setNames(trans$items, trans$newitem)

# Rewrite each sour$sequence entry, translating every "{code}" through the
# trans.tab lookup and rebuilding the "<{a},{b},...>" form.
# seq_len() keeps the loop from running when sour has no rows.
for (i in seq_len(nrow(sour))) {
  # Split on the braces, then drop the leading "<", the trailing ">" and the
  # "," separators so only the bare codes remain.
  codes <- unlist(strsplit(sour$sequence[i], split = "\\{|\\}"))
  codes <- codes[-c(1, length(codes))]
  codes <- codes[codes != ","]

  # Translate; a code absent from the lookup is kept unchanged instead of
  # being written out as the literal text "NA".
  labels <- unname(trans.tab[codes])
  unknown <- is.na(labels)
  labels[unknown] <- codes[unknown]

  # sprintf() returns character(0) for an empty input, so a sequence without
  # any codes still comes out as "<>".
  body <- paste(sprintf("{%s}", labels), collapse = ",")
  sour$sequence[i] <- paste0("<", body, ">")
}

# Save the translated table as tab-separated text, without quoting and without
# the row-name column. FALSE is spelled out because `F` can be reassigned.
write.table(
  sour,
  file = "Source.new.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)