我有这样的代码:
library(stringdist)
library(stringr)
# For each of the first 50 entries of `ide_in_tf3c`: read that id's
# "¿"-delimited sequence table and its consensus file, then store both with
# assign() under names built from the id ("df_<id>" and "cons_<id>").
# NOTE(review): read_delim() and read_csv() are readr functions; no
# library(readr) call is visible in this snippet - confirm it is loaded.
for (j in seq(1,50))
{
ide <-ide_in_tf3c[[j]] # id
# Path of the per-id sequence table.
file1 <- paste("/something/can_seq_", ide, ".csv", sep="")
# The file has no header row, so the columns come back as X1..X4.
impo1 <- read_delim(file1, "¿", escape_double = FALSE,
col_names = FALSE, comment = "*>", trim_ws = TRUE)
pdb<-impo1$X1
# str_replace() substitutes only the first match in each string.
# NOTE(review): the pattern shows up as a replacement character in this copy -
# presumably a mis-encoded delimiter byte; confirm against the original script.
nde<-stringr::str_replace(impo1$X2, '�', '')
tde<-stringr::str_replace(impo1$X3, '�', '')
sec0<-stringr::str_replace(impo1$X4, '�', '')
# Strip every literal backslash-n pair (two characters, not a newline) that the
# raw file embeds inside the sequence text.
sec<-stringr::str_replace_all(sec0, '\\\\n', '')
# `oname1` is only the NAME (a character string) of the data frame to create.
oname1 = paste("df", "_", ide, sep="")
assign(oname1, data.frame(pdb, nde, tde, sec)) # assign
# Path of the per-id consensus file.
file2 <- paste("/something/cons_", ide, "_c", sep="")
impo2 <- read_csv(file2, col_names = FALSE)
# `oname2` is likewise just the name under which the consensus column is stored.
oname2 = paste("cons_", ide, sep="")
assign(oname2, impo2$X1)
# The disabled block below passes the name strings themselves to nrow()/adist().
# A character string has no dimensions, so nrow(oname1) is NULL; the stored
# objects would have to be fetched with get(oname1) / get(oname2) instead.
# n<-nrow(oname1)
# for (i in seq(1,n))
# {
# y<-adist(oname1$sec[i], oname2)
# print(y)
# }
}
注释掉的部分不起作用,我不确定为什么。
我正在导入 50 个 csv 格式的文件(每个文件有 4 列,行数各不相同),并把这 50 个文件分别赋值给 50 个数据框(据我所知)。然后,对各自包含一条字符串的文本文件执行相同的操作。我想做的是使用 stringdist 库中的 adist,把这条字符串与对应文件的第四列逐行进行比较。问题是 n 得到的值是 NULL,这可能是因为我用 paste 来生成文件/对象的名称吗?
这是 `head -n 5 can_seq_P0DTD1.csv` 的输出:
5RE4¿1¿polypeptide(L)¿SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGH\nSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFC\nYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYE\nPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
5RE6¿1¿polypeptide(L)¿SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGH\nSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFC\nYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYE\nPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
5RE7¿1¿polypeptide(L)¿SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGH\nSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFC\nYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYE\nPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
5RE8¿1¿polypeptide(L)¿SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGH\nSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFC\nYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYE\nPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
5RE9¿1¿polypeptide(L)¿SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGH\nSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFC\nYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYE\nPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
这是 `cat cons_P0DTD1_c` 的输出:
SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ
谢谢
答案 0 :(得分:1)
当我们用 `$`、`[[`,或者带逗号 `,` 的 `[` 从 data.frame 中提取单个列时,得到的是一个 `vector`。如果想保留 data.frame 结构,可以改用不带逗号的 `[`,即把原来的 `assign(oname2, impo2$X1)` 改为:
# Single-bracket indexing keeps X1 as a one-column table instead of a bare
# vector; that table is stored under the name held in `oname2`.
assign(oname2, impo2['X1'])
在被注释掉的这一行中:
n <- nrow(oname1)
'oname1' 对象保存的只是一个字符串(对象的名称),而不是数据框本身,因此 nrow 返回 NULL。要取得该名称所指向的对象,请使用 `get`(循环中也是如此):
# Fetch the object whose name is stored in `oname1`, then count its rows.
# nrow() needs an object with dimensions (data.frame or matrix); on a bare
# character string it returns NULL.
nrow(get(oname1))
可以用 `str()` 检查对象的结构(例如 `str(get(oname1))`)。在循环中同样需要用 `get` 取出对象:
# Print the edit distance between each sequence in the data frame named by
# `oname1` and the consensus object named by `oname2`.
# `oname1` / `oname2` hold object names, so the objects are fetched with get();
# both lookups are loop-invariant and are done once up front.
# Expects `n` to be the row count of that data frame, i.e. nrow(get(oname1)).
seqs <- get(oname1)$sec
cons <- get(oname2)
# seq_len(n) is empty when n == 0, whereas seq(1, n) would yield c(1, 0) and
# run the body for a non-existent row 1 and for row 0.
for (i in seq_len(n)) {
  y <- adist(seqs[i], cons) # adist() is provided by the base `utils` package
  print(y)
}