我有两个文件:一个包含特定的染色体位置,另一个包含基因名称(gene_name)以及每个基因的起止范围。我需要找出每个染色体位置落在哪个基因的起止范围内,从而得到对应的基因名称。我的第一个文件的格式如下所示:
Chromosome Position
1 394
1 447
2 534
我的第二个文件的格式是:
gene_name chromoome start end
pqr 1 201 230
sbc 1 300 450
ffg 2 500 550
我尝试过以下代码
# Look up, for every chromosome position in Sample.tsv, the gene(s) in
# outfile.tsv whose [start, end] range on the same chromosome contains it,
# and print the result as a two-column table: gene_name, Chromosome.
setwd('/home/R/')

# Both files are shown with a header row, so it must be declared; otherwise
# the header text is read as data and the coordinate columns are no longer
# numeric, which breaks the range comparison below.
data = read.table(file='outfile.tsv', header = TRUE, fill = TRUE, stringsAsFactors = FALSE)
data1 = read.table(file='Sample.tsv', header = TRUE, stringsAsFactors = FALSE)

# Query positions (Sample.tsv: Chromosome, Position).
chr = data1[,1]
pos = data1[,2]

# Gene annotation (outfile.tsv: gene_name, chromosome, start, end).
gene = data[,1]
gene_chr = data[,2]
beg = data[,3]
end = data[,4]

# One small data frame per query position. A position may fall into zero,
# one or several genes; zero-row frames vanish when the pieces are bound.
matches = lapply(seq_along(pos), function(i) {
  # The chromosome has to match too: the same coordinate exists on every
  # chromosome. which() also drops NA comparisons coming from rows that
  # fill = TRUE padded with NA.
  hit = which(gene_chr == chr[i] & beg <= pos[i] & pos[i] <= end)
  data.frame(gene_name = gene[hit],
             Chromosome = rep(chr[i], length(hit)),
             stringsAsFactors = FALSE)
})

# do.call(rbind, list()) is NULL, so build an explicit empty table when the
# position file has no rows.
result = if (length(matches) > 0) {
  do.call(rbind, matches)
} else {
  data.frame(gene_name = character(0), Chromosome = chr[0], stringsAsFactors = FALSE)
}

print(result, row.names = FALSE)
我希望得到如下格式的结果:
gene_name Chromosome #chromosome against position
sbc 1
sbc 1
ffg 2
任何帮助我都将不胜感激。
答案 0 :(得分:0)
您可以尝试使用
/// Deletes expired log files from the user's Documents directory.
///
/// A file is removed when its name starts with `applicationName` (defined
/// outside this snippet) and ends with "log", and its creation date is more
/// than `maximumDays` days in the past.
///
/// Errors thrown while resolving or listing the directory, reading file
/// attributes, or deleting a file stop the cleanup; the error is printed and
/// not propagated to the caller.
func cleanUp() {
    let maximumDays = 10.0
    // Files created before this instant are considered expired.
    let minimumDate = Date().addingTimeInterval(-maximumDays * 24 * 60 * 60)

    func meetsRequirement(date: Date) -> Bool { return date < minimumDate }
    func meetsRequirement(name: String) -> Bool {
        return name.hasPrefix(applicationName) && name.hasSuffix("log")
    }

    do {
        let manager = FileManager.default
        let documentDirUrl = try manager.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: false)

        // NOTE(review): this changes the current directory of the whole
        // process, which is what lets the relative paths below resolve.
        // Kept as-is for compatibility; confirm no caller is affected.
        guard manager.changeCurrentDirectoryPath(documentDirUrl.path) else { return }

        for file in try manager.contentsOfDirectory(atPath: ".") {
            // Apply the cheap name filter first so attributes are only read
            // for candidate log files; an unreadable unrelated file can no
            // longer abort the cleanup.
            guard meetsRequirement(name: file) else { continue }

            // Skip the file instead of crashing when no creation date is
            // reported (the original force-cast with `as!`).
            guard let creationDate = try manager.attributesOfItem(atPath: file)[FileAttributeKey.creationDate] as? Date else {
                continue
            }

            if meetsRequirement(date: creationDate) {
                try manager.removeItem(atPath: file)
            }
        }
    }
    catch {
        print("Cannot cleanup the old files: \(error)")
    }
}
GenomicRanges 包,或者通过合并(merge)两张表来实现:
library(GenomicRanges)

# Gene annotation table, inlined here so the example is self-contained.
# header = TRUE is spelled out: the T shorthand is an ordinary variable
# that can be reassigned.
target <- read.table(text="gene_name chromoome start end
pqr 1 201 230
sbc 1 300 450
ffg 2 500 550", header = TRUE)

# Query positions as width-1 ranges: chromosome 1 at 394 and 447,
# chromosome 2 at 534.
d <- GRanges(c(1,1,2), IRanges(c(394,447,534), width=1))

# Gene ranges built from the chromosome, start and end columns of the table.
target_range <- GRanges(target$chromoome, IRanges(start=target$start, end=target$end))

# Overlaps between the query positions (query) and gene ranges (subject).
OL <- findOverlaps(d, target_range)

# subjectHits() returns the row index in target_range for every hit; it
# replaces picking column 2 of as.data.frame(OL) by position.
target[subjectHits(OL), ]
gene_name chromoome start end
2 sbc 1 300 450
2.1 sbc 1 300 450
3 ffg 2 500 550