我有一个ruby脚本从文件(genbank)中提取信息,我想将这些数据加载到数据库中。我创建了模型和模式以及连接脚本:
require 'active_record'
def establish_connection(db_location= "protein.db.sqlite3")
ActiveRecord::Base.establish_connection(
:adapter => "sqlite3",
:database => db_location,
:pool => 5,
:timeout => 5000
)
end
这是我输出数据的脚本:
require 'rubygems'
require 'bio'
require 'snp_db_models'
establish_connection
snp_positions_file = File.open("snp_position.txt")
outfile = File.open("output.txt", "w")
genome_sequence = Bio::FlatFile.open(Bio::EMBL, "ref.embl").next_entry
snp_positions = Array.new
snp_positions_file.gets # header line
while line = snp_positions_file.gets
snp_details = line.chomp.split("\t")
snp_seq = snp_details[1]
snp_positions << snp_details[1].to_i
end
mean_snp_per_base = snp_positions.size/genome_sequence.sequence_length.to_f
puts "Mean snps per base: #{mean_snp_per_base}"
#outfile = File.open("/Volumes/DataRAID/Projects/GAS/fastq_files/bowtie_results/snp_annotation/genes_with_higher_snps.tsv", "w")
outfile.puts("CDS start\tCDS end\tStrand\tGene\tLocus_tag\tnote\tsnp_ID\ttranslation_seq\tProduct\tNo_of_snps_per_gene\tsnp_rate_vs_mean")
genome_sequence.features do |feature|
if feature.feature !~ /gene/i && feature.feature !~ /source/i
start_pos = feature.locations.locations.first.from
end_pos = feature.locations.locations.first.to
number_of_snps_in_gene = (snp_positions & (start_pos..end_pos).to_a).size # intersect finds number of times snp occurs within cds location
mean_snp_per_base_in_gene = number_of_snps_in_gene.to_f/(end_pos - start_pos)
outfile.print "#{start_pos}\t"
outfile.print "#{end_pos}\t"
if feature.locations.locations.first.strand == 1
outfile.print "forward\t"
else
outfile.print "reverse\t"
end
qualifiers = feature.to_hash
["gene", "locus_tag", "note", "snp_id", "translation", "product"].each do |qualifier|
if qualifiers.has_key?(qualifier) # if there is gene and product in the file
# puts "#{qualifier}: #{qualifiers[qualifier]}"
outfile.print "#{qualifiers[qualifier].join(",")}\t"
else
outfile.print " \t"
end
end
outfile.print "#{number_of_snps_in_gene}\t"
outfile.print "%.2f" % (mean_snp_per_base_in_gene/mean_snp_per_base)
outfile.puts
end
end
outfile.close
如何将outfile.txt中的数据加载到数据库中。我是否必须做像马歇尔转储这样的事情?
提前致谢
标记
答案 0 :(得分:0)
您可以编写一个rake任务来执行此操作。保存在lib/tasks
中,并为其.rake
分机。
desc "rake task to load data into db"
task :load_data_db => :environment do
...
end
由于加载了rails环境,您可以像在任何Rails模型/控制器中一样直接访问模型。当然,它将根据执行rake任务时定义的环境变量连接到数据库。
答案 1 :(得分:0)
仅仅在一个脚本中,你的模型是未知的。
您必须定义一个最小值来使用它们,就像在Rails应用程序中一样。只需声明它们:
class Foo << ActiveRecord:Base
end
否则,在Rails上下文中,使用了解Rails应用程序详细信息的rake任务。