我有以下Rake文件。使用RoR 2.3.8。
desc "Create shops sitemap"
# Writes public/sitemap_shops_1.xml.gz containing one URL entry for each of
# the 50,000 most recently updated shops.
task(:shops => :environment) do
  sitemap = Sitemap.new

  # Only id and updated_at are loaded; nothing else is used for a sitemap entry.
  # The model constant is `Shop` -- a lowercase `shop` raises NameError.
  # NOTE(review): 50000 presumably mirrors the sitemap protocol's per-file URL limit.
  shops = Shop.find(:all, :select => 'id, updated_at', :order => 'updated_at DESC', :limit => 50000)
  shops.each do |shop|
    sitemap.add_url("http://abc.com/shops/#{shop.id}", w3c_date(shop.updated_at), 'daily', '1.0')
  end
  puts "#{sitemap.urls.length} total urls"

  xml_path = File.join(RAILS_ROOT, "public/sitemap_shops_1.xml")

  # Remove the archive from a previous run so gzip does not stop on an existing .gz file.
  FileUtils.rm("#{xml_path}.gz", :force => true)

  # The block form closes the file even if sitemap.write raises.
  File.open(xml_path, 'w') { |f| sitemap.write(f, 2) }

  # Argument-list form bypasses the shell, so a RAILS_ROOT containing spaces is safe.
  system("gzip", xml_path)
end
上述任务按最后更新时间取出最新的50,000条记录,然后保存到编号为1的文件中。
如何修改代码,使其继续取出接下来的50,000条记录并保存到编号为2的文件,再取出下一批50,000条保存到编号为3的文件,依此类推?
感谢。
答案 0 :(得分:2)
您可以使用find_in_batches代替find,它默认一次返回1,000条记录为一组(但您可以使用:batch_size选项将其覆盖为50,000)。再加入一个计数器变量(见下面的代码;因为我不认为find_in_batches有each_with_index
desc "Create shops sitemap"
# Writes every shop to numbered sitemap files, 50,000 URLs per file:
# public/sitemap_shops_1.xml.gz, public/sitemap_shops_2.xml.gz, ...
task(:shops => :environment) do
  file_name_index = 0

  # find_in_batches takes only an options hash (no leading :all) and raises if
  # :order is given, because batching forces primary-key order. Files are
  # therefore grouped by id rather than by updated_at; each URL still carries
  # its own last-modified date. 'id' must stay in :select for batching to work.
  Shop.find_in_batches(:select => 'id, updated_at', :batch_size => 50000) do |group_of_50000|
    file_name_index += 1
    sitemap = Sitemap.new

    group_of_50000.each do |shop|
      sitemap.add_url("http://abc.com/shops/#{shop.id}", w3c_date(shop.updated_at), 'daily', '1.0')
    end
    puts "#{sitemap.urls.length} total urls"

    # Build the path once with a double-quoted string so the index is
    # interpolated everywhere, including the gzip call below.
    xml_path = File.join(RAILS_ROOT, "public/sitemap_shops_#{file_name_index}.xml")

    # Remove the archive from a previous run so gzip does not stop on an existing .gz file.
    FileUtils.rm("#{xml_path}.gz", :force => true)

    # The block form closes the file even if sitemap.write raises.
    File.open(xml_path, 'w') { |f| sitemap.write(f, 2) }

    # Argument-list form bypasses the shell, so a RAILS_ROOT containing spaces is safe.
    system("gzip", xml_path)
  end
end
之类的东西),你可以处理你需要的所有文件。
{{1}}