我试图通过sitemap_generator gem添加并行化来加快parallel。我有以下代码,但我的小组没有写入public / sitemaps目录。我想这是因为lambda在并行的不同空间中执行。任何反馈都会有所帮助。谢谢!
#!/usr/bin/env ruby
require 'rubygems'
require 'sitemap_generator'
require 'benchmark'
require 'parallel'
require 'random-word'
SitemapGenerator::Sitemap.default_host = "http://localhost"
a = lambda {
SitemapGenerator::Sitemap.group(:filename => :biz, :sitemaps_path => 'sitemaps/biz/') do
(1..1000).each do |index|
url = "/#{RandomWord.adjs.next}/#{RandomWord.nouns.next}"
add url, :priority => 0.8
end
end
}
b = lambda {
SitemapGenerator::Sitemap.group(:filename => :wedding_ugc, :sitemaps_path => 'sitemaps/ugc') do
(1..1000).each do |index|
url = "/#{RandomWord.adjs.next}/#{RandomWord.nouns.next}"
add url, :priority => 0.8
end
end
}
#working example
# SitemapGenerator::Sitemap.default_host = "http://localhost"
# SitemapGenerator::Sitemap.create(:compress => false) do
# group(:filename => :biz, :sitemaps_path => 'sitemaps/biz/') do
# (1..1000).each do |index|
# url = "/#{RandomWord.adjs.next}/#{RandomWord.nouns.next}"
# add url, :priority => 0.8
# end
# end
# end
puts Time.now
Parallel.each([a,b]){|job| job.call()}
puts Time.now
答案 0 :(得分:1)
我得到了这个工作,并在github上发布了解决方案here
以下是url被破坏的代码。
SitemapGenerator::Sitemap.create(:compress => false, :create_index => false) do
group1 = lambda {
group = sitemap.group(:filename => :group1, :sitemaps_path => 'sitemaps/group1') do
Record.find_each do |record|
add '/record/path'
end
end
group.sitemap.write unless group.sitemap.written? #write if not full
}
# group2 like above...
Parallel.each([group1, group2], :in_processes => 8) do |group|
group.call
end
end
#regenerate the index sitemap xml file because I couldn't figure out how to track it with multiple processes
SitemapGenerator::Sitemap.create(:compress => false) do
Dir.chdir(sitemap.public_path.to_s)
xml_files = File.join("**", "sitemaps", "**", "*.xml")
xml_file_paths = Dir.glob(xml_files)
xml_file_paths.each do |file|
add file
end
end