我在我的 Rails 应用程序中使用 wget 从网站获取页面,并存储从中提取的一些数据。程序一开始运行顺利,但运行一段时间后报错 "can not allocate memory"。
请问该如何处理这个问题?
更新(已添加代码)
# Fetches every CrawlLink assigned to this server, stores the extracted
# result, scans the fetched page for new links, and marks the link as
# "Available" again. After the batch it sleeps 10 seconds and enqueues
# Monitoring.assign_links.
#
# Links are skipped when the URL is blank, does not start with "http", or
# belongs to a domain that is not a key of get_valid_domains.
#
# @return [nil] the result of the final puts
def self.crawl_my_links
  puts "------------------- looping -------------------------------"
  valid_domains = get_valid_domains

  # Bind parameters instead of interpolating values into the SQL string.
  # No :order clause: the rows are shuffled right away, so sorting is wasted.
  crawl_links = CrawlLink.all(
    :conditions => ["server_id = ? and crawl_status = ?", Monitoring::SERVER_ID, "Assigned"]
  ).shuffle

  crawl_links.each do |crawl_link|
    url = crawl_link.url
    # Guard before any parsing so a nil/blank URL is skipped, not raised on.
    next if url.blank? || !url.starts_with?("http")

    encoded_url = URI.encode(url)
    url_host = URI.parse(encoded_url).host
    site_domain = Domainatrix.parse(encoded_url).domain
    unless valid_domains.has_key?(site_domain)
      logger.info "Domain - '#{site_domain}' not registered in the system "
      next
    end

    # Array form runs wget directly, without a shell: the URL is passed as a
    # single argument, so characters such as '&', ';' or spaces can neither
    # truncate the URL nor inject extra commands.
    html_content = IO.popen(["wget", "-qO-", url]) { |io| io.read }
    update_result(crawl_link, html_content)

    url_protocol = url.split('://').first
    crawl_for_new_links("#{url_protocol}://#{url_host}", html_content)
    crawl_link.update_attribute(:crawl_status, "Available")
  end

  sleep(10)
  Monitoring.delay.assign_links
  puts "++++++++++++++++ DONE ++++++++++++++++++"
end