使用线程在rails中擦除数据

时间:2016-10-22 05:14:46

标签: ruby-on-rails

我正在抓取从网站上获取数据到rails中的数据库。我使用此脚本获取32000记录没有任何问题,但我想更快地获取数据,所以我在我的网站中应用了线程rake任务但是在运行rake任务时出现问题,一些数据正在获取,然后rake任务被中止。

如果可以提供任何帮助,我不知道该怎么做我真的很感激。这是我的rake任务代码。

task scratch_to_database: :environment do
  time2 = Time.now
  puts "Current Time : " + time2.inspect
    client = Mechanize.new
    giftcard_types=Giftcard.card_types
    find_all_merchant=Merchant.all.pluck(:id, :name).to_h

    #first index page of the merchant
    index_page = client.get('https://www.twitter.com//')
    document_page_index = Nokogiri::HTML::Document.parse(index_page.body)
    #set all merchant is deteled true
    # set_merchant_as_deleted = Merchant.update_all(is_deleted: true) if Merchant.exists?
    # set_giftcard_as_deleted = Giftcard.update_all(is_deleted: true) if Giftcard.exists?
    update_all_merchant_record = []
    update_all_giftcard_record = []
    threads = []
    #Merchant inner page pagination loop
    page_no_merchant = document_page_index.css('.pagination.pagination-centered ul li:nth-last-child(2) a').text.to_i
    1.upto(page_no_merchant) do |page_number|
        threads <<   Thread.new do
        client.get("https://www.twitter.com/buy-gift-cards?page=#{page_number}") do |page|
            document = Nokogiri::HTML::Document.parse(page.body)

            #Generate the name of the merchant and image of the merchant loop
            document.css('.product-source').each do |item|
               merchant_name= item.children.css('.name').text.gsub("Gift Cards", "")
                href = item.css('a').first.attr('href')
                image_url=item.children.css('.img img').attr('data-src').text.strip
                  #image url to parse the url of the image
                 image_url=URI.parse(image_url)
                 #saving the record of the merchant
                #  @merchant=Merchant.create(name: merchant_name , image_url:image_url)
                  if find_all_merchant.has_value?(merchant_name)
                    puts "this if"
                    merchant_id=find_all_merchant.key(merchant_name)
                    puts merchant_id
                  else
                    @merchant= Merchant.create(name: merchant_name , image_url:image_url)
                    update_all_merchant_record << @merchant.id
                    merchant_id=@merchant.id
                  end
                # @merchant.update_attribute(:is_deleted,  false)
                #set all giftcard is deteled true
                # set_giftcard_as_deleted = Giftcard.where(merchant_id: @merchant.id).update_all(is_deleted: true) if Giftcard.where(merchant_id: @merchant.id).exists?
                 #first page of the giftcard details page
                first_page = client.get("https://www.twitter.com#{href}")
                document_page = Nokogiri::HTML::Document.parse(first_page.body)
                page_no = document_page.css('.pagination.pagination-centered ul li:nth-last-child(2) a').text.to_i
                hrefextra =document_page.css('.dropdown-menu li a').last.attr('href')

                #generate the giftcard details loop with the pagination
                # update_all_record = []
               find_all_giftcard=Giftcard.where(merchant_id:merchant_id).pluck(:row_id)
               puts merchant_name
               #   puts find_all_giftcard.inspect


                     card_page = client.get("https://www.twitter.com#{hrefextra}")
                     document_page = Nokogiri::HTML::Document.parse(card_page.body)

                     #table details to generate the details of the giftcard with price ,per_off and final value of the giftcard

                     document_page.xpath('//table/tbody/tr[@class="toggle-details"]').collect do |row|
                       type1=[]
                         row_id = row.attr("id").to_i

                       row.at("td[2] ul").children.each do |typeli|
                       type = typeli.text.strip if typeli.text.strip.length != 0
                       type1 << type if typeli.text.strip.length != 0
                       end

                         value = row.at('td[3]').text.strip
                         value = value.to_s.tr('$', '').to_f

                         per_discount = row.at('td[4]').text.strip
                         per_discount = per_discount.to_s.tr('%', '').to_f

                         final_price = row.at('td[5] strong').text.strip
                         final_price = final_price.to_s.tr('$', '').to_f

                         type1.each do |type|
                            if find_all_giftcard.include?(row_id)
                              update_all_giftcard_record<<row_id
                              puts "exists"
                            else
                              puts "new"
                           @giftcard= Giftcard.create(card_type: giftcard_types.values_at(type.to_sym)[0], card_value:value, per_off:per_discount, card_price: final_price, merchant_id: merchant_id , row_id: row_id )
                           update_all_giftcard_record << @giftcard.row_id
                           end
                         end
                         #saving the record of the giftcard
                           # @giftcard=Giftcard.create(card_type:1, card_value:value, per_off:per_discount, card_price: final_price, merchant_id: @merchant.id , gift_card_type: type1)
                     end
                     # Giftcard.where(:id =>update_all_record).update_all(:is_deleted => false)

                #delete all giftcard which is not present
                # giftcard_deleted = Giftcard.where(:is_deleted => true,:merchant_id => @merchant.id).destroy_all if Giftcard.where(merchant_id: @merchant.id).exists?
          time2 = Time.now
          puts "Current Time : " + time2.inspect
            end
        end
        end
    end
    threads.each(&:join)
        puts "-------"
        puts threads
    # merchant_deleted = Merchant.where(:is_deleted => true).destroy_all if Merchant.exists?
    merchant_deleted = Merchant.where('id NOT IN (?)',update_all_merchant_record).destroy_all if Merchant.exists?
    giftcard_deleted = Giftcard.where('row_id NOT IN (?)',update_all_giftcard_record).destroy_all if Giftcard.exists?
end

我收到的错误: ActiveRecord :: ConnectionTimeoutError:无法在5.000秒内从池中获取连接(等待5.001秒);所有汇集的连接都在使用中

1 个答案:

答案 0 :(得分:0)

每个线程都需要单独连接到您的数据库。您需要增加应用程序可以在database.yml文件中使用的连接池大小。

但是您的数据库也应该能够处理传入的连接。如果您使用的是mysql,可以通过在控制台上运行select @@MAX_CONNECTIONS来检查这一点。