我想使用 Python 和 BeautifulSoup 抓取图表中的值

时间:2018-05-20 08:31:47

标签: web-crawler screen-scraping

这是我想从图表中抓取数值的网站(click here),点击此处查看图表(graph image)。下面是我用来从该页面抓取其他数据的代码,例如规格、相关产品,以及页面上列出的不同卖家。我正在做最后一年的毕业项目,要完成这个项目,我只差从图表中抓取数值这一步。



def specification(self, request):
    """Scrape a product page and render it with 'product_details.html'.

    The page URL is read from the ``url`` query parameter, fetched through
    ``self.page_load`` and parsed with BeautifulSoup. The following pieces
    are collected and handed to the template:

    * related products (name, price, image, link),
    * the main product's id, name, lowest price and image,
    * the quick-spec bullet list,
    * the primary seller link (or "out of stock"),
    * the other sellers and their prices,
    * the two-column specification tables.

    Args:
        request: Request object exposing ``GET.get('url')``; it is also
            forwarded unchanged to ``render``.

    Returns:
        The result of ``render(request, 'product_details.html', spec)``.

    Raises:
        AttributeError, TypeError, KeyError, IndexError: No missing-element
            handling is done, apart from the out-of-stock check on the
            primary seller link. If the page lacks an element that is
            looked up below, the failed lookup propagates to the caller.
    """
    product_name = []
    product_price = []
    image_source = []
    product_href = []
    stores = []
    store_redirect_links = []
    other_prices = []
    table_colum_1 = []
    table_colum_2 = []

    # Keep the requested URL separately: it is returned to the template as
    # 'l_p' and must not be clobbered by the per-product links below.
    page_url = request.GET.get('url')
    response = self.page_load(page_url)
    page = BeautifulSoup(response.text, "html.parser")

    # --- Related products carousel -------------------------------------
    related_items = page.findAll(
        "li", {"class": "crsl__itm prd-sldr__itm prd-sldr__itm--s"}
    )
    for item in related_items:
        item_name = item.find("div", {"class": "prdp-ttl"}).get_text()
        print(item_name)  # debug output
        item_price = item.find("div", {"class": "prdp-prc price"}).get_text()
        print(item_price)  # debug output
        # The second <div> inside the item wraps the lazily loaded image.
        image_holder = item.findAll('div')[1]
        print(image_holder)  # debug output
        item_image = image_holder.img['data-src']
        item_link = item.find('a')['href']

        product_name.append(item_name)
        product_price.append(item_price)
        image_source.append(item_image)
        product_href.append(item_link)

    # --- Product id ----------------------------------------------------
    primary_card = page.find('div', {'class': 'crd prd-prmry-crd'})
    pid_holder = primary_card.find('li', {'class': 'float--right'})
    pid_input = pid_holder.find('input', {'class': 'aPC'})
    pid = pid_input['data-pid']

    # --- Quick-spec bullet list ----------------------------------------
    info = page.find('ul', {
        'class': 'nav soft-half--top soft-half--left quick-spec nav--vtop three-cols'
    })
    rows = [row.text for row in info.findAll('li')]

    # --- Main product name, price and image ----------------------------
    name = page.find("h1", {"class": "bold txt-xl"}).text
    name = name.replace(' Price', '')
    price = page.find(
        "div", {"class": "txt-xl bold spcolor lowestPrice"}
    ).text
    gallery_item = page.find("li", {
        "class": 'crsl__itm gllry__itm gllry__itm--s sgllryimgLnkCnt'
    })
    source = gallery_item.img['data-src']

    # --- Primary seller link -------------------------------------------
    # Default to "out of stock"; only overridden when the warning header is
    # absent and the seller block actually contains a link.
    redirect_name = "out of stock"
    redirect = ""
    if page.find('div', {'class': 'crd-hdr bg--warning'}) is None:
        seller_block = page.find('div', {'class': "push-half--bottom"})
        seller_link = seller_block.find('a')
        if seller_link is not None:
            redirect = seller_link['href']
            redirect_name = seller_block.text

    # --- Other sellers -------------------------------------------------
    # The trailing space in the class value is part of the lookup string.
    multiple_stores = page.findAll('div', {
        'class': 'invt__itm inventoryListItem '
    })
    for store in multiple_stores:
        logo_cell = store.find('div', {'class': 'soft-half--bottom'})
        store_logo = logo_cell.img['src']
        store_link = logo_cell.find('a')['href']
        other_price = store.find('span', {'class': 'price invt__prc'}).text
        if other_price == "-":
            other_price = "Not Available "

        stores.append(store_logo)
        store_redirect_links.append(store_link)
        other_prices.append(other_price)

    # --- Specification tables ------------------------------------------
    for table in page.findAll("table", {"class": "sp"}):
        # The first row of every table is skipped (presumably a heading
        # row without two data cells -- confirm against the page markup).
        for tr in table.findAll('tr')[1:]:
            cells = tr.findAll("td")
            table_colum_1.append(cells[0].text)
            table_colum_2.append(cells[1].text)

    spec = {
        'index1': range(0, len(product_name)),
        'product_name': product_name,
        'product_price': product_price,
        'image_source': image_source,
        'product_href': product_href,
        'name': name,
        'price': price,
        'source': source,
        'redirect': redirect,
        'redirect_name': redirect_name,
        'pid': pid,
        # NOTE(review): 'index' stops one short of len(rows); kept as-is
        # because the template may depend on it -- confirm.
        'index': range(0, len(rows) - 1),
        'rows': rows,
        'total_stores': range(0, len(stores)),
        'stores': stores,
        'store_redirect_links': store_redirect_links,
        'other_prices': other_prices,
        'total_spec': range(0, len(table_colum_1)),
        'table_colum_1': table_colum_1,
        'table_colum_2': table_colum_2,
        'l_p': page_url,
    }

    return render(request, 'product_details.html', spec)




但现在我想抓取图表中的值,以便显示在我的页面上

0 个答案:

没有答案