使用python中的bs4查找div元素的所有子级

时间:2018-01-08 07:21:54

标签: python beautifulsoup

#在这里我想创建一个包含所有div的数组arrayOfdivAfterContent,但是无法得到它....

import urllib2
import csv
import requests
import os, sys
from bs4 import BeautifulSoup
from datetime import datetime
# Question's attempt: download the schemes listing page and try to collect
# the <div> elements that belong to the "view-content" container.
try:
    # Stray markdown backticks removed from the URL literal (one sat right
    # after "?" and one after the closing quote, which was a syntax error).
    quote_page = "http://nari.nic.in/schemes?field_age_group_value[0]=2&field_age_group_value[1]=3&field_age_group_value[2]=4&field_age_group_value[3]=5&field_area_value[0]=2&field_area_value[1]=3&field_area_value[2]=4&field_area_value[3]=5&field_area_value[4]=6&field_state_value[0]=2&field_state_value[1]=3&field_state_value[2]=4&field_state_value[3]=5&field_state_value[4]=6&field_state_value[5]=7&field_state_value[6]=8&field_state_value[7]=9&field_state_value[8]=10&field_state_value[9]=11&field_state_value[10]=12&field_state_value[11]=13&field_state_value[12]=14&field_state_value[13]=15&field_state_value[14]=16&field_state_value[15]=17&field_state_value[16]=18&field_state_value[17]=19&field_state_value[18]=20&field_state_value[19]=21&field_state_value[20]=22&field_state_value[21]=23&field_state_value[22]=24&field_state_value[23]=25&field_state_value[24]=26&field_state_value[25]=27&field_state_value[26]=28&field_state_value[27]=29&field_state_value[28]=30&field_state_value[29]=31&field_state_value[30]=32&field_state_value[31]=33&field_state_value[32]=34&field_state_value[33]=35&field_state_value[34]=36&field_state_value[35]=37&field_state_value[36]=38&page=12"
    page = urllib2.urlopen(quote_page)

    soup = BeautifulSoup(page, "html.parser")
    # The class name has to be one unbroken string literal; it was previously
    # split over three lines, which does not parse.
    headingValue = soup.find_all('h3', attrs={"class": "views-accordion-schemes-schemedisplay-header"})

    view_content = soup.find('div', attrs={"class": "view-content"})

    # NOTE: find_next returns only the first <div> that follows view_content
    # in the document, i.e. a single element rather than a list of elements.
    arrayOfdivAfterContent = view_content.find_next('div')
except Exception as E:
    # Any failure (network error, missing container, ...) is just printed.
    print(E)

1 个答案:

答案 0 :(得分:1)

首先,你还没有创建数组

from datetime import datetime
import urllib2
from bs4 import BeautifulSoup
# Download the schemes listing page and collect every <div> found inside the
# "view-content" container into arrayOfdivAfterContent.
try:
    # Stray markdown backtick removed from right after "?"; it would otherwise
    # become part of the first query parameter name.
    quote_page = "http://nari.nic.in/schemes?field_age_group_value[0]=2&field_age_group_value[1]=3&field_age_group_value[2]=4&field_age_group_value[3]=5&field_area_value[0]=2&field_area_value[1]=3&field_area_value[2]=4&field_area_value[3]=5&field_area_value[4]=6&field_state_value[0]=2&field_state_value[1]=3&field_state_value[2]=4&field_state_value[3]=5&field_state_value[4]=6&field_state_value[5]=7&field_state_value[6]=8&field_state_value[7]=9&field_state_value[8]=10&field_state_value[9]=11&field_state_value[10]=12&field_state_value[11]=13&field_state_value[12]=14&field_state_value[13]=15&field_state_value[14]=16&field_state_value[15]=17&field_state_value[16]=18&field_state_value[17]=19&field_state_value[18]=20&field_state_value[19]=21&field_state_value[20]=22&field_state_value[21]=23&field_state_value[22]=24&field_state_value[23]=25&field_state_value[24]=26&field_state_value[25]=27&field_state_value[26]=28&field_state_value[27]=29&field_state_value[28]=30&field_state_value[29]=31&field_state_value[30]=32&field_state_value[31]=33&field_state_value[32]=34&field_state_value[33]=35&field_state_value[34]=36&field_state_value[35]=37&field_state_value[36]=38&page=12"
    page = urllib2.urlopen(quote_page)

    soup = BeautifulSoup(page, "html.parser")
    headingValue = soup.find_all('h3', attrs={"class": "views-accordion-schemes-schemedisplay-header"})
    view_content = soup.find('div', attrs={"class": "view-content"})

    if view_content is None:
        # soup.find returns None when nothing matches; report that clearly
        # instead of failing with an AttributeError on the next call.
        print("No <div class=\"view-content\"> element found in the page")
        arrayOfdivAfterContent = []
    else:
        # To get a list of elements use find_all instead of find_next.
        # find_all matches every descendant <div>; pass recursive=False to
        # restrict the search to direct children only.
        arrayOfdivAfterContent = view_content.find_all('div')
except Exception as E:
    # Any other failure (e.g. a network error) is just printed.
    print(E)

您使用的 find_next 只会返回下一个元素,而不是元素列表;要获取元素列表,请改用 find_all。