from BeautifulSoup import BeautifulSoup
import requests
import re
# Question snippet: scrape every person listed under <ul id="object-a-1">.
#
# NOTE(review): one dict is shared by all three loops below, so every iteration
# overwrites the same 'Name' / 'url' / 'email' keys and only the values from
# the last matching element survive. That is the behaviour being asked about,
# so it is intentionally left as-is here; the answers collect a list of
# per-person records instead.
person_dict = {}

# NOTE(review): `response` is not defined in this snippet -- presumably the
# result of an earlier `requests.get(...)` call; confirm against the caller.
soup = BeautifulSoup(response.text)
div = soup.find('div', {'id': 'object-a'})
ul = div.find('ul', {'id': 'object-a-1'})
li_a = ul.findAll('a', {'class': 'title'})
li_p = ul.findAll('p', {'class': 'url word'})
li_po = ul.findAll('p')  # every <p>, including the 'url word' ones

for a in li_a:
    nametemp = a.text
    # Keep only the part of the link text before the first ' - ' separator.
    name = (nametemp.split(' - ')[0])
    person_dict.update({'Name': name})  # I attempted updating

for lip in li_p:
    person_dict['url'] = lip.text  # I attempted adding directly

# Compiled once, outside the loop. The pattern stops at '@', so it captures
# only the local part of an address plus the '@' sign.
reg_emails = re.compile('[a-zA-Z0-9.]*' + '@')
for email in li_po:
    person_dict['email'] = reg_emails.findall(email.text)

# Single-argument parenthesised form prints the same under Python 2 and 3.
print(person_dict)  # results in 1 entry being returned
<div id="object-a">
<ul id="object-a-1">
<li>
<a href="www.url.com/person" class="title">Person1</a>
<p class="url word">www.url.com/Person1</p>
<p>Person 1, some foobar possibly an email@address.com ...</p>
</li>
<li>
<a href="www.url.com/person" class="title">Person2</a>
<p class="url word">www.url.com/Person1</p>
<p>Person 2, some foobar possibly an email@address.com ...</p>
</li>
<li>
<a href="www.url.com/person" class="title">Person3</a>
<p class="url word">www.url.com/Person1</p>
<p>Person 3, some foobar, possibly an email@address.com ...</p>
</li>
</ul>
</div>
答案 0 :(得分:1)
我建议您将所有条目存储在列表中。以下代码显示了两个建议:使用元组列表,或使用字典列表。
private void setDateTimeField () {
// Body of setDateTimeField(): builds the date-picker dialog used to choose a
// birthday and reports the chosen date back to the user with a toast.
final Calendar newCalendar = Calendar.getInstance();
mDatePickerDialog = new DatePickerDialog(AddBirthday.this, new OnDateSetListener() {
    public void onDateSet(DatePicker view, int year, int monthOfYear, int dayOfMonth) {
        // NOTE(review): newDate is populated but never read in the visible code.
        Calendar newDate = Calendar.getInstance();
        newDate.set(year, monthOfYear, dayOfMonth);
        // NOTE(review): `c`, `mYear`, `getAge` and `dateSelected` are not declared
        // in this snippet -- presumably fields of AddBirthday; confirm.
        mYear = c.get(Calendar.YEAR);
        getAge = mYear - year;
        if (getAge == 0) {
            // A birth year equal to the current year is rejected.
            SuperActivityToast.create(AddBirthday.this, "Invalid Date of Birthday!",
                    SuperToast.Duration.SHORT, Style.getStyle(Style.RED, SuperToast.Animations.FLYIN)).show();
        } else {
            // monthOfYear + 1: presumably the callback reports a 0-based month -- confirm.
            dateSelected = String.valueOf(dayOfMonth) + " /" + String.valueOf(monthOfYear + 1)
                    + " /" + String.valueOf(year);
            // The original built this toast but never displayed it; show() was missing.
            SuperActivityToast.create(AddBirthday.this, "Notification set for: " + dayOfMonth + "/"
                    + (monthOfYear + 1) + "/" + year, SuperToast.Duration.SHORT,
                    Style.getStyle(Style.RED, SuperToast.Animations.FLYIN)).show();
        }
    }
    // The dialog initially displays today's date.
}, newCalendar.get(Calendar.YEAR), newCalendar.get(Calendar.MONTH), newCalendar.get(Calendar.DAY_OF_MONTH));
}
# Two possible ways of storing your data: a list of tuples, or a list of dictionaries
entries_tuples = []
entries_dictionary = []

# NOTE(review): `text` is not defined in this snippet -- presumably the HTML of
# the page being scraped; confirm against the caller.
soup = BeautifulSoup(text)
div = soup.find('div', {'id': 'object-a'})
ul = div.find('ul', {'id': 'object-a-1'})

# Build one record per <li>, so nothing is overwritten between people.
for li in ul.findAll('li'):
    title = li.find('a', {'class': 'title'})
    url_paragraph = li.find('p', {'class': 'url word'})
    if title is None or url_paragraph is None:
        # Skip list items that do not have the expected link / URL markup
        # instead of failing on a None lookup.
        continue

    url_href = title.get('href')
    person = title.text
    url_word = url_paragraph.text

    # The free-text description is the second <p>; it may be absent.
    paragraphs = li.findAll('p')
    description = paragraphs[1].text if len(paragraphs) > 1 else ''

    # Allow for multiple emails. Every whitespace-delimited token that has
    # text on both sides of an '@' is taken. The earlier pattern consumed the
    # whitespace around each match, so it missed an address at the very start
    # of the text and the second of two addresses separated by a single space.
    emails = re.findall(r'\S+@\S+', description)

    entries_tuples.append((url_href, person, url_word, emails))
    entries_dictionary.append({'url_href' : url_href, 'person' : person, 'url_word' : url_word, 'emails' : emails})

# Single-argument parenthesised print behaves the same on Python 2 and 3.
for url_href, person, url_word, emails in entries_tuples:
    print('{:25} {:10} {:25} {}'.format(url_href, person, url_word, emails))

for entry in entries_dictionary:
    print('{:25} {:10} {:25} {}'.format(entry['url_href'], entry['person'], entry['url_word'], entry['emails']))
答案 1 :(得分:0)
from BeautifulSoup import BeautifulSoup
import re
# You are opening the file differently
# The `with` block closes the file handle once the document has been parsed.
with open('soup.html') as text:
    soup = BeautifulSoup(text)

list_items = soup.findAll('li')

# Compiled once and reused for every list item.
email_pattern = re.compile(r'[\w\.-]+@[\w\.-]+')

# One dictionary per <li>, collected in order of appearance.
people = []
for item in list_items:
    name = item.find('a', {'class': 'title'}).text
    url = item.find('p', {'class': 'url word'}).text
    # The second <p> holds the free text that may contain an email address.
    email_text = item.findAll('p')[1].text
    match = email_pattern.search(email_text)
    # The text only *possibly* contains an address; store None when it does not
    # rather than failing on match.group().
    email = match.group(0) if match else None
    person = {'name': name, 'url': url, 'email': email}
    # The original built `person` but never stored it, so `people` stayed empty.
    people.append(person)

# Single-argument parenthesised print behaves the same on Python 2 and 3.
print(people)