我目前正在尝试清理我从 Twitter 上抓取的一些数据,但是当我尝试打印 stamp_print 变量的文本(只是一个简单的时间戳)时,收到错误 TypeError: 'NoneType' object is not callable。我不太确定我做错了什么。
import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re
# Question snippet: fetch a Twitter profile page, collect the tweet paragraphs
# and their timestamp links, then try to print a cleaned timestamp followed by
# the tweet text. Running it fails with
# "TypeError: 'NoneType' object is not callable" at the stamp.match(...) line.
# NOTE(review): the loop-body indentation appears to have been lost in this
# paste; the statements after the `for` line presumably belong to the loop.
my_url = "https://twitter.com/realDonaldTrump?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"
# Download the raw HTML of the page, then close the connection.
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
# Tweet bodies: <p> elements carrying the tweet-text class string.
tweets = page_soup.findAll("p",{"class":"TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"})
# Timestamps: <a class="tweet-timestamp"> elements.
time_stamp = page_soup.find_all("a",{"class":"tweet-timestamp"})
# Manual index used to pair the i-th timestamp with the i-th tweet.
i = 0
for tweet in tweets:
stamp = time_stamp[i]
# `stamp` is a BeautifulSoup element, not a string or compiled regex. It has
# no `match` method: BeautifulSoup treats the unknown attribute as a lookup
# for a child tag named "match", which yields None, and calling None raises
# the reported TypeError. The regex has to be applied to the element's text
# through the `re` module instead.
# NOTE(review): the second alternative ends in `\sw+` (a literal "w"), which
# looks like a typo for `\s\w+`.
stamp_print = stamp.match('[0-9]+\s\w+|[0-9]+\sw+')
# `get_text` is referenced without parentheses here, so even with a valid
# object this would print the bound method rather than the text.
print(stamp_print.get_text)
i+=1
print('---------------------------------------------------------------------')
print(tweet.get_text())
print('\n')
答案 0 :(得分:-1)
最终改成下面这样就可以正常运行了,谢谢。
import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re
# Fetch a Twitter profile page and print, for every tweet, the timestamp
# fragments extracted from its timestamp link followed by the tweet text.
my_url = "https://twitter.com/realDonaldTrump?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"

# The context manager closes the connection even if read() raises.
with uReq(my_url) as uClient:
    page_html = uClient.read()

page_soup = soup(page_html, "html.parser")
tweets = page_soup.find_all(
    "p",
    {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"},
)
time_stamp = page_soup.find_all("a", {"class": "tweet-timestamp"})

# Matches either "<digits> <word>" or "<word> <digits>". Compiled once so the
# pattern is not looked up again on every loop iteration.
stamp_pattern = re.compile(r'[0-9]+\s\w+|\w+\s[0-9]+')

# zip pairs each timestamp with its tweet and stops at the shorter list, so a
# page with fewer timestamps than tweets no longer raises IndexError.
for stamp, tweet in zip(time_stamp, tweets):
    # The regex is applied to the element's text; the element itself has no
    # regex methods.
    print(stamp_pattern.findall(stamp.get_text()))
    print('---------------------------------------------------------------------')
    print(tweet.get_text())
    print('\n')