Python 打印正则表达式匹配结果时报错:'NoneType' 对象不可调用

时间:2018-04-28 16:01:49

标签: regex python-3.x beautifulsoup

我目前正在尝试清理从 Twitter 上抓取的一些数据,但当我尝试打印 stamp_print 变量的文本(只是一个简单的时间戳)时,收到错误 `TypeError: 'NoneType' object is not callable`。我不太确定我做错了什么。

import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re  

# Question script: download a Twitter profile page and try to print each
# tweet together with a short timestamp. This version raises
# TypeError: 'NoneType' object is not callable (see the NOTE in the loop).
my_url = "https://twitter.com/realDonaldTrump?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"
# Fetch the raw page bytes, then close the connection.
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()


# Parse the downloaded HTML with the built-in "html.parser" backend.
page_soup = soup(page_html, "html.parser")

# Collect the tweet paragraphs and the timestamp links by their CSS class.
tweets = page_soup.findAll("p",{"class":"TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"})
time_stamp = page_soup.find_all("a",{"class":"tweet-timestamp"})

# i indexes time_stamp in step with the iteration over tweets.
i = 0
for tweet in tweets:
    stamp = time_stamp[i]
    # NOTE: `stamp` is a BeautifulSoup Tag, not a compiled regex, so it has no
    # `match` method. BeautifulSoup resolves an unknown attribute as a lookup
    # for a child tag of that name (<match>), which presumably yields None
    # here; calling None is what raises the TypeError.
    # Also note the second alternative uses `\sw+` (literal "w"), which looks
    # like a missing backslash.
    stamp_print = stamp.match('[0-9]+\s\w+|[0-9]+\sw+')
    # NOTE: `get_text` is referenced without parentheses, so this would print
    # the method object rather than the text even if the line above worked.
    print(stamp_print.get_text)
    i+=1
    print('---------------------------------------------------------------------')
    print(tweet.get_text())
    print('\n')

1 个答案:

答案 0 :(得分:-1)

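最终这样写就可以正常工作了,谢谢。关键在于:`stamp` 是 BeautifulSoup 的 Tag 对象,没有 `match` 方法;应先用 `stamp.get_text()` 取出文本,再用 `re.findall` 对该文本做正则匹配。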

import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re  

# Answer script: download a Twitter profile page and print, for every tweet,
# the timestamp fragments found in its timestamp link followed by the tweet text.
my_url = "https://twitter.com/realDonaldTrump?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"

# The context manager closes the connection even if read() raises.
with uReq(my_url) as uClient:
    page_html = uClient.read()

# Parse the downloaded HTML with the built-in "html.parser" backend.
page_soup = soup(page_html, "html.parser")

# Collect the tweet paragraphs and the timestamp links by their CSS class.
tweets = page_soup.find_all("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"})
time_stamp = page_soup.find_all("a", {"class": "tweet-timestamp"})

# Matches "<digits> <word>" or "<word> <digits>"; compiled once, outside the loop.
stamp_pattern = re.compile(r'[0-9]+\s\w+|\w+\s[0-9]+')

for index, tweet in enumerate(tweets):
    # The page may contain fewer timestamp links than tweet paragraphs; fall
    # back to an empty string (findall then yields []) instead of raising
    # IndexError part-way through the output.
    stamp_text = time_stamp[index].get_text() if index < len(time_stamp) else ""
    # The regex runs on the tag's text: a Tag object has no regex methods.
    print(stamp_pattern.findall(stamp_text))
    print('---------------------------------------------------------------------')
    print(tweet.get_text())
    print('\n')