global twitter_user_info
twitter_user_info=[]
def get_user_info(twitter_user):
"""
An example of using the query_user_info method
:param twitter_user: the twitter user to capture user data
:return: twitter_user_data: returns a dictionary of twitter user data
"""
user_info = query_user_info(user=twitter_user)
twitter_user_data = {}
twitter_user_data["user"] = user_info.user
twitter_user_data["fullname"] = user_info.full_name
twitter_user_data["location"] = user_info.location
twitter_user_data["blog"] = user_info.blog
twitter_user_data["date_joined"] = user_info.date_joined
twitter_user_data["id"] = user_info.id
twitter_user_data["num_tweets"] = user_info.tweets
twitter_user_data["following"] = user_info.following
twitter_user_data["followers"] = user_info.followers
twitter_user_data["likes"] = user_info.likes
twitter_user_data["lists"] = user_info.lists
return twitter_user_data
absd=[]
def main():
start = time.time()
csv = pd.read_csv('operationbandar_users.csv')
users = csv['username']
pool = Pool(4)
for user in pool.map(get_user_info,users):
twitter_user_info.append(user)
cols=['id','fullname','date_joined','location','blog', 'num_tweets','following','followers','likes','lists']
data_frame = pd.DataFrame(twitter_user_info, index=absd, columns=cols)
data_frame.index.name = "Users"
data_frame.sort_values(by="followers", ascending=False, inplace=True, kind='quicksort', na_position='last')
elapsed = time.time() - start
print(f"Elapsed time: {elapsed}")
display(data_frame)
此代码返回以下错误:
multiprocessing.pool.RemoteTraceback: “” 追溯(最近一次通话): 在工作程序中的文件“ /home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py”,第121行 结果=(真,func(* args,* kwds)) 在mapstar中,文件“ /home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py”,第44行 返回列表(map(* args)) get_user_info中的文件“ examples / get_twitter_user_data_1.py”,第43行
twitter_user_data [“ user”] = user_info.user
AttributeError:“ NoneType”对象没有属性“ user” “”“
上面的异常是以下异常的直接原因:
追溯(最近一次通话): 在第89行的文件“ examples / get_twitter_user_data_1.py”中 主要() 主文件中的文件“ examples / get_twitter_user_data_1.py”,第66行 适用于pool.map(get_user_info,users)中的用户: 地图中的文件“ /home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py”,第268行 返回self._map_async(func,可迭代,mapstar,chunksize).get() 在get中,文件“ /home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py” 提高自我。_value
AttributeError:“ NoneType”对象没有属性“ user”
我在这里用pool.map和传递的字典作为参数搜索了可能的情况,但是找不到问题。我是python的新手。有人可以帮忙吗?
query_user_info()
def query_user_info(user):
"""
Returns the scraped user data from a twitter user page.
:param user: the twitter user to web scrape its twitter page info
"""
try:
user_info = query_user_page(INIT_URL_USER.format(u=user))
if user_info:
logger.info("Got user information from username {}".format(user))
return user_info
except KeyboardInterrupt:
logger.info("Program interrupted by user. Returning user information gathered so far...")
except BaseException:
logger.exception("An unknown error occurred! Returning user information gathered so far...")
logger.info("Got user information from username {}".format(user))
return user_info
query_user_page()
def query_user_page(url, retry=10, timeout=60):
"""
Returns the scraped user data from a twitter user page.
:param url: The URL to get the twitter user info from (url contains the user page)
:param retry: Number of retries if something goes wrong.
:return: Returns the scraped user data from a twitter user page.
"""
print("reached url:",url)
try:
proxy = next(proxy_pool)
logger.info('Using proxy {}'.format(proxy))
response = requests.get(url, headers=HEADER, proxies={"http": proxy})
html = response.text or ''
user_info = User.from_html(html)
if not user_info:
return None
return user_info
except requests.exceptions.HTTPError as e:
logger.exception('HTTPError {} while requesting "{}"'.format(
e, url))
except requests.exceptions.ConnectionError as e:
logger.exception('ConnectionError {} while requesting "{}"'.format(
e, url))
except requests.exceptions.Timeout as e:
logger.exception('TimeOut {} while requesting "{}"'.format(
e, url))
if retry > 0:
logger.info('Retrying... (Attempts left: {})'.format(retry))
return query_user_page(url, retry-1)
logger.error('Giving up.')
return None