I am trying to load the videoIds from a pickle file and then print each YouTube video's description. I get this error: 'bytes' object has no attribute 'findAll'. I want to extract the description, along with the videoid and title, into a CSV.
import os
import pickle
import csv
import requests
from bs4 import BeautifulSoup as bs

cwd = os.getcwd()
parent_folder = os.path.join(cwd, 'Data')
pickle_out = open("C:/Users/india/Desktop/PARUL/Data/Pickle/vid_ids_dict.pickle", "rb")
vid_id_dict = pickle.load(pickle_out)

dataset_folder = os.path.join(parent_folder, "Dataset")
if not os.path.exists(dataset_folder):
    os.makedirs(dataset_folder)

csv_file_path = os.path.join(parent_folder, 'main.csv')
base = "https://www.youtube.com/watch?v="

for keys, values in vid_id_dict.items():
    for key in keys:
        query_dataset_folder = os.path.join(dataset_folder, key)
        if not os.path.exists(query_dataset_folder):
            os.makedirs(query_dataset_folder)
        for VidID in values:
            r = requests.get(base + VidID)
            soup = bs(r.text, 'html.parser').encode("utf-8")
            name = VidID + ".txt"
            save_description_link = os.path.join(query_dataset_folder, name)
            f = open(save_description_link, "a+")
            for title in soup.findAll('p', attrs={'id': 'eow-description'}):
                description = title.text.strip()
                f.write(description)
                print(description)
            f.close()
            for title in soup.findAll('span', attrs={'class': 'watch-title'}):
                vid_title = title.text.strip()
                print(vid_title)
            with open(csv_file_path, 'a+') as csvfile:
                fieldnames = ['Video id', 'Title', 'Description', 'Category']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writerow({'Video id': VidID, 'Title': vid_title, 'Description': description, 'Category': key})
Error -

AttributeError                            Traceback (most recent call last)
     35 f = open(save_description_link, "a+")
     36
---> 37 for title in soup.findAll('p', attrs={'id': 'eow-description'}):
     38     description = title.text.strip()
     39     f.write(description)

AttributeError: 'bytes' object has no attribute 'findAll'
Answer (score: 0)
I assume you are using the requests module to get the HTTP response. The problem is the .encode("utf-8") call: it turns the BeautifulSoup object into a plain bytes object, and bytes has no findAll method. Just replace that part of your code with:
r.encoding = 'utf-8'
soup = bs(r.content, 'html.parser')
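If it helps to see the difference in isolation, here is a minimal standalone sketch (the HTML string is just a made-up example, not what YouTube actually returns) showing why the original call fails and the fixed version works:

from bs4 import BeautifulSoup as bs

html = "<p id='eow-description'>some description</p>"

broken = bs(html, 'html.parser').encode("utf-8")
print(type(broken))   # <class 'bytes'> -- calling broken.findAll(...) raises AttributeError

fixed = bs(html, 'html.parser')
print(type(fixed))    # <class 'bs4.BeautifulSoup'>
print(fixed.findAll('p', attrs={'id': 'eow-description'})[0].text)   # prints: some description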
The final working code:
for keys, values in vid_id_dict.items():
    for key in keys:
        query_dataset_folder = os.path.join(dataset_folder, key)
        if not os.path.exists(query_dataset_folder):
            os.makedirs(query_dataset_folder)
        for VidID in values:
            r = requests.get(base + VidID)
            r.encoding = 'utf-8'
            soup = bs(r.content, 'html.parser')   # keep soup as a BeautifulSoup object, not bytes
            name = VidID + ".txt"
            save_description_link = os.path.join(query_dataset_folder, name)
            f = open(save_description_link, "a+")
            for title in soup.findAll('p', attrs={'id': 'eow-description'}):
                description = title.text.strip()
                f.write(description)
                print(description)
            f.close()
            for title in soup.findAll('span', attrs={'class': 'watch-title'}):
                vid_title = title.text.strip()
                print(vid_title)
            with open(csv_file_path, 'a+') as csvfile:
                fieldnames = ['Video id', 'Title', 'Description', 'Category']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writerow({'Video id': VidID, 'Title': vid_title, 'Description': description, 'Category': key})
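One side note, not required for the fix: the script appends rows to main.csv without ever writing a header row. If you want one, a minimal sketch (reusing csv_file_path and the same fieldnames from above; the existence check is just one simple way to write the header only once) would be:

import csv
import os

fieldnames = ['Video id', 'Title', 'Description', 'Category']
if not os.path.exists(csv_file_path):
    with open(csv_file_path, 'w', newline='') as csvfile:
        csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()

Run this once before the loops start appending rows.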