我无法让下面的代码自动翻到下一页并继续抓取 Indeed.com 的数据。请问应该如何解决这个问题?
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
import time
# First results page for the query q=Amazon (the l= parameter is left blank).
URL = "https://www.indeed.com/jobs?q=Amazon&l="
# Get the html info of the page
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces an HTTP error instead of letting the
# error page be parsed as if it were a results page.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, "html.parser")
# Get the job title
def extract_job_title_from_result(soup):
    """Collect the job titles found in the result rows of a parsed page.

    Every ``<a data-tn-element="jobTitle">`` inside a ``<div class="row">``
    contributes one entry.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find_all``.

    Returns:
        list[str]: One title per matching anchor, in document order.
    """
    jobs = []
    for div in soup.find_all(name="div", attrs={"class": "row"}):
        for a in div.find_all(name="a", attrs={"data-tn-element": "jobTitle"}):
            # Prefer the ``title`` attribute; fall back to the anchor's visible
            # text so an anchor without that attribute no longer raises
            # KeyError and aborts the whole extraction.
            jobs.append(a.get("title") or a.get_text(strip=True))
    return jobs
# Standalone call: the returned list is not stored here (an interactive
# session would echo it; a plain script run discards it).
extract_job_title_from_result(soup)
# Get company name
def extract_company_from_result(soup):
    """Collect the company names found in the result rows of a parsed page.

    For each ``<div class="row">`` the ``<span class="company">`` elements are
    used; if a row has none, its ``<span class="result-link-source">``
    elements are used instead.  A row with neither contributes nothing, so
    the returned list may be shorter than the number of rows.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find_all``.

    Returns:
        list[str]: Whitespace-stripped company names in document order.
    """
    companies = []
    for div in soup.find_all(name="div", attrs={"class": "row"}):
        # find_all returns a list-like result that is falsy when empty, so
        # ``or`` selects the fallback spans only when no company span exists.
        spans = div.find_all(
            name="span", attrs={"class": "company"}
        ) or div.find_all(name="span", attrs={"class": "result-link-source"})
        companies.extend(span.text.strip() for span in spans)
    return companies
# Standalone call: the returned list is not stored (it is recomputed below).
extract_company_from_result(soup)
# NOTE(review): extract_location_from_result and extract_summary_from_result
# are not defined in the code shown here; they must be defined before this
# point for the two lines that call them to run -- confirm.
locations = extract_location_from_result(soup)
jobs = extract_job_title_from_result(soup)
companies = extract_company_from_result(soup)
summary = extract_summary_from_result(soup)
columns = {'company_name': companies, 'job_title': jobs}
# Build one row per dict key and then transpose: unlike a plain
# DataFrame(columns), this tolerates lists of different lengths by padding
# the shorter one with missing values.
df = pd.DataFrame.from_dict(columns, orient='index')
df = df.transpose()
我尝试过在 URL 中添加分页参数并在 for 循环中使用它,但没有成功。如果能提供一个可行的解决方案,我将不胜感激。
答案 0 :(得分:2)
使用页码参数翻到下一页。请尝试下面的代码,并告诉我它是否对您有效。
from bs4 import BeautifulSoup
import pandas as pd
import re
# Browser-like User-Agent sent with every request.
headers = {'User-Agent':
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
page = "https://www.indeed.com/jobs?q=Amazon&l="
company_name = []
job_title = []
# Value of the "start" query parameter for the next page to request; it
# advances by 10 after each page.
page_num = 10
# Compiled once and reused on every page to look for the "Next" link text.
next_pattern = re.compile("Next")
# The with-block closes the session's pooled connections when the loop ends,
# including when a request raises.
with requests.Session() as session:
    while True:
        # The loop has no fixed page count, so bound each request with a
        # timeout and stop on HTTP errors rather than parsing an error page.
        response = session.get(page, headers=headers, timeout=30)
        response.raise_for_status()
        page_soup = BeautifulSoup(response.content, 'html.parser')
        job_links = page_soup.find_all("a", {"data-tn-element": "jobTitle"})
        company_spans = page_soup.find_all("span", {"class": "company"})
        # zip pairs entries by position and stops at the shorter list.
        for company, job in zip(company_spans, job_links):
            company_name.append(company.text.replace("\n", ""))
            job_title.append(job.text)
        # Stop when the page has no <span> whose text matches "Next".
        # ``string=`` is the current name of the argument formerly called
        # ``text=`` in Beautiful Soup.
        if not page_soup.find("span", string=next_pattern):
            break
        page = "https://www.indeed.com/jobs?q=Amazon&start={}".format(page_num)
        page_num += 10
df = pd.DataFrame({"company_name": company_name, "job_title": job_title})
print(df.head(1000))
输出:
company_name job_title
0 Amazon HVH Warehouse Team Member (Part-Time, Full-Time, F...
1 Amazon HVH Warehouse Team Member (Seasonal, Part-Time, Fu...
2 Amazon HVH Warehouse/Shopper Team Member (Seasonal, Part-...
3 Amazon.com Amazon Go Retail Associate - Full-time & Part-...
4 Amazon HVH Warehouse Team Member (Seasonal, Part-Time, Fl...
5 Amazon HVH Warehouse/Shopper Team Member
6 Amazon HVH Amazon Warehouse Fulfillment Associate
7 Amazon.com Amazon Go Retail Associate - Overnight Shift
8 Amazon HVH Warehouse Team Member
9 Amazon HVH Shopper Team Member (Seasonal, Part-Time, Full...
10 Amazon HVH Warehouse/Shopper Team Member (Seasonal, Part-...
11 Amazon HVH Warehouse Team Member (Seasonal, Full-Time)
12 ISS Allentown - Hiring for Amazon Fulf... Help Wanted
13 Amazon HVH Warehouse (Seasonal, Part-Time, Flexible Hours)
14 Amazon.com Services, Inc. Process Assistant
15 Amazon HVH Warehouse Shopper/Team Member- Moonachie, Tete...
16 Amazon HVH Warehouse/Shopper Team Member (Seasonal, Part-...
17 Amazon.com Services, Inc. Lead Fulfillment Associate
18 Amazon HVH Warehouse Team Member (Seasonal, Part Time, Fl...
19 Amazon HVH Part-Time Amazon Fresh Pickup Associate
20 Amazon.com Amazon Go Lead Retail Associate - Overnight
21 Amazon.com Services, Inc. Full Time Shift Assistant
22 Amazon.com Amazon Go Lead Retail Associate
23 Amazon.com Services, Inc. Receiving Associate
24 Amazon.com Packager - Amazon Go
25 Amazon Retail LLC Warehouse Associate - Amazon Go
26 Amazon.com Retail Sales Associate - Woodridge, IL
27 Amazon.com Services, Inc. Operations Admin Assistant
28 Amazon HVH Amazon Warehouse - Milford, MA
29 Amazon.com Seasonal Delivery Associate
.. ... ...
970 Amazon.com Optimization Specialist
971 Amazon.com Services, Inc. Operations Program Manager, Social Responsibility
972 Amazon.com Services, Inc. Paid Media Manager
973 Amazon.com Services, Inc. Amazon S3, Software Development Engineer
974 Amazon.com Sr. Facilities Manager
975 Amazon.com Software Development Engineer - Amazon Devices
976 Amazon.com Services, Inc. Senior HR Specialist- Work Authorization
977 Amazon.com Media Software Engineer - Amazon Chime
978 Amazon.com Services, Inc. Senior Designer - Digital
979 Amazon.com Services, Inc. Knowledge Engineer
980 Amazon.com Services, Inc. Research Engineer
981 Amazon.com Services, Inc. Data Engineer, Talent Management Analytics
982 Amazon.com Services, Inc. AWS TRANSPORTATION MANAGER
983 Amazon.com Strategic Partner Development Manager, Retail ...
984 Amazon.com Services, Inc. Software Development Engineer, Localization - ...
985 Amazon Services LLC Email Marketing Specialist
986 Amazon.com Services, Inc. Event Producer Manager
987 Amazon.com Content Strategist
988 Amazon Robotics LLC Commodity Management Analyst
989 Amazon Web Services, Inc. AWS Institute Operations and Relations Manager
990 Amazon.com Services, Inc. Marketing Manager, Cleo
991 Amazon.com Services, Inc. Manager, Programmatic Partner Manager
992 Amazon.com GSOC Program Manager (Amazon Business Assuranc...
993 Amazon.com Services, Inc. Sr. HR Assistant - Military Spouse Preferred -...
994 Amazon Studios LLC Sr. Development and Programming Executive - Ge...
995 Amazon.com Services, Inc. Financial Analyst II, AGFS FP&A
996 Amazon Capital Services, Inc. Principal Enterprise Sales - Amazon Connect
997 Amazon Digital Services LLC Sr Product Manager, Amazon Photos
998 Amazon.com Services, Inc. Prime Air Site Lead
999 Amazon.com Services, Inc. Applied Scientist Winter/Fall Internship - Nat...