我想在https://lawyers.justia.com/lawyer/robin-d-gross-39828上抓取该个人的网站和Blog的链接。
我到目前为止:
-- Goal (from the question): for each row, subtract the previous row's
-- value within the same PERSON partition, ordered by ID.
-- LAG(col, 1, 0): value of `col` one row back in the partition,
-- defaulting to 0 when there is no previous row (the partition's first row).
SELECT
A - LAG(A, 1, 0) OVER (PARTITION BY PERSON ORDER BY ID),
B - LAG(B, 1, 0) OVER (PARTITION BY PERSON ORDER BY ID),
C - LAG(C, 1, 0) OVER (PARTITION BY PERSON ORDER BY ID),
D - LAG(D, 1, 0) OVER (PARTITION BY PERSON ORDER BY ID),
E - LAG(E, 1, 0) OVER (PARTITION BY PERSON ORDER BY ID)
-- ".... " below is the asker's ellipsis for the remaining columns;
-- this fragment is not executable as written.
....
FROM Table
答案 0（得分：0）：
-- External contacts for a customer's roles of type 2.
-- Fixes:
--  * GROUP BY listed only r.id and employee_mail while the SELECT list has
--    five more non-aggregated columns — invalid ANSI SQL (only runs under
--    MySQL with ONLY_FULL_GROUP_BY disabled). All selected columns are now
--    grouped; with no aggregate present, GROUP BY here only deduplicates
--    rows multiplied by the joins.
--  * Added the missing statement terminator (the next query followed
--    back-to-back with no ';').
-- NOTE(review): role_type_id and fs_customer_id are unqualified — confirm
-- which tables own them. Filtering the left-joined table via
-- fs_customer_id in WHERE silently turns that LEFT JOIN into an INNER
-- JOIN; the second query in this answer moves the filter into ON instead.
select r.id as role_id
     , e.id
     , role_name
     , role_description
     , contact_name
     , e.employee_mail
     , contact_phone
  from rcsa.fs_role r
  left join rcsa.fs_customer_roles_external e
    on e.fs_role_id = r.id
  left join rcsa.fs_business_service_contacts c
    on e.employee_mail = c.contact_email
 where role_type_id = 2
   and fs_customer_id = :account
 group by r.id
        , e.id
        , role_name
        , role_description
        , contact_name
        , e.employee_mail
        , contact_phone;
-- Reworked query: the customer filter lives in the LEFT JOIN's ON clause,
-- so roles with no matching customer row are preserved (NULL columns)
-- instead of being dropped by a WHERE filter.
-- Fixes:
--  * "group by role.id" referenced alias `role`, which is never defined —
--    the table is aliased `r`; this query would not parse.
--  * GROUP BY now lists every non-aggregated SELECT column (ANSI rule);
--    with no aggregates present it serves only to deduplicate join output.
--  * Added explicit AS for the column alias and a statement terminator.
select r.id as role_id
     , e.id
     , role_name
     , role_description
     , contact_name
     , e.employee_mail
     , contact_phone
  from rcsa.fs_role r
  left join rcsa.fs_customer_roles_external e
    on e.fs_role_id = r.id
   and fs_customer_id = ?
  left join rcsa.fs_business_service_contacts c
    on c.contact_email = e.employee_mail
 where role_type_id = 2
 group by r.id
        , e.id
        , role_name
        , role_description
        , contact_name
        , e.employee_mail
        , contact_phone;
输出:
from bs4 import BeautifulSoup
import requests

# Scrape the personal-website and blog links from a Justia lawyer profile.
# A browser User-Agent is sent because the site rejects the default
# python-requests one (the original answer did the same).
HEADERS = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) '
                   'Gecko/20100101 Firefox/73.0')}
PROFILE_URL = "https://lawyers.justia.com/lawyer/robin-d-gross-39828"

# timeout: without it requests can hang indefinitely on a stalled connection.
response = requests.get(PROFILE_URL, headers=HEADERS, timeout=30)
# Fail loudly on HTTP 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
# The profile page tags outbound anchors with a data-vars-action attribute;
# 'ProfileWebsite' and 'ProfileBlogPost' mark the website and blog links.
# find_all is the modern name for the legacy findAll alias.
for link in soup.find_all("a", {'data-vars-action': ['ProfileWebsite', 'ProfileBlogPost']}):
    print(link.get("href"))