我有此规则可以从index.php重定向
from bs4 import BeautifulSoup
import requests
from collections import ChainMap
# Build the listing URLs for page indices 0 through 4. All filter parameters
# in the query string are left empty or set to "All"; only `page` varies.
pages = [
    'https://sportmedbc.com/practitioners?field_profile_first_name_value=&field_profile_last_name_value=&field_pract_profession_tid=All&city=&taxonomy_vocabulary_5_tid=All&page=' + str(i)
    for i in range(0, 5)
]

# NOTE(review): `soup` is rebound on every iteration, so once this loop ends
# it refers only to the document parsed from the last URL in `pages`. Code
# that wants every page must consume `soup` inside this loop -- confirm intent.
for item in pages:
    # An explicit timeout keeps an unresponsive server from blocking forever;
    # without it requests waits indefinitely.
    page = requests.get(item, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')
def get_data(soup):
    """Yield one record per practitioner entry found in *soup*.

    Args:
        soup: A parsed document exposing ``select(css)``; each element it
            returns must expose ``select_one(css)``, whose result is either
            ``None`` or an object with a ``text`` attribute.

    Yields:
        collections.ChainMap: The fields scraped from one entry layered over
        a shared defaults mapping, so ``name``, ``clinic``, ``profession``,
        ``region`` and ``city`` are always readable. A field whose element is
        absent, or whose text is empty or whitespace-only, reads as ``'n/a'``.
        Found text is stored exactly as it appears (it is not stripped).
    """
    default_data = {'name': 'n/a', 'clinic': 'n/a', 'profession': 'n/a', 'region': 'n/a', 'city': 'n/a'}
    fields = ('name', 'clinic', 'profession', 'region', 'city')
    for doctor in soup.select('.view-practitioners .practitioner'):
        doctor_data = {}
        for field in fields:
            node = doctor.select_one('.practitioner__' + field)
            # select_one() returns None when the entry has no such element;
            # skip it so the default applies instead of raising AttributeError.
            if node is None:
                continue
            text = node.text
            if text.strip():
                doctor_data[field] = text
        yield ChainMap(doctor_data, default_data)
# Print every record produced by get_data(soup) as labelled lines, with an
# 80-character dashed rule after each record. The tab padding in each label
# is part of the emitted text.
for doctor in get_data(soup):
    for label, key in (
        ('name:\t\t', 'name'),
        ('clinic:\t\t', 'clinic'),
        ('profession:\t', 'profession'),
        ('city:\t\t', 'city'),
        ('region:\t\t', 'region'),
    ):
        print(label, doctor[key])
    print('-' * 80)
但是它仅适用于把地址 site.com/index.php 重定向到 site.com。我需要把所有包含 index.php 的地址(例如 site.com/categories/rings/index.php)重定向到其所在目录(例如 site.com/categories/rings/)。谢谢!
答案 0 :(得分:1)
问题出在您的 RewriteCond 条件上。
请改用下面的规则:
RewriteRule ^(.*/)?index\.php$ https://%{HTTP_HOST}/$1 [L,R=301]
如果要保留针对端口的 RewriteCond 条件,
可以使用下面的写法:
RewriteCond %{SERVER_PORT} ^443$
RewriteRule ^(.*/)?index\.php$ https://%{HTTP_HOST}/$1 [L,R=301]