我正在做一个每隔 X 分钟抓取不同新闻网站的项目。我有 3 个函数,每个函数负责抓取一个特定的网站,是否可以把这 3 个函数合并成一个函数?下面是其中一个函数的代码示例。这几个函数的逻辑完全相同,只是要抓取的网址不同。我希望每隔 X 分钟运行一次,有没有办法让它们在同一个 Lambda 中一起运行,而不是分别单独运行?
import json
import boto3
import uuid
import feedparser
from bs4 import BeautifulSoup
import requests
def generate_id(stri):
    """Build a record id from an article URL.

    The id is the literal prefix ``'youm'`` followed by the last
    ``/``-separated segment of ``stri``.

    Trailing slashes are stripped first: without that, every URL ending in
    ``/`` would have an empty last segment and collapse to the same id
    ``'youm'``.

    Args:
        stri: The article URL (or any ``/``-separated string).

    Returns:
        The id string, e.g. ``'youm4657319'``.
    """
    return 'youm' + stri.rstrip('/').split('/')[-1]
def translate_date(dt):
    """Rewrite an Arabic date string using English day/month abbreviations.

    The first Arabic day name found in ``dt`` is replaced by its three-letter
    English abbreviation (``'Mon'`` ... ``'Sun'``), every Arabic month name
    found is replaced by its abbreviation (``'Jan'`` ... ``'Dec'``), and the
    Arabic comma ``'،'`` is replaced by ``','``. Text that matches none of
    the known names is returned unchanged.

    Args:
        dt: The date string to translate.

    Returns:
        The translated date string.
    """
    months_egypt = ['كانون الثاني', 'شهر فبراير', 'مارس', 'أبريل', 'قد', 'يونيو', 'يوليو', 'أغسطس', 'سبتمبر', 'شهر اكتوبر', 'شهر نوفمبر', 'ديسمبر']
    months_english = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
    days_egypt = ['الإثنين', 'الثلاثاء', 'الأربعاء', 'الخميس', 'يوم الجمعة', 'يوم السبت', 'الأحد']
    days_english = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # NOTE(review): some spellings in the tables above ('قد' for May,
    # 'كانون الثاني' for January, the 'شهر ' / 'يوم ' prefixed forms) look
    # like translation artifacts and will not match the plain names. They are
    # kept so existing matches behave exactly as before; the plain spellings
    # below are only tried afterwards. Confirm against the real feed.
    day_fallbacks = [('الجمعة', 'Friday'), ('السبت', 'Saturday')]
    month_fallbacks = [
        ('يناير', 'January'),
        ('فبراير', 'February'),
        ('مايو', 'May'),
        ('أكتوبر', 'October'),
        ('اكتوبر', 'October'),
        ('نوفمبر', 'November'),
    ]

    # Only one day name is expected, so stop at the first match.
    for arabic, english in list(zip(days_egypt, days_english)) + day_fallbacks:
        if arabic in dt:
            dt = dt.replace(arabic, english[:3])
            break

    # The original (longer, prefixed) spellings run first so that e.g.
    # 'شهر فبراير' becomes 'Feb' rather than 'شهر Feb'.
    for arabic, english in list(zip(months_egypt, months_english)) + month_fallbacks:
        if arabic in dt:
            dt = dt.replace(arabic, english[:3])

    return dt.replace('،', ',')
def _scrape_article(url):
    """Download ``url`` and extract the article text and byline from its HTML.

    Returns:
        A ``(content, author)`` tuple taken from the ``div`` with
        ``id="articleBody"`` and the ``span`` with ``class="writeBy"``, or
        ``None`` when the request fails or either element is missing.
    """
    try:
        # Without a timeout a stalled server would block this call until the
        # Lambda invocation itself is terminated.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print('failed to fetch article', url, exc)
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    body = soup.find('div', {'id': 'articleBody'})
    byline = soup.find('span', {'class': 'writeBy'})
    # find() returns None when nothing matches; calling .text on that would
    # raise and abort the whole run.
    if body is None or byline is None:
        print('unexpected article markup, skipping', url)
        return None
    return body.text, byline.text


def lambda_handler(event, context):
    """Lambda entry point: store feed entries that are not yet in the table.

    Parses the feed at ``'x-site'``, and for every entry whose generated id is
    not already present in the DynamoDB table ``"news"``, downloads the
    article page, extracts its body and author, and writes one item to the
    table. Entries that cannot be fetched or parsed, or that carry fewer than
    two feed links, are logged and skipped so one bad article does not stop
    the remaining entries from being processed.

    Args:
        event: Invocation payload; not used.
        context: Invocation context; not used.

    Returns:
        None.
    """
    table = boto3.resource('dynamodb').Table("news")
    entries = feedparser.parse('x-site').entries
    print(entries)

    for entry in entries:
        link = entry['link']
        print(link)
        item_id = generate_id(link)
        print(item_id)

        record = table.get_item(Key={'_id': item_id})
        if 'Item' in record:
            print('item already in db', record['Item']['_id'])
            continue

        print(link)
        # NOTE(review): the image URL is read from the second feed link, as in
        # the original code — presumably the enclosure; confirm for this feed.
        feed_links = entry.get('links') or []
        if len(feed_links) < 2:
            print('entry has no image link, skipping', link)
            continue

        article = _scrape_article(link)
        if article is None:
            continue
        content, author = article

        item = {
            'title': entry['title'],
            'image': feed_links[1]['href'],
            'link': link,
            'date': translate_date(entry['published']),
            'id': link,
            'content': content,
            'author': author,
            'source': 'youm',
            '_id': item_id,
        }
        table.put_item(Item=item)
        print('and its in')