我有2个这样的功能:
import requests
from bs4 import BeautifulSoup
import datetime
import xlwt
from xlwt import Formula
today = datetime.date.today().strftime("%Y%m%d")
keywords = ('PNC', 'Huntington', 'KeyCorp', 'Fifth Third')
for keyword in keywords:
keyword.replace("+", " ")
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
def article_fetch(keyword):
url = 'https://www.americanbanker.com/search?query={}'.format(keyword)
r = requests.get(url, headers = headers)
soup = BeautifulSoup(r.text, 'html.parser')
mylist = []
cols = "KeyWord", "Article", "URL", "Publication Date"
mylist.append(cols)
for articles in soup.find_all("div", "feed-item"):
article = articles.find("h4").text.strip()
timestamp = articles.find("span", "timestamp").text.strip()
article_url = 'https://{}'.format(articles.find("a")["href"][2:])
link = 'HYPERLINK("{}", "Link" )'.format(article_url)
item = [keyword, article, Formula(link), timestamp]
mylist.append(item)
book = xlwt.Workbook()
sheet = book.add_sheet("Articles")
for i, row in enumerate(mylist):
for j, col in enumerate(row):
sheet.write(i, j, col)
book.save("C:\Python\American Banker\American Banker {}.xls".format(today))
for keyword in keywords:
article_fetch(keyword)
print('Workbook Saved')
当我尝试应用此功能时:
def wind_index(result):
if result > 10:
return 1
elif (result > 0) & (result <= 5):
return 1.5
elif (result > 5) & (result <= 10):
return 2
def get_thermal_index(temp, hum):
return wind_index(temp - 0.4*(temp-10)*((1-hum)/100))
我收到此错误:ValueError:系列的真值不明确。使用a.empty,a.bool(),a.item(),a.any()或a.all()。
使用这些功能,我还能做什么来为我的DataFrame获取新列?
答案 0 :(得分:2)
您可以使用Series.apply
:
def get_thermal_index(temp, hum):
return (temp - 0.4*(temp-10)*((1-hum)/100)).apply(wind_index)