我想将pandas数据框中的三列合并为单个日期列。问题在于其中一列(Day)不是具体的日期数字,而是“第几个星期几”这样的描述(例如 1st Monday)。我不知道如何把它换算成对应年份和月份中的确切日期。谁能帮我解决这个问题?数据看起来像这样:
BirthMonth BirthYear Day
0 5 88 1st Monday
1 10 87 3rd Tuesday
2 12 87 2nd Saturday
3 1 88 1st Tuesday
4 2 88 1st Monday
答案 0 :(得分:2)
根据您对我的第一条评论的回复,我的回答如下。我想这就是您要寻找的东西:
import re
import time
import calendar
import numpy as np
# Sample inputs: three parallel lists, one entry per date to resolve.
days = [
    '1st Monday',
    '3rd Tuesday',
    '4th wednesday',
]
months = [
    2,
    3,
    5,
]
years = [
    1990,
    2000,
    2019,
]
def extract_numeric(text: str) -> int:
    """Return the first run of decimal digits in *text* as an int.

    For example ``'3rd'`` gives ``3``.

    Raises:
        IndexError: if *text* contains no digits.
    """
    # findall returns every digit run; only the first one is used.
    return int(re.findall(r'\d+', text)[0])
def weekday_to_number(weekday: str) -> int:
    """Convert a full weekday name to its index (Monday is 0, Sunday is 6).

    The name is parsed by ``time.strptime`` with the ``%A`` directive, so it
    has to be a full weekday name as understood by the current locale.

    Raises:
        ValueError: if ``strptime`` does not recognise *weekday*.
    """
    return time.strptime(weekday, "%A").tm_wday
def get_date(number: int, weekday: int, month: int, year: int) -> str:
    """Return the *number*-th given weekday of a month as a 'D/M/YYYY' string.

    "3rd Tuesday" translates to ``number=3``, ``weekday=1``.

    Args:
        number: 1-based occurrence of the weekday within the month.
        weekday: weekday index, Monday = 0 ... Sunday = 6.
        month: month number, 1-12.
        year: full year, e.g. 1990.

    Raises:
        ValueError: if *number* is smaller than 1 or *weekday* is outside 0-6.
        IndexError: if the month has no such occurrence (for example a 5th
            Monday in a month that only has four Mondays).
    """
    if number < 1:
        # Zero or a negative value would silently count from the month's end.
        raise ValueError('number must be >= 1, got {}'.format(number))
    if not 0 <= weekday <= 6:
        raise ValueError('weekday must be in 0..6, got {}'.format(weekday))
    # monthrange gives the weekday of the 1st (Monday = 0) and the month length.
    firstday, n_days = calendar.monthrange(year, month)
    # Day of the first matching weekday, then step forward in whole weeks.
    day = 1 + (weekday - firstday) % 7 + 7 * (number - 1)
    if day > n_days:
        raise IndexError(
            'month {}/{} has no occurrence number {} of weekday {}'.format(
                month, year, number, weekday))
    return '{}/{}/{}'.format(day, month, year)
# Split every "<ordinal> <weekday name>" label into its two numeric parts.
numbers = []
weekdays = []
for day in days:
    number, weekday = day.split()
    numbers.append(extract_numeric(number))
    weekdays.append(weekday_to_number(weekday))

# Resolve each (occurrence, weekday, month, year) tuple to a date string.
dates = []
for number, weekday, month, year in zip(numbers, weekdays, months, years):
    dates.append(get_date(number, weekday, month, year))
print(dates)  # ['5/2/1990', '21/3/2000', '22/5/2019']
答案 1 :(得分:1)
使用 calendar 模块,根据“第几个星期几”的描述求出当月对应的日(day),然后将 day、month、year 组合转换为日期(datetime.date)。
import calendar
import datetime
def get_date(rows):
    """Return the ``datetime.date`` described by one row.

    *rows* must expose three attributes: ``days`` (text such as
    ``'3rd Tuesday'``), ``months`` (month number) and ``years`` (full year).

    Raises:
        ValueError: if the weekday name is unknown, or the occurrence is not
            a digit from 1 upwards.
        IndexError: if ``days`` has fewer than two words, or the month has no
            such occurrence of the weekday.
    """
    day = {'monday':0,'tuesday':1,'wednesday':2,'thursday':3,'friday':4,'saturday':5,'sunday':6}
    parts = rows.days.split()
    day_name = parts[1]
    day_num = day.get(day_name.lower())
    if day_num is None:
        # Without this check the lookup below fails with an obscure TypeError.
        raise ValueError('unknown weekday name: {!r}'.format(day_name))
    # Only the leading character of the ordinal is read ('3rd' -> 3).
    occurrence = int(parts[0][0])
    if occurrence < 1:
        # Index -1 would silently select the last occurrence in the month.
        raise ValueError('occurrence must be >= 1, got {}'.format(occurrence))
    # monthcalendar pads days outside the month with 0; keep real days only.
    matches = [
        week[day_num]
        for week in calendar.monthcalendar(rows.years, rows.months)
        if week[day_num] > 0
    ]
    weekday_num = matches[occurrence - 1]
    return datetime.date(rows.years, rows.months, weekday_num)
将上述函数应用于所有行
# A DataFrame is not callable; apply() with axis=1 hands each row to get_date.
df['date'] = df.apply(get_date, axis=1)
df
>>
days months years date
0 1st Monday 8 2015 2015-08-03
1 3rd Tuesday 12 2017 2017-12-19
2 4th wednesday 5 2019 2019-05-22
答案 2 :(得分:0)
请将 @ArnoMaeck 编辑后的回答视为正确答案。
答案 3 :(得分:0)
编辑以匹配新数据框
我使用pandas dayofweek函数的解决方案:
import numpy as np
import pandas as pd
from datetime import date
from dateutil.relativedelta import relativedelta
# Build the sample frame: month number, two-digit year and an
# "<ordinal> <weekday name>" label per row.
df = pd.DataFrame({
    'BirthMonth': [5, 10, 12, 1, 2],
    'BirthYear': [88, 87, 87, 88, 88],
    'Day': ['1st Monday', '3rd Tuesday', '2nd Saturday', '1st Tuesday', '1st Monday'],
})
# Two-digit years are assumed to belong to the 1900s.
df.BirthYear = df.BirthYear + 1900
# Weekday names in calendar order.
weekday = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
# Weekday name of every row, title-cased so it matches the list above.
days_ex = [label.split()[1].title() for label in df.Day]
# Output list, one empty placeholder per row.
dateout = [""] * len(days_ex)
for j, day_name in enumerate(days_ex):
    # Position of the name in `weekday`: Monday is 0 and Sunday is 6, the
    # same numbering that DatetimeIndex.dayofweek uses.
    daynum = weekday.index(day_name)
    # First and last calendar day of the month of interest. relativedelta
    # applies months before days, so this lands on the month's last day and
    # the range below never spills into the following month.
    date_start = date(df.BirthYear[j], df.BirthMonth[j], 1)
    date_end = date_start + relativedelta(months=+1, days=-1)
    # Weekday index of every day in the month.
    idx = pd.date_range(date_start, date_end, freq='D').dayofweek
    # The label starts with the occurrence digit ('3rd ...' -> 3); positions
    # in idx are 0-based, hence the +1 to get the day of the month.
    realday = np.where(idx == daynum)[0][int(df.Day[j][0]) - 1] + 1
    # Store the result as 'D/M/YYYY'.
    dateout[j] = str(realday) + '/' + str(df.BirthMonth[j]) + '/' + str(df.BirthYear[j])
我得到的结果是:
['2/5/1988', '20/10/1987', '12/12/1987', '5/1/1988', '1/2/1988']
答案 4 :(得分:0)
不是很快的解决方案(因为它涉及2个嵌套循环),但我希望这能解决您的问题
import pandas as pd
import datetime
import calendar
pd.set_option('display.max_rows', 100)

cols = ['day', 'month', 'year']
data = [
    ['1st Monday', 8, 2015],
    ['3rd Tuesday', 12, 2017],
    ['4th Wednesday', 5, 2019],
]
df = pd.DataFrame(data=data, columns=cols)
# 'day' is read positionally: character 0 is the occurrence digit and the
# weekday name starts at character 4 (after e.g. '1st ').
df['week_number'] = df['day'].str.slice(0, 1).astype('int')
df['day_name'] = df['day'].str.slice(4)


def generate_dates(input_df, index_num):
    """Return one row per calendar day of the month named by a row of *input_df*.

    Args:
        input_df: frame with integer 'year' and 'month' columns.
        index_num: index label of the row whose month should be expanded.

    Returns:
        A DataFrame with the columns 'date' (``datetime.date``), 'year',
        'month', 'days' (weekday index, Monday = 0), 'day_name' (``%A`` name)
        and 'week_number' (1 for the first occurrence of that weekday in the
        month, 2 for the second, and so on).
    """
    year = int(input_df.loc[index_num, 'year'])
    month = int(input_df.loc[index_num, 'month'])
    _, days = calendar.monthrange(year, month)
    records = []
    for i in range(1, days + 1):
        current = datetime.date(year, month, i)
        records.append({
            'date': current,
            'year': year,
            'month': month,
            'days': current.weekday(),
            'day_name': current.strftime("%A"),
            # Days 1-7 hold the first occurrence of each weekday, 8-14 the
            # second, and so on.
            'week_number': (i - 1) // 7 + 1,
        })
    return pd.DataFrame(records)


# Expand each distinct (year, month) once; repeating a month would duplicate
# the merged rows.
month_rows = df.drop_duplicates(subset=['year', 'month']).index
dates = pd.concat([generate_dates(df, row) for row in month_rows], ignore_index=True)
# 'month' has to be part of the key, otherwise two rows from the same year but
# different months would match each other's calendars.
df2 = df.merge(dates, on=['year', 'month', 'day_name', 'week_number'])
print(df2)