''' # 发帖时遇到了奇怪的格式问题,缩进混乱请见谅。 目标:把 df2 中的 ['SpecificValue'] 作为 df1 中尚不存在的第三列添加到 df1,各行的值通过公共列 ['Email'] 对应。 df1 包含 2 列:['Email'] 和 ['Date'];df2 包含 2 列:['Email'] 和 ['SpecificValue']。 '''
我尝试了 join、merge 等方法。可惜,这里提供的所有示例总是与现实相去甚远、近乎荒谬,根本无法应用到实际场景中。
# Attempt 1: outer merge with no explicit key.
# NOTE: DataFrame.merge returns a new DataFrame; the result is not assigned
# here, so df1 itself is left unchanged by this line.
df1.merge(df2, how='outer')
没有达到目的:运行之后数据没有任何变化。
# Attempt 2: left merge using a df1 key column against df2's index.
# NOTE(review): right_index=True joins on df2's index rather than on its
# 'Email' column, and key_or_keys is a placeholder that is not defined in the
# visible snippet -- confirm the intended key (presumably 'Email').
pd.merge(df1, df2, left_on=key_or_keys, right_index=True, how='left', sort=False)
结果抛出了索引错误。
'''
# Load df1 from the Excel workbook.
df1 = pd.read_excel('YourExcel.xlsx')
# Load df2 from the CSV file.
df2 = pd.read_csv('YourCSV.csv')
# Delete unwanted/unneeded columns only if they exist: errors='ignore' skips
# labels that are missing, whereas `del df1[...]` raises KeyError for them.
# inplace=True keeps the original in-place mutation of df1.
df1.drop(columns=['Address', 'Number of dogs'], errors='ignore', inplace=True)
'''
'''
Strip the time part (separated from the date by a space in this case) and
replace the day with 01, so that every date has the same day value.
'''
'''
# Keep only the text before the first space, then overwrite characters 3-4
# with '01' (the day field, given MM/DD/YYYY strings as in the sample data).
date_only = df1['Date'].str.split(' ', expand=True)[0]
df1['Date'] = date_only.str.slice_replace(3, 5, '01')
# Standardize df2: map each of the known labels to the single value 'Value1'.
df2.replace(
    ['Expert review Nr23', 'Audit - Nr3', 'Critic Nr2532'],
    'Value1',
    inplace=True,
)
'''
'''
df1                      df2
Email    Date            Email    SpecificValue
email1   04/31/2019      email1   1a
email2   04/30/2019      email2   1a
email1   04/27/2019      email3   2b
email3   04/01/2019      email4   3c
email4   03/28/2019
email2   03/24/2019
email2   03/22/2019
'''
'''
期望结果(df1 增加 SpecificValue 列,日期的"日"统一为 01):
Email    Date         SpecificValue
email1   04/01/2019   1a
email2   04/01/2019   1a
email1   04/01/2019   1a
email3   04/01/2019   2b
email4   03/01/2019   3c
email2   03/01/2019   1a
email2   03/01/2019   1a
'''