我正在尝试将下面这段 R 代码转换为 pandas 代码。
# Turn the Dependents labels into numbers: the open-ended "3+" level is
# recoded to "3" before the character -> numeric conversion.
Dependents.Numeric <- as.numeric(
  as.character(recode(start.DF$Dependents, "3+" = "3"))
)

# Household size is NA when either Dependents or Married is missing.
# Otherwise it is dependents + 2 when there is a co-applicant income or the
# applicant is married, and dependents + 1 in every other case.
start.DF <- mutate(
  start.DF,
  HouseholdSize = ifelse(
    is.na(Dependents.Numeric) | is.na(Married),
    NA,
    ifelse(
      CoapplicantIncome > 0 | Married == "Yes",
      Dependents.Numeric + 2,
      Dependents.Numeric + 1
    )
  )
)

# Income per household member.
start.DF <- mutate(start.DF, IncomePC = TotalIncome / HouseholdSize)
我想为这个问题寻找解决方案,也确实找到了一个,但它不是用 Python 写的,而是用 R 写的。因此,如果有人可以帮助我把它转换成 pandas 代码,我将不胜感激。谢谢。
答案 0 :(得分:2)
R代码:
library(dplyr)

# Small reproducible sample: Dependents is a label column in which "3+" means
# "three or more", and Married contains one missing value.
start.DF <- data.frame(
  Dependents = c("2", "3+", "1"),
  Married = c("yes", NA, "no"),
  CoapplicantIncome = c(45, 0, 75),
  TotalIncome = c(100, 67, 80)
)

# Recode the open-ended "3+" level to "3" so the column converts to numeric.
Dependents.Numeric <- recode(start.DF$Dependents, "3+" = "3") %>%
  as.character() %>%
  as.numeric()

# HouseholdSize is NA when Dependents or Married is missing; otherwise it is
# dependents + 2 (co-applicant income present or applicant married) or
# dependents + 1. The Married comparison is case-insensitive: the sample data
# above uses lowercase "yes", which a plain `Married == "Yes"` never matches.
# IncomePC is computed in the same mutate() because later arguments can refer
# to columns created by earlier ones.
start.DF <- start.DF %>%
  mutate(
    HouseholdSize = ifelse(
      is.na(Dependents.Numeric) | is.na(Married),
      NA,
      ifelse(
        CoapplicantIncome > 0 | tolower(Married) == "yes",
        Dependents.Numeric + 2,
        Dependents.Numeric + 1
      )
    ),
    IncomePC = TotalIncome / HouseholdSize
  )
start.DF
Dependents Married CoapplicantIncome TotalIncome HouseholdSize IncomePC
1 2 yes 45 100 4 25.00000
2 3+ <NA> 0 67 NA NA
3 1 no 75 80 3 26.66667
Python代码:
import pandas as pd
import numpy as np
# Sample data mirroring the R example: 'Dependents' is a string column in which
# '3+' means "three or more", and 'Married' contains one missing value.
start_df = pd.DataFrame.from_dict({
    'Dependents': ('2', '3+', '1'),
    'Married': ('yes', np.nan, 'no'),
    'CoapplicantIncome': (45, 0, 75),
    'TotalIncome': (100, 67, 80)
})

# Replace the literal label '3+' with '3' before converting to float.
# regex=False is passed explicitly: the default of Series.str.replace changed
# from regex=True to regex=False in pandas 2.0, so an escaped pattern such as
# '3\+' no longer matches and the float conversion would fail on '3+'.
Dependents_Numeric = (
    start_df['Dependents'].str.replace('3+', '3', regex=False).astype(float)
)

# missing: rows where household size cannot be computed (R: is.na(...) | is.na(...)).
missing = Dependents_Numeric.isna() | start_df['Married'].isna()
# has_partner: a co-applicant income exists or the applicant is married.
has_partner = (start_df['CoapplicantIncome'] > 0) | (start_df['Married'] == 'yes')

# Nested np.where mirrors R's nested ifelse(): NaN when an input is missing,
# otherwise dependents + 2 (partner present) or dependents + 1.
start_df['HouseholdSize'] = np.where(
    missing,
    np.nan,
    np.where(has_partner, Dependents_Numeric + 2, Dependents_Numeric + 1),
)

# Income per household member; NaN propagates from HouseholdSize.
start_df['IncomePC'] = start_df['TotalIncome'] / start_df['HouseholdSize']
print(start_df)
CoapplicantIncome Dependents Married TotalIncome HouseholdSize IncomePC
0 45 2 yes 100 4.0 25.000000
1 0 3+ NaN 67 NaN NaN
2 75 1 no 80 3.0 26.666667
答案 1 :(得分:0)
您可以使用 datar 将 R 代码翻译成 Python:
>>> from datar.all import (
... NA, c, f, tibble, as_numeric, recode, if_else, is_na, mutate
... )
>>> # Sample frame with the same four columns as the R example; Married has one NA.
>>> start_DF = tibble(
... Dependents = c('2', '3+', '1'),
... Married = c('Yes', NA, 'No'),
... CoapplicantIncome = c(45, 0, 75),
... TotalIncome = c(100, 67, 80)
... )
>>> # Map the '3+' label to '3', then convert the column to numbers (as in the R code).
>>> Dependents_Numeric = as_numeric(recode(start_DF.Dependents, **{'3+': '3'}))
>>> # Nested if_else mirrors the R ifelse: NA when an input is missing, else Dependents + 2 or + 1.
>>> start_DF >> mutate(
... HouseholdSize = if_else(
... is_na(Dependents_Numeric) | is_na(f.Married),
... NA,
... if_else(
... (f.CoapplicantIncome > 0) | (f.Married == "Yes"),
... Dependents_Numeric + 2,
... Dependents_Numeric + 1
... )
... ),
...
... IncomePC = f.TotalIncome/f.HouseholdSize
... )
>>> # A second mutate call is not needed:
>>> # 'HouseholdSize' can be referenced inside the same mutate that creates it, which replaces
>>> # start_DF >> mutate(IncomePC = f.TotalIncome/f.HouseholdSize)
>>>
Dependents Married CoapplicantIncome TotalIncome HouseholdSize IncomePC
<object> <object> <int64> <int64> <float64> <float64>
0 2 Yes 45 100 4.0 25.000000
1 3+ NaN 0 67 NaN NaN
2 1 No 75 80 3.0 26.666667