SO社区
我正在用python a进行编码,我需要创建一个函数,该函数的输入将按预期工资,约束(即广泛,有针对性或多样化),区域(1-13)和级别(1- 2)。
数据示例为:
MajorII Area_ID Level_ID expected_wage
2655 52 1 1 0.907616
2621 18 11 2 0.776567
2652 49 1 2 0.730930
2608 5 10 1 0.628810
2644 41 1 1 0.505208
2659 56 2 1 0.503492
2617 14 11 2 0.471512
2667 64 3 1 0.445349
2704 101 7 2 0.436971
2673 70 3 2 0.412259
2612 9 11 1 0.411015
2717 114 9 1 0.408277
2653 50 1 2 0.407092
我想从此处返回基于约束的3个数据帧。 例如,广泛约束定义为:
实际上,我正在使用此功能
def sortOptions(sample_data, constrain, area, level):
# Number
options = np.empty(0)
num_options_needed = 4
if constrain == 'Broad':
# Code Broad: 1 option in area-level, 3 options outside area, 2 options in level.
# a) Sorted option in area-level first, then in area but no level, the in level but no area and then neither in area or level:
utilities_to_sort1 = utilities.copy()
q = '(Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
util_area_level = utilities_to_sort1.query(q)
utilities_to_sort1 = utilities_to_sort1[~utilities_to_sort1.isin(util_area_level)].dropna()
q = 'not (Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort1.query(q)
util_area_level = util_area_level.append(aux)
utilities_to_sort1 = utilities_to_sort1[~utilities_to_sort1.isin(aux)].dropna()
q = '(Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort1.query(q)
util_area_level = util_area_level.append(aux)
utilities_to_sort1 = utilities_to_sort1[~utilities_to_sort1.isin(aux)].dropna()
q = 'not (Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort1.query(q)
util_area_level = util_area_level.append(aux)
utilities_to_sort1 = utilities_to_sort1[~utilities_to_sort1.isin(aux)].dropna()
# b) 3 options outside area:
utilities_to_sort2 = utilities.copy()
q = '(Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
util_notarea_level1 = utilities_to_sort1.query(q)
utilities_to_sort2 = utilities_to_sort2[~utilities_to_sort2.isin(util_notarea_level1)].dropna()
q = 'not (Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort2.query(q)
util_notarea_level1 = util_notarea_level1.append(aux)
utilities_to_sort2 = utilities_to_sort2[~utilities_to_sort2.isin(aux)].dropna()
q = 'not (Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort2.query(q)
util_notarea_level1 = util_notarea_level1.append(aux)
utilities_to_sort2 = utilities_to_sort2[~utilities_to_sort2.isin(aux)].dropna()
q = '(Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort2.query(q)
util_notarea_level1 = util_notarea_level1.append(aux)
utilities_to_sort2 = utilities_to_sort2[~utilities_to_sort2.isin(aux)].dropna()
# c) 3 options outside area:
utilities_to_sort3 = utilities.copy()
q = '(Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
util_notarea_level2 = utilities_to_sort1.query(q)
utilities_to_sort3 = utilities_to_sort3[~utilities_to_sort3.isin(util_notarea_level2)].dropna()
q = 'not (Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort3.query(q)
util_notarea_level2 = util_notarea_level2.append(aux)
utilities_to_sort3 = utilities_to_sort3[~utilities_to_sort3.isin(aux)].dropna()
q = 'not (Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort3.query(q)
util_notarea_level2 = util_notarea_level2.append(aux)
utilities_to_sort3 = utilities_to_sort3[~utilities_to_sort3.isin(aux)].dropna()
q = '(Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort3.query(q)
util_notarea_level2 = util_notarea_level2.append(aux)
utilities_to_sort3 = utilities_to_sort3[~utilities_to_sort3.isin(aux)].dropna()
# d)
utilities_to_sort4 = utilities.copy()
q = 'not (Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
util_notarea_notlevel = utilities_to_sort4.query(q)
utilities_to_sort4 = utilities_to_sort4[~utilities_to_sort4.isin(util_notarea_notlevel)].dropna()
q = '(Level_ID == {0}) and not (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort4.query(q)
util_notarea_notlevel = util_notarea_notlevel.append(aux)
utilities_to_sort4 = utilities_to_sort4[~utilities_to_sort4.isin(aux)].dropna()
q = 'not (Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort1.query(q)
util_notarea_notlevel = util_notarea_notlevel.append(aux)
utilities_to_sort4 = utilities_to_sort4[~utilities_to_sort4.isin(aux)].dropna()
q = '(Level_ID == {0}) and (Area_ID == {1})'.format(level, area)
aux = utilities_to_sort4.query(q)
util_notarea_notlevel = util_notarea_notlevel.append(aux)
utilities_to_sort4 = utilities_to_sort4[~utilities_to_sort4.isin(aux)].dropna()
opts1 = util_area_level['MajorID'].astype(int)
opts2 = util_notarea_level1['MajorID'].astype(int)
opts3 = util_notarea_level2['MajorID'].astype(int)
opts4 = util_notarea_notlevel['MajorID'].astype(int)
elif constrain == 'Targeted':
# Code Targeted
options = np.empty(0)
elif constrain == 'Diverse':
# Code Diverse
options = np.empty(0)
else:
print("Constrain is not define. It must be Broad, Diverse or Targeted")
return opts1, opts2, opts3, opts4
它返回了我想要的东西,但是必须有一种最优雅,最有效的方法来实现它!
df = pd.DataFrame({'MajorID' : [52, 18, 49, 5, 41, 56, 14, 64, 101, 70, 9, 114, 50],
'Area_ID' : [1, 11, 1, 10, 1, 2, 11, 3, 7, 3, 11, 9, 1 ],
'Level_ID' :[1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2],
'expected_wage' :[0.907616, 0.776567, 0.730930, 0.628810, 0.505208, 0.503492, 0.471512, 0.445349, 0.436971, 0.412259, 0.411015, 0.408277, 0.407092] })
opts1, opts2, opts3, opts4 = sortOptions(df, "Broad", 2, 1)
opts1
MajorID Area_ID Level_ID
5 56 2 1
0 52 1 1
3 5 10 1
4 41 1 1
7 64 3 1
10 9 11 1
11 114 9 1
1 18 11 2
2 49 1 2
6 14 11 2
8 101 7 2
9 70 3 2
12 50 1 2
opts2
MajorID Area_ID Level_ID
1 18 11 2
2 49 1 2
6 14 11 2
8 101 7 2
9 70 3 2
12 50 1 2
5 56 2 1
0 52 1 1
3 5 10 1
4 41 1 1
7 64 3 1
10 9 11 1
11 114 9 1