带多个条件的 pandas 函数 - ValueError

时间:2019-12-11 05:15:37

标签: python pandas function if-statement

我正在尝试创建一个带有多个条件的函数,用它添加一个保存计算结果的新列。部分代码如下。

我已经查看过其他答案,但仍然无法找出问题所在。

# import modules
import pandas as pd
import numpy as np

#data
data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['no', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})

# function
def include_gst(df):
    """Attempt to compute a GST-inclusive charge rounded to five cents.

    Intended result: for 'EA' units with round == 'no' and add_gst == 'yes',
    return Unit Charge * Time Units plus 10% GST, rounded to the nearest
    0.05; otherwise return 1.0.

    NOTE(review): as written this version does not work. It ignores the
    ``df`` argument and raises ValueError (truth value of a Series is
    ambiguous) on the first call; see the comments below.
    """
    rounded = 0.0
    # NOTE(review): the condition reads the module-level DataFrame ``data2``
    # instead of ``df``, so every ``==`` produces a whole boolean Series.
    # ``and`` then needs a single True/False from a Series, which pandas
    # refuses with the "truth value of a Series is ambiguous" ValueError.
    if(data2['Unit of Measure'] == 'EA' and data2['round'] == 'no' and data2['add_gst'] == 'yes'):
        # NOTE(review): ``data`` is not defined anywhere in the visible code
        # (only ``data2`` is) - presumably ``df`` was meant; confirm.
        rounded = data['Unit Charge'] * data['Time Units']
        rounded = rounded.round(2) # to two decimal places
        rounded = rounded * 1.10 # add gst
        rounded = rounded / 0.05 # round to nearest five cents - step 1 - divide by 0.05
        rounded = rounded.round() # round to nearest five cents - step 2 - to nearest integer
        rounded = rounded * 0.05 # round to nearest five cents - step 3 - multiply by 0.05
        return  rounded
    else:
        # Fallback value used whenever the condition is not met.
        return 1.0

data2['incl_gst'] = data2.apply(include_gst, axis = 1)

错误:

  

ValueError: ('The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().', 'occurred at index 0')

让它正常工作会很好。谢谢。

1 个答案:

答案 0 :(得分:1)

这里不需要使用 apply(它在底层仍然是逐行循环),可以把解决方案向量化:由于运算符优先级的原因,每个条件都要加上括号,再用按位与运算符 & 组合成布尔掩码;新列由 numpy.where 创建:

#changed data for match first row
data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['yes', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units
0              EA    no     yes          1.8         1.0
1              EA   yes     yes          2.5         1.5
2              AA    no      no          3.0         2.0
  Unit of Measure round add_gst  Unit Charge  Time Units  

# Boolean mask: 'EA' rows that are not flagged for rounding but do attract GST.
# Each comparison is parenthesised because & binds tighter than ==.
m = (
    (data2['Unit of Measure'] == 'EA')
    & (data2['round'] == 'no')
    & (data2['add_gst'] == 'yes')
)

# Charge to two decimal places, then add 10% GST.
rounded = (data2['Unit Charge'] * data2['Time Units']).round(2) * 1.10
# Round to the nearest five cents: divide by 0.05, round to an integer
# number of five-cent steps, then multiply back by 0.05.
rounded = (rounded / 0.05).round() * 0.05

# Rows outside the mask get the default value 1.0.
data2['incl_gst'] = np.where(m, rounded, 1.0)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0       2.0
1              EA   yes     yes          2.5         1.5       1.0
2              AA    no      no          3.0         2.0       1.0

编辑:

由于实际需求中有 12 个 elif 分支,可以保留逐行函数的写法,只需在函数内部使用传入的行 df(标量比较)而不是整个 data2:

def include_gst(df):
    """Return the GST-inclusive charge for a single row, to the nearest 0.05.

    ``df`` is one row (as passed by ``DataFrame.apply(..., axis=1)``) that is
    indexed by column name. Only rows with Unit of Measure 'EA',
    round 'no' and add_gst 'yes' are calculated; every other row yields 1.0.
    """
    # Guard clauses, checked in the same order as the original condition.
    if df['Unit of Measure'] != 'EA':
        return 1.0
    if df['round'] != 'no':
        return 1.0
    if df['add_gst'] != 'yes':
        return 1.0

    # Base charge to two decimal places.
    charge = round(df['Unit Charge'] * df['Time Units'], 2)
    # Add 10% GST, then count how many whole five-cent steps that is.
    five_cent_steps = round(charge * 1.10 / 0.05)
    # Convert the step count back into a currency amount.
    return five_cent_steps * 0.05

data2['incl_gst'] = data2.apply(include_gst, axis = 1)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0       2.0
1              EA   yes     yes          2.5         1.5       1.0
2              AA    no      no          3.0         2.0       1.0

或者:

data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['yes', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units
0              EA    no     yes          1.8         1.0
1              EA   yes     yes          2.5         1.5
2              AA    no      no          3.0         2.0

# One boolean mask per case; np.select below picks the first mask that matches.
# Each comparison is parenthesised because & binds tighter than ==.
m1 = (
    (data2['Unit of Measure'] == 'EA')
    & (data2['round'] == 'no')
    & (data2['add_gst'] == 'yes')
)
m2 = (
    (data2['Unit of Measure'] == 'EA')
    & (data2['round'] == 'yes')
    & (data2['add_gst'] == 'yes')
)

# Shared part of both calculations: charge to two decimal places, add 10% GST,
# divide by 0.05 and round to an integer number of steps.
rounded = ((data2['Unit Charge'] * data2['Time Units']).round(2) * 1.10 / 0.05).round()
rounded1 = rounded * 0.05 # case m1: multiply back by 0.05 (nearest five cents)
rounded2 = rounded * 0.08 # case m2: alternative multiplier for the second case

# Rows matching neither mask fall back to the default 1.0.
data2['incl_gst'] = np.select([m1, m2], [rounded1, rounded2], default=1.0)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0      2.00
1              EA   yes     yes          2.5         1.5      6.56
2              AA    no      no          3.0         2.0      1.00