用apply替换for循环

时间:2017-04-15 18:33:35

标签: python-3.x pandas apply

我正在尝试用 apply 和 lambda 函数替换下面被注释掉的 for 循环,但遇到了下面的错误。我的 Python 有些生疏了,所以任何提示我都非常感激。

错误:

File "<ipython-input-5-b29bfb93595e>", line 11
if (dataDF < dataDF.shift()) & (dataDF.shift(periods=1) < dataDF.shift(periods=2)):
                                                                                  ^

SyntaxError: invalid syntax

代码:

def get_recession_end():
    # Read 'gdplev.xls', keep the two columns named 'Unnamed: 4' and
    # 'Unnamed: 5', and keep only the rows from index label 246 onward.
    dataDF = pd.ExcelFile('gdplev.xls').parse(skiprows=7)[['Unnamed: 4',     'Unnamed: 5']].loc[246:]
    dataDF.columns = ['Quarter','dataDF']
    # The second column is converted to numbers so it can be compared with '<'.
    dataDF['dataDF'] = pd.to_numeric(dataDF['dataDF'])

    # Original loop-based version that the author wants to replace: it looks
    # for two consecutive increases in column 1 and returns the column-0
    # value of the row where the second increase happens.
    #quarters = []
    #for i in range(len(dataDF) - 2):
        #if (dataDF.iloc[i][1] < dataDF.iloc[i+1][1]) & (dataDF.iloc[i+1][1] <     dataDF.iloc[i+2][1]):
            #quarters.append(dataDF.iloc[i+2][0])
    #return quarters[0]

    # NOTE(review): the statement below is the one reported in the SyntaxError.
    # A lambda body must be a single expression; an assignment
    # ('quarters = []') and an 'if' statement are not allowed inside it.
    quarters = dataDF.apply(lambda x: quarters = []
                                      if (dataDF < dataDF.shift()) &     (dataDF.shift(periods=1) < dataDF.shift(periods=2)):
                                          quarters.append(dataDF.shift(2)[0]))
    return quarters[0]

get_recession_end()

更新后的版本:

代码:

def get_recession_end():
def get_recession_end():
    # NOTE(review): the 'def' line appears twice and several long lines are
    # hard-wrapped in this paste; presumably copy/paste artifacts -- confirm
    # against the code that was actually run.
    # Read 'gdplev.xls', keep the columns 'Unnamed: 4' and 'Unnamed: 5', and
    # keep only the rows from index label 246 onward.
    dataDF = pd.ExcelFile('gdplev.xls').parse(skiprows=7)[['Unnamed: 4', 
'Unnamed: 5']].loc[246:]#skiprows=17,skip_footer=(38))
    dataDF.columns = ['Quarter','dataDF']
    dataDF['dataDF'] = pd.to_numeric(dataDF['dataDF'])
    # Original loop-based version that the author wants to replace.
   #quarters = []
    #for i in range(len(dataDF) - 2):
        #if (dataDF.iloc[i][1] < dataDF.iloc[i+1][1]) & (dataDF.iloc[i+1][1] 
< dataDF.iloc[i+2][1]):
            #quarters.append(dataDF.iloc[i+2][0])
    #return quarters[0]
    # Helper handed to DataFrame.apply below. It never reads its argument x;
    # every comparison is made on the whole enclosing dataDF instead.
    def do_the_foo(x):
        quarters = []
        if (dataDF < dataDF.shift()) & (dataDF.shift(periods=1) < 
dataDF.shift(periods=2)):
            quarters.append(dataDF.shift(2)[0])
        return quarters

    # NOTE(review): the traceback quoted with this snippet shows the slice as
    # .loc[:-(len(dataDF) - 2)] (with a minus sign), which differs from the
    # line below -- confirm which version produced the KeyError.
    quarters = dataDF.loc[:(len(dataDF) - 2)].apply(do_the_foo)
    return quarters[0]


get_recession_end()

新错误:

   ---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/opt/conda/lib/python3.5/site-packages/pandas/indexes/base.py in 
get_loc(self, key, method, tolerance)
   1944             try:
-> 1945                 return self._engine.get_loc(key)
   1946             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item 
(pandas/hashtable.c:12368)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item 
(pandas/hashtable.c:12322)()

KeyError: 0

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-10-53e0a21f9faa> in <module>()
     18 
 19 
---> 20 get_recession_end()

<ipython-input-10-53e0a21f9faa> in get_recession_end()
     15 
     16     quarters = dataDF.loc[:-(len(dataDF) - 2)].apply(do_the_foo)
---> 17     return quarters[0]
     18 
     19 

 /opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in 
__getitem__(self, key)
   1995             return self._getitem_multilevel(key)
   1996         else:
-> 1997             return self._getitem_column(key)
   1998 
   1999     def _getitem_column(self, key):

/opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in 
_getitem_column(self, key)
   2002         # get column
   2003         if self.columns.is_unique:
-> 2004             return self._get_item_cache(key)
   2005 
   2006         # duplicate columns & possible reduce dimensionality

/opt/conda/lib/python3.5/site-packages/pandas/core/generic.py in 
_get_item_cache(self, item)
   1348         res = cache.get(item)
   1349         if res is None:
-> 1350             values = self._data.get(item)
   1351             res = self._box_item_values(item, values)
   1352             cache[item] = res

/opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in get(self, 
item, fastpath)
   3288 
   3289             if not isnull(item):
-> 3290                 loc = self.items.get_loc(item)
   3291             else:
   3292                 indexer = np.arange(len(self.items))[isnull(self.items)]

/opt/conda/lib/python3.5/site-packages/pandas/indexes/base.py in 
get_loc(self, key, method, tolerance)
   1945                 return self._engine.get_loc(key)
   1946             except KeyError:
-> 1947                 return 
self._engine.get_loc(self._maybe_cast_indexer(key))
   1948 
   1949         indexer = self.get_indexer([key], method=method, 
tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item 
(pandas/hashtable.c:12368)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item 
(pandas/hashtable.c:12322)()

KeyError: 0

1 个答案:

答案 0 :(得分:0)

lambda 表达式只能包含一个表达式,而您在其中写了多条语句(一条赋值语句和一条 if 语句)。lambda 表达式只是内联一个小函数的便捷写法,总是可以用常规函数替换。由于该 lambda 引用了外层函数作用域中的变量,替换它的函数也应当定义在同一作用域内。

这样改写的结果看起来并不比您想要替换的原始代码更好。所以我认为真正的答案是:您不能这样使用 lambda。

def get_recession_end(data=None):
    """Return the quarter in which the value column has risen twice in a row.

    Equivalent to the original loop: find the first position ``i`` where
    ``value[i] < value[i+1] < value[i+2]`` and return the quarter label of
    row ``i+2``.

    Args:
        data: Optional two-column DataFrame (quarter label first, value
            second). When omitted, the columns 'Unnamed: 4' and
            'Unnamed: 5' of 'gdplev.xls' are read, from index label 246
            onward, exactly as before.

    Returns:
        The 'Quarter' entry of the first row that completes two consecutive
        increases.

    Raises:
        IndexError: If no two consecutive increases exist (the original loop
            failed the same way on ``quarters[0]``).
    """
    if data is None:
        data = pd.ExcelFile('gdplev.xls').parse(skiprows=7)[['Unnamed: 4', 'Unnamed: 5']].loc[246:]

    # Work on a copy so a caller-supplied frame is never renamed in place.
    dataDF = data.iloc[:, :2].copy()
    dataDF.columns = ['Quarter', 'dataDF']
    dataDF['dataDF'] = pd.to_numeric(dataDF['dataDF'])

    # No lambda/apply is needed: comparing a Series with its shifted copies is
    # already element-wise. `if <DataFrame>` raises ValueError, so the result
    # is used as a boolean mask instead of being tested with `if`.
    values = dataDF['dataDF']
    one_back = values.shift(1)
    two_back = values.shift(2)
    # The first two rows compare against NaN, which is False, matching the
    # loop's range(len(dataDF) - 2) bound.
    rose_twice = (two_back < one_back) & (one_back < values)

    quarters = dataDF.loc[rose_twice, 'Quarter']
    if quarters.empty:
        raise IndexError('no two consecutive rising quarters found')
    return quarters.iloc[0]

get_recession_end()