Question

我试图确定某一列连续多少天高于或低于某个阈值。

>>> df.head()
            Open   High    Low  Close  Volume
Date
2004-08-19  49.96  51.98  47.93  50.12     NaN
2004-08-20  50.69  54.49  50.20  54.10     NaN
2004-08-23  55.32  56.68  54.47  54.65     NaN
2004-08-24  55.56  55.74  51.73  52.38     NaN
2004-08-25  52.43  53.95  51.89  52.95     NaN
>>>

对于上面的示例，我希望另一列 df['RDA'] 在 Open 列连续高于 50 的每一天递增；而在连续低于 50 的每一天，我希望第二列 df['RDB'] 递增，同时 df['RDA'] 重置为 0。我已尝试过 if/then 逻辑，但它不起作用，并给出一个值错误：

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all(). how can i sort it out

我希望我的数据框的输出看起来像这样：

>>> df.head()
            Open   High    Low  Close  Volume    RDA   RDB
Date
2004-08-19  51.96  51.98  47.93  50.12     NaN    1      0
2004-08-20  50.69  54.49  50.20  54.10     NaN    2      0
2004-08-23  55.32  56.68  54.47  54.65     NaN    3      0
2004-08-24  45.56  55.74  51.73  52.38     NaN    0      1
2004-08-25  42.43  53.95  51.89  52.95     NaN    0      2
2004-08-26  41.96  51.98  47.93  50.12     NaN    0      3
2004-08-27  40.69  54.49  50.20  54.10     NaN    0      4
2004-08-28  55.32  56.68  54.47  54.65     NaN    1      0
2004-08-29  55.56  55.74  51.73  52.38     NaN    2      0
2004-08-30  52.43  53.95  51.89  52.95     NaN    3      0
>>>

这是 pandas 可以做到的事情吗？我知道可以统计一列中的值，但到目前为止还没能找到统计连续值的方法。带有 2 个变量的 if/then 语句应该可行，但就像我上面提到的那样，当我尝试这样做时，会得到一个值错误。任何帮助将不胜感激。

Answer 1

我会对 Open 与 50 之间的差值使用 np.sign。当小于 50 时结果为 -1，等于 50 时为 0，大于 50 时为 1。
接下来，我将使用 np.diff 来确定何时从一个值切换到另一个值
然后我将使用 cumsum 来定义连续符号组
接下来，我将使用 cumcount 来获取组内的计数
最后，我将使用 np.where 拆分 cumcounts

o = df.Open.values - 50
signs = np.sign(o)
changes = np.append(False, signs[:-1] != signs[1:])
g = changes.cumsum()

cumcounts = df.groupby(g).cumcount() + 1
a = np.where(signs == 1, cumcounts, 0)
b = np.where(signs == -1, cumcounts, 0)

df.assign(RDA=a, RDB=b)

            Open   High    Low  Close  Volume  RDA  RDB
Date
2004-08-19  51.96  51.98  47.93  50.12     NaN    1    0
2004-08-20  50.69  54.49  50.20  54.10     NaN    2    0
2004-08-23  55.32  56.68  54.47  54.65     NaN    3    0
2004-08-24  45.56  55.74  51.73  52.38     NaN    0    1
2004-08-25  42.43  53.95  51.89  52.95     NaN    0    2
2004-08-26  41.96  51.98  47.93  50.12     NaN    0    3
2004-08-27  40.69  54.49  50.20  54.10     NaN    0    4
2004-08-28  55.32  56.68  54.47  54.65     NaN    1    0
2004-08-29  55.56  55.74  51.73  52.38     NaN    2    0
2004-08-30  52.43  53.95  51.89  52.95     NaN    3    0

" Looks for a pattern in the buffers.
" Usage :GrepBuffers [pattern] [matchCase] [matchWholeWord] [prefix]
" If pattern is not specified then usage instructions will get printed.
" If matchCase = '1' then exclude matches that do not have the same case. If matchCase = '0' then ignore case.
" If prefix == 'c' then put results in the QuickFix list. If prefix == 'l' then put results in the location list for the current window.
function! s:GrepBuffers(...)
    " Searches every listed, named buffer for a pattern using :vimgrep or
    " :lvimgrep, then opens the corresponding result window.
    "
    " Arguments (all positional):
    "   a:1 pattern        - search pattern; the usage line is echoed when omitted.
    "   a:2 matchCase      - '1' matches case, '0' (default) ignores case.
    "   a:3 matchWholeWord - '1' wraps the pattern in \< and \>; '0' is the default.
    "   a:4 prefix         - 'c' (default) fills the quickfix list, 'l' fills the
    "                        location list of the current window.
    "
    " Throws "Too many arguments" or an "ArgumentException: ..." string when an
    " argument is out of range.
    "
    " NOTE: '/' is used as the :vimgrep pattern delimiter below, so a literal
    " '/' inside the pattern has to be escaped by the caller.
    if a:0 > 4
        throw "Too many arguments"
    endif

    if a:0 >= 1
        let l:pattern = a:1
    else
        echo 'Usage :GrepBuffers [pattern] [matchCase] [matchWholeWord] [prefix]'
        return
    endif

    let l:matchCase = 0
    if a:0 >= 2
        if a:2 !~ '^\d\+$' || a:2 > 1 || a:2 < 0
            throw "ArgumentException: matchCase value '" . a:2 . "' is not in the bounds [0,1]."
        endif
        let l:matchCase = a:2
    endif

    let l:matchWholeWord = 0
    if a:0 >= 3
        if a:3 !~ '^\d\+$' || a:3 > 1 || a:3 < 0
            throw "ArgumentException: matchWholeWord value '" . a:3 . "' is not in the bounds [0,1]."
        endif
        let l:matchWholeWord = a:3
    endif

    let l:prefix = 'c'
    if a:0 >= 4
        " Use the case-sensitive operators: plain != / == follow 'ignorecase',
        " which would let 'C' or 'L' through and later leave the grep command
        " undefined once this function switches 'ignorecase' off.
        if a:4 !=# 'c' && a:4 !=# 'l'
            throw "ArgumentException: prefix value '" . a:4 . "' is not 'c' or 'l'."
        endif
        let l:prefix = a:4
    endif

    " :vimgrep honours 'ignorecase', so set it for the search and restore the
    " user's value in the finally clause.
    let l:saved_ignorecase = &ignorecase
    let &ignorecase = l:matchCase == 0
    try
        let l:vimgrep = l:prefix ==# 'c' ? 'vimgrep' : 'lvimgrep'

        if l:matchWholeWord
            let l:pattern = '\<' . l:pattern . '\>'
        endif

        let l:command = 'silent ' . l:vimgrep . ' /' . l:pattern . '/'

        for l:buf in getbufinfo()
            " Skips unlisted buffers because they are not used for normal editing
            if !buflisted(l:buf.bufnr)
                continue
            endif

            if !bufexists(l:buf.bufnr)
                throw 'Buffer does not exist: "' . l:buf.bufnr . '"'
            elseif empty(bufname(l:buf.bufnr)) && getbufvar(l:buf.bufnr, '&buftype') !=# 'quickfix'
                " An unnamed buffer cannot be passed to :vimgrep. Warn only when it
                " actually holds text. getbufline() returns [] for an unloaded
                " buffer, so check the length instead of indexing blindly.
                let l:head = getbufline(l:buf.bufnr, 1, 2)
                if len(l:head) > 1 || (len(l:head) == 1 && strlen(l:head[0]) != 0)
                    echohl WarningMsg | echomsg 'Skipping unnamed buffer: [' . l:buf.bufnr . ']' | echohl None
                endif
            else
                let l:command = l:command . ' ' . fnameescape(bufname(l:buf.bufnr))
            endif
        endfor

        try
            execute l:command
        catch /^Vim\%((\a\+)\)\=:E\%(683\|480\):/ "E683: File name missing or invalid pattern --- E480: No match:
            " How do you want to handle this exception?
            echoerr v:exception
            return
        endtry

        execute l:prefix . 'window'
    finally
        let &ignorecase = l:saved_ignorecase
    endtry
endfunction

Answer 2

首先，在数据框中添加一个标志列，以指示Open是否高于目标价格50（真或假）。

然后，您可以使用compare-cumsum-groupby pattern来识别此标记的累积分组，并将cumsum应用于每个此类组。

我们现在需要反转标志，使 1 变为 0、0 变为 1，然后使用相同的策略计算 rdb。

最后，我们删除flag列（我使用.iloc[:, :-1]删除它，因为我将其添加为最后一列）并附加新的RDA和RDB列

target_price = 50

# Temporary helper column appended last: True where `Open` exceeds `target_price`.
df = df.assign(flag=df['Open'] > target_price)

# A new run starts wherever the flag differs from the previous row; the
# cumulative sum of those change points labels each run of identical flags.
run_id = df['flag'].ne(df['flag'].shift()).cumsum()
# Summing the boolean flag inside each run counts consecutive True rows
# and yields 0 for every row of a False run.
rda = df['flag'].groupby(run_id).cumsum()

# Invert the flag and repeat the same steps to count consecutive rows at or
# below the target.
df['flag'] = ~df['flag']
run_id = df['flag'].ne(df['flag'].shift()).cumsum()
rdb = df['flag'].groupby(run_id).cumsum()

# `flag` is the last column, so drop it by position before attaching results.
df = df.iloc[:, :-1].assign(RDA=rda, RDB=rdb)
>>> df
      Date   Open   High    Low  Close  Volume  RDA  RDB
0  8/19/04  51.96  51.98  47.93  50.12     NaN    1    0
1  8/20/04  50.69  54.49  50.20  54.10     NaN    2    0
2  8/23/04  55.32  56.68  54.47  54.65     NaN    3    0
3  8/24/04  45.56  55.74  51.73  52.38     NaN    0    1
4  8/25/04  42.43  53.95  51.89  52.95     NaN    0    2
5  8/26/04  41.96  51.98  47.93  50.12     NaN    0    3
6  8/27/04  40.69  54.49  50.20  54.10     NaN    0    4
7  8/28/04  55.32  56.68  54.47  54.65     NaN    1    0
8  8/29/04  55.56  55.74  51.73  52.38     NaN    2    0
9  8/30/04  52.43  53.95  51.89  52.95     NaN    3    0

Answer 3

这也可以使用Python提供的functools.reduce方法完成。首先创建一个可迭代的目标数据，所以在你的情况下：

target = df.Open > 50

这就是您稍后要传给 functools.reduce 进行“归约”的对象。reduce 基本上类似于 map，但会在遍历列表元素的过程中保留一个累积值。这可以用来做你想要的。

我会尝试分解您可以使用的功能（在帖子末尾完整显示）。

functools.reduce可让您访问两个参数。您的累计值以及您当前的列表项。它还允许您传入自己的初始化程序（在查看任何内容之前的第一项）。有了这个，我们可以查看我们的列表，如果它是True，由上面的目标系列确定，我们可以在列表的最后一个元素中加1，否则在我们的累加器中添加0

这需要一点技巧：把初始值设为只含一个 0 的列表 [0]，这样在第一次迭代时就能取到“最后一个”元素并进行运算，而不会报错。

完成后，列表开头会多出一个 0，您可以使用切片 [1:] 将其去掉，只保留第二个及之后的元素。

您的 RDB 列完全相同，只是要确保目标列表中的值不是 True，只需要在条件语句中添加 not：

import functools

# Create a boolean series of your Open column
target = df.Open > 50

# For every item in your boolean series add a 1 to the previous value if it's over 50, otherwise reset
df['RDA'] = functools.reduce(lambda x, y: x + ([x[-1] + 1] if y else [0]), target, [0])[1:]

# Repeat, but for every `False` value in the series
df['RDB'] = functools.reduce(lambda x, y: x + ([x[-1] + 1] if not y else [0]), target, [0])[1:]

>>> df.head()
            Open   High    Low  Close  Volume  RDA  RDB
Date
2004-08-19  49.96  51.98  47.93  50.12     NaN    0    1
2004-08-20  50.69  54.49  50.20  54.10     NaN    1    0
2004-08-23  55.32  56.68  54.47  54.65     NaN    2    0
2004-08-24  55.56  55.74  51.73  52.38     NaN    3    0
2004-08-25  52.43  53.95  51.89  52.95     NaN    4    0

完整代码如下所示：

{  
"compilerOptions":{  
  "allowSyntheticDefaultImports":true,
  "declaration":false,
  "emitDecoratorMetadata":true,
  "experimentalDecorators":true,
  "lib":[  
     "dom",
     "es2015"
  ],
  "module":"es2015",
  "moduleResolution":"node",
  "sourceMap":true,
  "target":"es5"
},
"include":[  
   "src/**/*.ts"
],
"exclude":[  
   "node_modules"
],
"compileOnSave":false,
"atom":{  
   "rewriteTsconfig":false
 }
}

Answer 4

<bean id="geoFilter"
    class="com.package.CountryGeoFilter" />

Answer 5

我最初误解了，并没有意识到你想要在它发生变化时重新启动计数。我认为最好的方法是在行中使用DataFrame.apply，如下所示：

In [226]: def increment(row):
     ...:     global rda
     ...:     global rdb
     ...:     if row.Open > 50:
     ...:         row.RDA = int(next(rda))
     ...:         rdb = count()
     ...:     else:
     ...:         row.RDB = next(rdb)
     ...:         rda = int(count())
     ...:     return row
In [227]: df['RDA'] = 0
In [228]: df['RDB'] = 0
In [229]: df.apply(increment, axis=1)
             Open   High    Low  Close  Volume  RDA  RDB
Date                                                    
2004-08-19  49.96  51.98  47.93  50.12     NaN  0.0  1.0
2004-08-20  50.69  54.49  50.20  54.10     NaN  0.0  0.0
2004-08-23  55.32  56.68  54.47  54.65     NaN  1.0  0.0
2004-08-24  55.56  55.74  51.73  52.38     NaN  2.0  0.0
2004-08-25  52.43  53.95  51.89  52.95     NaN  3.0  0.0

我不知道为什么这些列里会出现浮点数，我猜 pandas 认为这就是你想要的。数据最初是由 count 产生的 int。我通常不喜欢全局变量，但 DataFrame.apply 无法访问 increment 函数之外的变量。

根据数据帧中的另一个单元格值更改单元格值

5 个答案: