Pandas - 如何使用 Pandas 编写更好的 for / while 循环

时间:2018-11-17 10:19:24

标签: python pandas

我是Pandas的新手,目前我有这样的系列:

import pandas as pd  

# Measurement labels: 75860 up to (and including) 76500, in steps of 10
index = list(range(75860, 76510, 10))
# Number of occurrences recorded for each label
value = [
    1, 1, 4, 6, 7, 7, 7, 7, 8, 7,
    7, 7, 8, 6, 6, 7, 15, 23, 26, 30,
    31, 28, 22, 22, 21, 19, 14, 15, 15, 14,
    12, 12, 13, 14, 14, 15, 15, 19, 19, 23,
    25, 34, 38, 39, 40, 41, 35, 35, 30, 26,
    23, 23, 29, 25, 25, 25, 23, 21, 19, 16,
    14, 7, 6, 4, 1,
]

sample_ser = pd.Series(data=value, index=index)

该 Series 的索引是测量值,对应的值是该测量值出现的次数。

我正在尝试计算一些自定义参数,但目前用的是标准的 Python for / while 循环。我想知道是否有更好的方法来完成这类操作,下面是其中一个例子。

感谢帮助。

# Return the limits within which 68% of the total count took place.
# Starting from the most_counted label, we repeatedly add the higher of the two counts adjacent to the current range.
# If the 2 counts are equal we look at the next labels outward; the side with the higher count is chosen.

def active_area(sample_ser, most_counted=76310, step=10):
    """Grow a label range outward from ``most_counted`` until 68% is covered.

    Starting with the two labels adjacent to ``most_counted``, the side with
    the higher count is added to a running total and that side's label moves
    one ``step`` further out.  When both counts are equal, labels further out
    are compared pairwise until one side wins or a boundary is reached.  The
    loop stops once the running total reaches 68% of ``sample_ser.sum()``.

    Notes on behavior kept from the original implementation:

    * The running total starts at 0, so the count stored at ``most_counted``
      itself is not added to it.
    * On the high side the last index label is treated as the boundary
      (``>=``); it is only consumed once the low side is exhausted.
    * The returned labels are the positions of the two moving labels when
      the loop ends, clamped to the first / last index label.

    Args:
        sample_ser: Series of counts.  Assumed to have a sorted numeric index
            whose labels are spaced ``step`` apart and aligned with
            ``most_counted``; a label missing from the index raises
            ``KeyError``.
        most_counted: Label to start from (the label with the most
            occurrences).  Defaults to the value hard-coded originally.
        step: Distance between consecutive labels.  Must be positive.

    Returns:
        Tuple ``(high_label, low_label)``.

    Raises:
        ValueError: If ``step`` is not positive (the loop could not advance).
    """
    if step <= 0:
        raise ValueError("step must be positive")

    first_label = sample_ser.index[0]
    last_label = sample_ser.index[-1]
    target = sample_ser.sum() * 0.68

    total_count = 0
    high_label = most_counted + step
    low_label = most_counted - step

    while total_count < target:
        low_exhausted = low_label < first_label

        # Both sides ran past the index: the target cannot be reached
        # (the uncounted peak holds too large a share).  Stop instead of
        # raising KeyError on a label beyond the last one.
        if low_exhausted and high_label > last_label:
            break

        # Low side out of bounds: only the high side can still grow.
        if low_exhausted:
            total_count += sample_ser[high_label]
            high_label += step
            continue

        # High side at / past the last label: only the low side can grow.
        if high_label >= last_label:
            total_count += sample_ser[low_label]
            low_label -= step
            continue

        h_len = sample_ser[high_label]
        l_len = sample_ser[low_label]

        if h_len != l_len:
            take_high = h_len > l_len
        else:
            # Tie: compare labels further out, one step at a time, until
            # one side has the higher count or a boundary is hit.
            offset = step
            while True:
                temp_high = high_label + offset
                temp_low = low_label - offset

                if temp_low < first_label:
                    take_high = True
                    break
                if temp_high >= last_label:
                    take_high = False
                    break

                h_len_temp = sample_ser[temp_high]
                l_len_temp = sample_ser[temp_low]
                if h_len_temp != l_len_temp:
                    take_high = h_len_temp > l_len_temp
                    break

                offset += step

        if take_high:
            total_count += h_len
            high_label += step
        else:
            total_count += l_len
            low_label -= step

    # Clamp the moving labels to the range covered by the index.
    if low_label < first_label:
        low_label = first_label
    if high_label >= last_label:
        high_label = last_label

    return high_label, low_label

编辑:为了便于回答,我已将原问题中的 4 个循环删去了 3 个。

1 个答案:

答案 0 :(得分:0)

请尝试以下脚本(在我看来更加 Pythonic)。

我添加了一些用于测试的打印输出。在最终版本中,请将它们删除,并把主要的处理(processing)部分改写成函数。

import pandas as pd

def nxt(ser, kk: int):
    """Look up label ``kk`` in ``ser``.

    Returns ``(kk, ser[kk])`` when ``kk`` is one of the index labels,
    otherwise the sentinel pair ``(-1, 0)``.
    """
    # Guard clause: absent label -> sentinel index -1 with a zero value
    if kk not in ser.index:
        return (-1, 0)
    return (kk, ser[kk])

# Build the test Series: labels 75860..76500 (step 10) mapped to their counts
index = range(75860, 76510, 10)
value = [
    1, 1, 4, 6, 7, 7, 7, 7, 8, 7, 7, 7, 8,
    6, 6, 7, 15, 23, 26, 30, 31, 28, 22, 22, 21, 19,
    14, 15, 15, 14, 12, 12, 13, 14, 14, 15, 15, 19, 19,
    23, 25, 34, 38, 39, 40, 41, 35, 35, 30, 26, 23, 23,
    29, 25, 25, 25, 23, 21, 19, 16, 14, 7, 6, 4, 1,
]
sample_ser = pd.Series(data=value, index=index)

# Processing
target = sample_ser.sum()*0.68  # 68 % of the total count: the limit for trg
# Both edges of the range start at the label holding the largest count
idmax = sample_ser.idxmax()
low_ind = idmax
high_ind = idmax
trg = sample_ser[idmax]    # Running sum, seeded with the largest count
while True:
    # Candidates just outside the current range; (-1, 0) means "no such label"
    l_ind, l_val = nxt(sample_ser, low_ind - 10)
    h_ind, h_val = nxt(sample_ser, high_ind + 10)
    # Diagnostic printout - part 1
    print(f'L: {l_ind:5} {l_val:2}   R: {h_ind:5} {h_val:2}', end='    ')
    # Left wins only if it exists, is strictly larger than the right value
    # and still fits under the target
    go_left = (l_ind >= 0) and (l_val > h_val) and (trg + l_val <= target)
    # Right is the fallback: it only has to exist and fit under the target
    go_right = (h_ind >= 0) and (trg + h_val) <= target
    if not (go_left or go_right):
        print()           # Diagnostic printout - instead of part 2
        break
    if go_left:
        trg += l_val      # Extend the range one label to the left
        low_ind = l_ind
        side = 'Left:'    # For diagnostic printout
    else:
        trg += h_val      # Extend the range one label to the right
        high_ind = h_ind
        side = 'Right:'   # For diagnostic printout
    # Diagnostic printout - part 2
    print(f'{side:<6} {low_ind:5} {high_ind:5} {trg:3}')
print(f'Result: {low_ind:5} {high_ind:5} {trg:3}')