我是 Pandas 的新手,目前有这样一个 Series:
import pandas as pd
# Measurement labels: 75860, 75870, ..., 76500 (65 labels, step 10).
index = list(range(75860, 76510, 10))
# Number of occurrences recorded for each label, in label order.
value = [
    1, 1, 4, 6, 7, 7, 7, 7, 8, 7,
    7, 7, 8, 6, 6, 7, 15, 23, 26, 30,
    31, 28, 22, 22, 21, 19, 14, 15, 15, 14,
    12, 12, 13, 14, 14, 15, 15, 19, 19, 23,
    25, 34, 38, 39, 40, 41, 35, 35, 30, 26,
    23, 23, 29, 25, 25, 25, 23, 21, 19, 16,
    14, 7, 6, 4, 1,
]
sample_ser = pd.Series(data=value, index=index)
该 Series 的索引是测量值,对应的值是该测量值出现的次数。
我正在尝试计算一些自定义参数,但目前用的是标准的 Python 循环;我想知道有没有更好的方法来完成这些计算。下面是其中一个函数。
感谢帮助。
def _prefer_high_side(counts, low_pos, high_pos):
    """Return True when the high-side candidate should be absorbed next.

    The counts at ``low_pos`` and ``high_pos`` are compared first.  While they
    tie, the comparison moves one bin further out on both sides.  Running off
    the low end selects the high side; running off the high end selects the
    low side.  The low end is checked first.

    The caller must ensure that at least one of the two positions is valid.
    """
    last_pos = len(counts) - 1
    offset = 0
    while True:
        look_low = low_pos - offset
        look_high = high_pos + offset
        if look_low < 0:
            return True
        if look_high > last_pos:
            return False
        if counts[look_high] != counts[look_low]:
            return counts[look_high] > counts[look_low]
        offset += 1


def active_area(sample_ser, fraction=0.68):
    """Return the labels bounding the area that grows outwards from the mode.

    Starting next to the label with the highest count, the neighbouring bin
    with the larger count is absorbed repeatedly (ties are resolved by looking
    further out, see ``_prefer_high_side``) until the absorbed counts reach
    ``fraction`` of the total count, or the whole series has been consumed.

    Neighbours are found by position, so the index labels do not need to be
    evenly spaced.

    Args:
        sample_ser: pandas Series mapping a label to its number of
            occurrences.  Must not be empty.
        fraction: share of ``sample_ser.sum()`` the absorbed counts must
            reach.  Defaults to 0.68.

    Returns:
        A ``(high_label, low_label)`` tuple.  Each label is the next candidate
        on that side that has NOT been absorbed yet, clamped to the last /
        first label of the index.

    Raises:
        ValueError: if ``sample_ser`` is empty.
    """
    if sample_ser.empty:
        raise ValueError("sample_ser must contain at least one count")

    counts = sample_ser.to_numpy()
    labels = sample_ser.index
    last_pos = len(counts) - 1
    target = sample_ser.sum() * fraction

    # Position of the most frequent label (first one when several tie).
    mode_pos = int(counts.argmax())
    low_pos = mode_pos - 1
    high_pos = mode_pos + 1

    # NOTE(review): the count of the mode itself is not part of the running
    # total, so the returned area covers more than `fraction` of the counts.
    # Kept as in the original implementation -- confirm whether intended.
    total_count = 0
    while total_count < target:
        if low_pos < 0 and high_pos > last_pos:
            # Every bin has been absorbed; the target cannot be reached.
            break
        if _prefer_high_side(counts, low_pos, high_pos):
            total_count += counts[high_pos]
            high_pos += 1
        else:
            total_count += counts[low_pos]
            low_pos -= 1

    # Clamp candidates that stepped outside the series back onto its ends.
    return labels[min(high_pos, last_pos)], labels[max(low_pos, 0)]
编辑:已从最初的问题中删去 4 个循环中的 3 个,以便更容易回答。
答案 0 :(得分:0)
请尝试下面这个(在我看来更符合 Python 风格的)脚本。
我添加了一些用于测试的打印输出。在最终版本中,请将它们删除,并将主要的处理(processing)部分改写为函数。
import pandas as pd
def nxt(ser, kk: int):
    """Look up label ``kk`` in ``ser``.

    Returns ``(kk, ser[kk])`` when ``kk`` is one of the index labels of
    ``ser``, otherwise the placeholder pair ``(-1, 0)``.
    """
    if kk not in ser.index:
        # Label missing: report the "not found" placeholder.
        return -1, 0
    return kk, ser[kk]
# Build the test Series: label -> number of occurrences
index = range(75860, 76510, 10)
value = [
    1, 1, 4, 6, 7, 7, 7, 7, 8, 7,
    7, 7, 8, 6, 6, 7, 15, 23, 26, 30,
    31, 28, 22, 22, 21, 19, 14, 15, 15, 14,
    12, 12, 13, 14, 14, 15, 15, 19, 19, 23,
    25, 34, 38, 39, 40, 41, 35, 35, 30, 26,
    23, 23, 29, 25, 25, 25, 23, 21, 19, 16,
    14, 7, 6, 4, 1,
]
sample_ser = pd.Series(value, index=index)

# Processing: widen a [low_ind, high_ind] window around the most frequent
# label for as long as the summed counts stay within the target limit.
target = sample_ser.sum() * 0.68
idmax = sample_ser.idxmax()      # label holding the maximum count
low_ind = high_ind = idmax       # the window starts as that single label
trg = sample_ser[idmax]          # running sum, seeded with the maximum count

while True:
    # Candidates just outside the window; a missing label yields (-1, 0)
    l_ind, l_val = nxt(sample_ser, low_ind - 10)
    h_ind, h_val = nxt(sample_ser, high_ind + 10)
    # Diagnostic printout - part 1
    print(f'L: {l_ind:5} {l_val:2} R: {h_ind:5} {h_val:2}', end=' ')

    if l_ind >= 0 and l_val > h_val and trg + l_val <= target:
        # Left candidate exists, is strictly larger and still fits: extend left
        low_ind, trg, side = l_ind, trg + l_val, 'Left:'
    elif h_ind >= 0 and trg + h_val <= target:
        # Otherwise extend right when that candidate exists and still fits
        high_ind, trg, side = h_ind, trg + h_val, 'Right:'
    else:
        # Neither side can be added: finish the diagnostic line and stop
        print()
        break

    # Diagnostic printout - part 2
    print(f'{side:<6} {low_ind:5} {high_ind:5} {trg:3}')

print(f'Result: {low_ind:5} {high_ind:5} {trg:3}')