使用可迭代范围从df列中获取范围内的所有项目

时间:2019-07-29 22:03:01

标签: python pandas iterator range

我想创建一个可应用于 df 列(“C2017Value”)的函数,找出该列中落在范围列表(ranges)任一范围内的所有条目,并把这些条目及其对应的 c 值输出到结果字典 {'c': C2017Value} 中,如下所示:

results = {'c3': 268} #268 is within one of the ranges

我的代码卡住了,非常感谢任何帮助和反馈。

df #dataframe with two columns, 'c' and C2017Value
 'c1', 137674167
 'c2',  2166178
 'c3',  268

ranges = [
 (261, 4760),
 (12273391, 11104571063),
 (45695385, 4134339925),
 (15266178, 1376748162),
 (10106104, 97810284),
 (6492248, 588025190)
 ]

以下是我为实现这个函数所做的几次尝试:

# Attempt: keep the first tuple from sorted(ranges) that passes the filter.
# NOTE(review): the filter compares `ranges[0]` and `ranges[1]` (whole tuples
# taken from the list, not the bounds of the loop variable `c2017`) against
# `value`, which is not defined in this snippet. The trailing `[0]` would raise
# IndexError if the filter matched nothing.
between_range = [c2017 for c2017
               in sorted(ranges)
               if ranges[0] <= value <= ranges[1]
               ][0]

def get_output_list(c2017value):
  """Collect the items yielded by `df` when `c2017value` is in the first range.

  NOTE(review): only `ranges[0]` is ever tested; `index` is incremented but
  never used to index `ranges`. The test does not depend on the loop variable
  `c`, so the result is either every item yielded by `df` or an empty list.
  If `df` is a pandas DataFrame, iterating it yields column labels, not rows
  (confirm this is intended).
  """
  output_list = []
  index = 0
  for c in df:
    # Inclusive check against the bounds of the first range only.
    if ranges[0][0] <= c2017value <= ranges[0][1]:
      output_list.append(c)
    else:
      index += 1
  return output_list

# Attempt: walk `df`, comparing each item against `ranges[index]` and moving
# `index` forward whenever the comparison fails.
# NOTE(review): the `if` line below is not valid Python; `xrange()` and
# `ranges[index]` are two expressions with no operator between them.
# `column_value` is not defined anywhere in this snippet.
def get_output_list0(df, ranges):
  output_list = []
  index = 0
  for c in df:
    if c.column_value('C2017Value') == xrange() ranges[index]:
      output_list.append(c)
    else:
      index += 1
  return output_list

def get_output_list1(C2017Value):
    # Attempt: for each (x, y) pair in sorted(ranges), append every item of
    # `ms_df` to `output` when C2017Value lies in the half-open span [x, y).
    # NOTE(review): `any()` expects an iterable; with a scalar C2017Value the
    # chained comparison is a plain bool and `any()` raises TypeError.
    # `ms_df` and `output` are not defined in this snippet, and nothing is
    # returned.
    for x, y in sorted(ranges):
        if any(x <= C2017Value < y):
            for c in ms_df:
                output.append(c)

def get_output_list2(CValue):
    # Attempt: return the loop item `c` on the first iteration of `ms_df`
    # where the membership test passes.
    # NOTE(review): the parameter is `CValue`, but the test reads `C2017Value`,
    # `MINvalue` and `MAXvalue`, none of which are defined in this snippet.
    # `ranges` is rebound to the result of get_ranges() (not shown) and is then
    # called like a function.
    output = []
    ranges = get_ranges()
    for c in ms_df:
        ##if MINvalue<= CValue <=MAXvalue:
        if C2017Value in ranges(MINvalue, MAXvalue):
            return c
            # NOTE(review): the two lines below are unreachable after `return`.
            output.append(c)
            break

def get_output_list3(C2017Value):
    # Attempt: return the first item yielded by `df` when `CValue in ranges`.
    # NOTE(review): the parameter `C2017Value` is never used; the test reads
    # `CValue`, which is not defined in this snippet. `in ranges` tests
    # equality against the elements of `ranges` (tuples in the list shown in
    # the question), not whether a number lies between a pair of bounds.
    ##ranges = get_ranges()
    for c in df:
        ##if MINvalue<= CValue <=MAXvalue:
        if CValue in ranges:
            return c

# Attempt: append the loop item to `output` and return `output` on the first
# iteration of `df_countries` where `C2017Value in ranges` holds.
# NOTE(review): `ranges[0:1]` is not a valid parameter name, so the `def` line
# is a syntax error. The loop reads `df_countries` rather than the `df`
# parameter, and `output` is not defined in this snippet. `in ranges` is an
# equality test against the elements of `ranges`, not an interval check.
def get_output_list4(df, C2017Value, ranges[0:1]):
    ##ranges = get_ranges()
    for c in df_countries:
    ##if MINvalue<= CValue <=MAXvalue:
        if C2017Value in ranges:
        #if C2017Value in range(ranges):    
        #return c
            output.append(c)
            return output

# Attempt: appears to tally matches per range in `results` (TODO confirm the
# intent with the author).
# NOTE(review): this snippet does not parse. The `def` line is missing its
# closing `)`, the generator `for start,end` has no `in <iterable>` clause,
# and `results.setdefault(...) += 1` is an augmented assignment to a call.
# `range_name`, `number` and `results` are not defined in this snippet.
def get_output_list5(C2017Value:
    for c in df_countries:
        for x in sorted(ranges):
            range_list = ranges[range_name]
            if any(start <= number < end for start,end):
                results.setdefault(range_name, 0) += 1

def get_output_list6(C2017Value):
    # Attempt: for every item of `ms_df` and every (x, y) pair in
    # sorted(ranges), append the item to `output` when C2017Value lies in the
    # half-open span [x, y).
    # NOTE(review): `any()` expects an iterable; with a scalar C2017Value the
    # chained comparison is a plain bool and `any()` raises TypeError.
    # `ms_df` and `output` are not defined in this snippet, and nothing is
    # returned.
    for c in ms_df:
        for x, y in sorted(ranges):
            if any(x <= C2017Value < y):
                output.append(c)

这两个可能是最有前途的尝试:

# Attempt: keep the first tuple from sorted(ranges) that passes the filter.
# NOTE(review): the filter compares `ranges[0]` and `ranges[1]` (whole tuples
# taken from the list, not the bounds of the loop variable `c2017`) against
# `value`, which is not defined in this snippet. The trailing `[0]` would raise
# IndexError if the filter matched nothing.
between_range = [c2017 for c2017
               in sorted(ranges)
               if ranges[0] <= value <= ranges[1]
               ][0]


def get_output_list(c2017value):
  """Collect the items yielded by `df` when `c2017value` is in the first range.

  NOTE(review): only `ranges[0]` is ever tested; `index` is incremented but
  never used to index `ranges`. The test does not depend on the loop variable
  `c`, so the result is either every item yielded by `df` or an empty list.
  If `df` is a pandas DataFrame, iterating it yields column labels, not rows
  (confirm this is intended).
  """
  output_list = []
  index = 0
  for c in df:
    # Inclusive check against the bounds of the first range only.
    if ranges[0][0] <= c2017value <= ranges[0][1]:
      output_list.append(c)
    else:
      index += 1
  return output_list

between_range 报出以下错误消息:“'<=' not supported between instances of 'int' and 'str'”(即 'int' 与 'str' 的实例之间不支持 '<=' 比较)

1 个答案:

答案 0 :(得分:0)

将 apply() 与一个用于检查值是否落在某个范围内的函数配合使用,就可以创建包含结果的新 DataFrame:

def check_ranges(value):
    """Return True if ``value`` lies inside any (low, high) pair of ``ranges``.

    Both bounds are inclusive. ``ranges`` is looked up in the enclosing
    module scope rather than passed in.
    """
    return any(low <= value <= high for low, high in ranges)

results = df[ df['C2017Value'].apply(check_ranges) ]

工作代码:

import pandas as pd

df = pd.DataFrame([
        ['c1', 137674167],
        ['c2', 2166178],
        ['c3', 268],
     ], columns=['c', 'C2017Value'])

ranges = [
    (261, 4760),
    (12273391, 11104571063),
    (45695385, 4134339925),
    (15266178, 1376748162),
    (10106104, 97810284),
    (6492248, 588025190)
]

def check_ranges(value):
    """Return True if ``value`` lies inside any (low, high) pair of ``ranges``.

    Both bounds are inclusive. ``ranges`` is looked up in the enclosing
    module scope rather than passed in.
    """
    return any(low <= value <= high for low, high in ranges)

results = df[ df['C2017Value'].apply(check_ranges) ]

print(results)

结果:

   c   C2017Value
0  c1   137674167
2  c3         268

也可以把范围列表作为参数传入,这时需要借助 lambda:

def check_ranges(value, ranges):
    """Return True if ``value`` lies inside any (low, high) pair of ``ranges``.

    Args:
        value: The number to test.
        ranges: Iterable of two-item (low, high) pairs; both bounds are
            inclusive.

    Returns:
        True when at least one pair contains ``value``, otherwise False.
    """
    return any(low <= value <= high for low, high in ranges)

results = df[ df['C2017Value'].apply(lambda x, r=ranges:check_ranges(x,r)) ]

编辑:类似的代码可以给出

    c  C2017Value                    range
0  c1   137674167  (12273391, 11104571063)
1  c2     2166178                     None
2  c3         268              (261, 4760)

它返回 (a, b) 而不是 True,返回 None 而不是 False(不过也可以改为返回 False 或 NaN):

def get_range(value, ranges):
    """Return the first (low, high) pair of ``ranges`` that contains ``value``.

    Args:
        value: The number to look up.
        ranges: Iterable of two-item (low, high) pairs; both bounds are
            inclusive. Pairs are tested in the order given.

    Returns:
        The matching pair as a tuple, or None when no pair contains ``value``.
    """
    matches = ((low, high) for low, high in ranges if low <= value <= high)
    return next(matches, None)

df['range'] = df['C2017Value'].apply(lambda x, r=ranges:get_range(x,r))

print(df)

工作代码:

import pandas as pd

df = pd.DataFrame([
        ['c1', 137674167],
        ['c2', 2166178],
        ['c3', 268],
     ], columns=['c', 'C2017Value'])

ranges = [
    (261, 4760),
    (12273391, 11104571063),
    (45695385, 4134339925),
    (15266178, 1376748162),
    (10106104, 97810284),
    (6492248, 588025190)
]

def get_range(value, ranges):
    """Return the first (low, high) pair of ``ranges`` that contains ``value``.

    Args:
        value: The number to look up.
        ranges: Iterable of two-item (low, high) pairs; both bounds are
            inclusive. Pairs are tested in the order given.

    Returns:
        The matching pair as a tuple, or None when no pair contains ``value``.
    """
    matches = ((low, high) for low, high in ranges if low <= value <= high)
    return next(matches, None)

df['range'] = df['C2017Value'].apply(lambda x, r=ranges:get_range(x,r))

print(df)

results = df[ df['range'].notnull() ]

print(results)

结果:

    c  C2017Value                    range
0  c1   137674167  (12273391, 11104571063)
1  c2     2166178                     None
2  c3         268              (261, 4760)

    c  C2017Value                    range
0  c1   137674167  (12273391, 11104571063)
2  c3         268              (261, 4760)