根据计数总和将记录拆分为存储桶

时间:2014-04-11 21:59:01

标签: oracle oracle11g

我的表如下所示。我需要找到一种方法,根据计数总和挑选出电话号码(这个总和每次都不同,但在本例中我们使用130)。

因此,其中一个解决方案是第1行到第5行加上第11行(将这些行的 Count Of People 值相加,您将得到130)。或者是第1-4、6、7、9、11、12行。选择哪些电话号码都没关系,只要总数是130。

有时你可能无法准确地得到130,所以“尽可能接近但不超过”将成为规则。

有办法做到这一点吗?

AutoID  Phone Number    Count Of People
1   5565787 57
2   2342343 30
3   2654456 17
4   3868556 12
5   9856756 12
6   9756456 4
7   4346365 4
8   2376743 3
9   9756343 3
10  2524349 3
11  2029393 2
12  9285656 1

3 个答案:

答案 0 :(得分:3)

我不确定纯SQL可以解决问题。但您可以使用table functions。这是您的问题的一个小例子。 首先,我们需要创建表类型:

-- One result row of the bucket distribution: a phone number, its head count
-- and the number of the bucket the row was placed in.
create type t_bucket_row as object (
    phone_number    varchar2(10),
    count_of_people number,
    bucket_no       number
);
/
-- Nested-table collection of t_bucket_row, so a function result can be queried with table().
create type t_bucket_table as table of t_bucket_row;
/

包含测试数据的表格:

-- Sample data from the question: one row per phone number with its head count.
create table test_data as
with seed_rows as (
  select 1 as autoid, '5565787' as phone_number, 57 as count_of_people from dual
  union all select 2,  '2342343', 30 from dual
  union all select 3,  '2654456', 17 from dual
  union all select 4,  '3868556', 12 from dual
  union all select 5,  '9856756', 12 from dual
  union all select 6,  '9756456', 4  from dual
  union all select 7,  '4346365', 4  from dual
  union all select 8,  '2376743', 3  from dual
  union all select 9,  '9756343', 3  from dual
  union all select 10, '2524349', 3  from dual
  union all select 11, '2029393', 2  from dual
  union all select 12, '9285656', 1  from dual
)
select autoid, phone_number, count_of_people
  from seed_rows;

然后我们创建一个实现客户分配算法的函数(抱歉,代码中没有注释说明它是如何工作的,但它确实有效;如果需要,我可以稍后补充)。在这里,我们创建一个表类型的变量,用电话号码和桶号填充它,然后从函数返回它。之后,在SQL查询中,我们将函数的结果用作FROM子句中的表。参数p_sum是您希望的客户计数总和:

create or replace function get_buckets(p_sum number) return t_bucket_table is
  -- Splits the rows of test_data into numbered buckets so that the
  -- count_of_people total of each bucket does not exceed p_sum.
  --
  -- Rows are processed from the largest count to the smallest and each row is
  -- put into the first bucket that still has room (first-fit decreasing). A new
  -- bucket is opened when no existing bucket can take the row. This is a greedy
  -- heuristic, so a bucket total is not guaranteed to be the best achievable.
  --
  -- Parameters:
  --   p_sum  maximum allowed count_of_people total per bucket.
  -- Returns:
  --   one t_bucket_row per test_data row, appended in processing order.
  --   bucket_no is 1..n for placed rows and 0 for rows that cannot be placed
  --   (count_of_people is NULL or larger than p_sum).
  buckets t_bucket_table := t_bucket_table();
  type bucket_sums is table of number index by binary_integer;
  sums bucket_sums;        -- sums(j) holds the running total of bucket j
  counter number := 0;     -- index of the row most recently appended to buckets
  found boolean;
begin
  sums(1) := 0;

  -- Largest counts first. autoid breaks ties so that the assignment of rows
  -- with equal counts is repeatable from run to run.
  for i in (select t.phone_number, t.count_of_people
              from test_data t
             order by t.count_of_people desc, t.autoid) loop
    buckets.extend;
    counter := counter + 1;
    buckets(counter) := t_bucket_row(i.phone_number, i.count_of_people, 0);

    -- Rows that can never fit stay in bucket 0. A NULL count is treated the
    -- same way, otherwise it would open a bucket whose total is NULL and that
    -- could never accept another row.
    if i.count_of_people is null or i.count_of_people > p_sum then
       continue;
    end if;

    -- First fit: use the lowest-numbered bucket that still has room.
    found := false;
    for j in 1..sums.count loop
      if sums(j) + i.count_of_people <= p_sum then
         sums(j) := sums(j) + i.count_of_people;
         buckets(counter).bucket_no := j;
         found := true;
         exit;
      end if;
    end loop;

    -- No bucket has room, so open a new one that starts with this row.
    if not found then
       sums(sums.count + 1) := i.count_of_people;
       buckets(counter).bucket_no := sums.count;
    end if;

  end loop;

  return buckets;
end;
/

现在我们可以执行此功能了。结果是:

SQL> select * from table(get_buckets(130));

PHONE_NUMB COUNT_OF_PEOPLE  BUCKET_NO
---------- --------------- ----------
5565787                 57          1
2342343                 30          1
2654456                 17          1
3868556                 12          1
9856756                 12          1
9756456                  4          2
4346365                  4          2
2376743                  3          2
9756343                  3          2
2524349                  3          2
2029393                  2          1
9285656                  1          2

12 rows selected.

存储桶分布:

select bucket_no, sum(count_of_people) from table(get_buckets(130)) group by bucket_no;

 BUCKET_NO SUM(COUNT_OF_PEOPLE)
---------- --------------------
        1           130
        2            18

如果count_of_people超过p_sum,则会转到存储桶“0”:

SQL> select * from table(get_buckets(35));

PHONE_NUMB COUNT_OF_PEOPLE  BUCKET_NO
---------- --------------- ----------
5565787                 57          0
2342343                 30          1
2654456                 17          2
3868556                 12          2
9856756                 12          3
9756456                  4          1
4346365                  4          2
2376743                  3          3
9756343                  3          3
2524349                  3          3
2029393                  2          2
9285656                  1          1

12 rows selected.

SQL> select bucket_no, sum(count_of_people) from table(get_buckets(35)) group by bucket_no;

 BUCKET_NO SUM(COUNT_OF_PEOPLE)
---------- --------------------
         1                   35
         2                   35
         3                   21
         0                   57

答案 1 :(得分:3)

对于“第一个桶”的解决方案,这是递归子查询分解(recursive subquery factoring)的一个很好的练习。以下查询会给出这样一个存储桶(不过电话号码被连接成了一个字符串):

with source$ as (
    -- Inline sample rows (autoid, phone_number, count_of_people).
    select 1 as autoid, '5565787' as phone_number, 12 as count_of_people from dual union all
    select 2,  '2342343', 3  from dual union all
    select 3,  '2654456', 1  from dual union all
    select 4,  '3868556', 12 from dual union all
    select 5,  '9856756', 4  from dual union all
    select 6,  '9756456', 4  from dual union all
    select 7,  '4346365', 57 from dual union all
    select 8,  '2376743', 3  from dual union all
    select 9,  '9756343', 3  from dual union all
    select 10, '2524349', 30 from dual union all
    select 11, '2029393', 2  from dual union all
    select 12, '9285656', 17 from dual
),
-- Enumerates combinations of source rows whose total stays at or below 130.
-- Anchor: every source row starts a combination of its own.
-- Recursive step: extend a combination with any row of higher autoid while the
-- running total does not exceed 130. The *_list columns record the members
-- of the combination as '|' or '+' separated strings.
-- NOTE: the number of combinations can grow very quickly with the row count.
permutator$ (
    autoid,
    phone_number,
    count_of_people,
    autoid_list,
    phone_number_list,
    count_of_people_sum,
    count_of_people_list
) as (
    select
        src.autoid,
        src.phone_number,
        src.count_of_people,
        to_char(src.autoid),
        cast(src.phone_number as varchar2(4000)),
        src.count_of_people,
        to_char(src.count_of_people)
    from source$ src
    union all
    select
        src.autoid,
        src.phone_number,
        src.count_of_people,
        combo.autoid_list || '|' || src.autoid,
        combo.phone_number_list || '|' || src.phone_number,
        combo.count_of_people_sum + src.count_of_people,
        combo.count_of_people_list || '+' || src.count_of_people
    from permutator$ combo
        inner join source$ src
            on src.autoid > combo.autoid
    where combo.count_of_people_sum + src.count_of_people <= 130
)
search depth first by autoid asc set siblings_order$,
-- Ranks all combinations: total closest to 130 first, ties broken by the
-- depth-first search order.
priority_ordered$ as (
    select
        combo.*,
        row_number() over (
            order by abs(combo.count_of_people_sum - 130), combo.siblings_order$
        ) as your_best_call$
    from permutator$ combo
)
select autoid_list, phone_number_list, count_of_people_sum, count_of_people_list
from priority_ordered$
where your_best_call$ = 1
;

...如果你想要一个原始项目的逐行列表,那么替换最后一个......

select autoid_list, phone_number_list, count_of_people_sum, count_of_people_list
from priority_ordered$
where your_best_call$ = 1
;

...与......

-- Row-per-member form of the best combination. The walk starts at the
-- best-ranked combination. Each CONNECT BY step moves to the row whose
-- autoid_list equals the parent's list with the parent's own autoid (its last
-- element) removed, so the walk emits one row per member of the combination.
select autoid, count_of_people, phone_number
from priority_ordered$ PO
start with your_best_call$ = 1
connect by PO.autoid_list||'|'||prior PO.autoid = prior PO.autoid_list
;

在Oracle的对象关系功能的帮助下,电话号码的收集可以通过一个收集器对象以非常优雅的方式解决(该对象通过成员方法将数据收集到其成员集合属性中,并返回自身的一个新实例)。此解决方案的一个小型SQL*Plus假脱机示例:

SQL> set verify off

SQL> define maxcountofpeoplesum = 130
SQL> @@23023283-split-records-into-buckets-based-on-a-sum-of-counts.sql

COUNT_OF_PEOPLE_SUM     AUTOID PHONE_NUMBER    COUNT_OF_PEOPLE
------------------- ---------- --------------- ---------------
                130          1 5565787                      12
                130          2 2342343                       3
                130          3 2654456                       1
                130          5 9856756                       4
                130          6 9756456                       4
                130          7 4346365                      57
                130         10 2524349                      30
                130         11 2029393                       2
                130         12 9285656                      17

9 rows selected.

SQL> define maxcountofpeoplesum = 15
SQL> @@23023283-split-records-into-buckets-based-on-a-sum-of-counts.sql

COUNT_OF_PEOPLE_SUM     AUTOID PHONE_NUMBER    COUNT_OF_PEOPLE
------------------- ---------- --------------- ---------------
                 15          1 5565787                      12
                 15          2 2342343                       3

SQL> define maxcountofpeoplesum = 200
SQL> @@23023283-split-records-into-buckets-based-on-a-sum-of-counts.sql

COUNT_OF_PEOPLE_SUM     AUTOID PHONE_NUMBER    COUNT_OF_PEOPLE
------------------- ---------- --------------- ---------------
                148          1 5565787                      12
                148          2 2342343                       3
                148          3 2654456                       1
                148          4 3868556                      12
                148          5 9856756                       4
                148          6 9756456                       4
                148          7 4346365                      57
                148          8 2376743                       3
                148          9 9756343                       3
                148         10 2524349                      30
                148         11 2029393                       2
                148         12 9285656                      17

12 rows selected.

SQL> define maxcountofpeoplesum = 147
SQL> @@23023283-split-records-into-buckets-based-on-a-sum-of-counts.sql

COUNT_OF_PEOPLE_SUM     AUTOID PHONE_NUMBER    COUNT_OF_PEOPLE
------------------- ---------- --------------- ---------------
                147          1 5565787                      12
                147          2 2342343                       3
                147          4 3868556                      12
                147          5 9856756                       4
                147          6 9756456                       4
                147          7 4346365                      57
                147          8 2376743                       3
                147          9 9756343                       3
                147         10 2524349                      30
                147         11 2029393                       2
                147         12 9285656                      17

11 rows selected.

我非常确定可以增强该查询以查出所有存储桶,正如Dmitry的解决方案所做的那样,但这会导致一个更加庞大复杂、而且性能可能很差的查询。对于您的问题,Dmitry的解决方案看起来更简单、更直接。

享受。

答案 2 :(得分:3)

您也可以尝试使用用户自定义聚合函数(User-Defined Aggregate function)。我会尝试用一个小例子来说明。首先,我们需要创建表类型:

-- Nested table of numbers. TO_BALANCED_BUCKET uses it for its summ attribute.
create or replace type TTN as table of number;
/

然后我们创建了需要实现的例程来定义用户定义的聚合函数。

-- Aggregation context for the balanced_bucket user-defined aggregate.
-- It declares the four ODCIAggregate routines of the aggregate interface.
create or replace type TO_BALANCED_BUCKET as object
(
   -- summ(n) is the running total of the values placed in bucket n
   summ TTN,
   -- bucket number of the most recently processed value (0 = fits no bucket)
   result int,

   -- Builds the initial context.
   static function ODCIAggregateInitialize(sctx in out nocopy TO_BALANCED_BUCKET) return number,

   -- Processes one input value and records the bucket it was placed in.
   member function ODCIAggregateIterate(self in out nocopy TO_BALANCED_BUCKET, value in number)
      return number,

   -- Hands the current result back as the value of the aggregate.
   member function ODCIAggregateTerminate(self in TO_BALANCED_BUCKET,
                                          returnValue out number,
                                          flags in number) return number,

   -- Would combine two contexts. The type body does not implement it and returns an error.
   member function ODCIAggregateMerge(self in out nocopy TO_BALANCED_BUCKET, ctx2 in TO_BALANCED_BUCKET)
      return number
)
/
create or replace type body TO_BALANCED_BUCKET is

   -- Starts a new aggregation: a single empty bucket (total 0) and a current
   -- result of 1.
   static function ODCIAggregateInitialize(sctx in out nocopy TO_BALANCED_BUCKET) return number is
   begin
      sctx := TO_BALANCED_BUCKET(TTN(0), 1);
      return ODCIConst.Success;
   end;

   -- Places one value using first fit and stores the chosen bucket number in
   -- self.result. The per-bucket limit is fixed at 130.
   member function ODCIAggregateIterate(self in out nocopy TO_BALANCED_BUCKET, value in number)
      return number is
      c_bucket_limit constant number := 130;
   begin
      -- A value larger than the limit fits nowhere: report bucket 0 and leave
      -- every running total untouched.
      if value > c_bucket_limit then
         self.result := 0;
         return ODCIConst.Success;
      end if;

      -- Take the lowest-numbered bucket that still has room for the value.
      for slot in 1 .. self.summ.count loop
         if self.summ(slot) + value <= c_bucket_limit then
            self.summ(slot) := self.summ(slot) + value;
            self.result := slot;
            return ODCIConst.Success;
         end if;
      end loop;

      -- No existing bucket has room, so open a new one holding just this value.
      self.summ.extend;
      self.summ(self.summ.count) := value;
      self.result := self.summ.count;
      return ODCIConst.Success;
   end;

   -- Returns the bucket number recorded for the most recently iterated value.
   member function ODCIAggregateTerminate(self in TO_BALANCED_BUCKET,
                                          returnValue out number,
                                          flags in number) return number is
   begin
      returnValue := self.result;
      return ODCIConst.Success;
   end;

   -- Merging two contexts is not implemented, so this always reports an error.
   member function ODCIAggregateMerge(self in out nocopy TO_BALANCED_BUCKET, ctx2 in TO_BALANCED_BUCKET)
      return number is
   begin
      return ODCIConst.Error;
   end;

end;
/

然后我们自己创建聚合函数。

-- User-defined aggregate backed by TO_BALANCED_BUCKET. It returns the bucket
-- number assigned to the most recently aggregated value.
-- It is deliberately NOT declared parallel_enable: ODCIAggregateMerge in
-- TO_BALANCED_BUCKET always returns ODCIConst.Error, so a plan that evaluates
-- the aggregate in parallel and merges partial contexts would fail.
create or replace function balanced_bucket(input number) return number
   aggregate using TO_BALANCED_BUCKET;
/

最后查询本身

with test_data as (
    select 1 as AutoID, '5565787' as Phone_Number, 12 as Count_Of_People from dual union all
    select 2, '2342343', 3 from dual union all
    select 3, '2654456', 1 from dual union all
    select 4, '3868556', 12 from dual union all
    select 5, '9856756', 4 from dual union all
    select 6, '9756456', 4 from dual union all
    select 7, '4346365', 57 from dual union all
    select 8, '2376743', 3 from dual union all
    select 9, '9756343', 3 from dual union all
    select 10, '2524349', 30 from dual union all
    select 11, '2029393', 2 from dual union all
    select 12, '9285656', 17 from dual
)
-- Assigns a bucket number to every row, feeding the aggregate the largest
-- counts first. The window is an explicit ROWS frame with autoid as a
-- tie-breaker: with the default RANGE frame, rows that share the same
-- count_of_people are peers and would all report the bucket of the last peer
-- instead of their own.
select t.phone_number,
       t.count_of_people,
       balanced_bucket(t.count_of_people) over (
           order by t.count_of_people desc, t.autoid
           rows between unbounded preceding and current row
       ) as balanced_bucket
  from test_data t;

希望此解决方案有所帮助。客户的分配算法来自 Dmitry。