迭代两个数据集以创建不同的结果数据集

时间:2017-10-31 19:32:56

标签: loops if-statement sas nested-loops

在SAS中,我有以下两个数据集:

数据集#1:人们用餐偏好的数据

   ID |  Meal   | Meal_rank
    1   Lobster       1
    1   Cake          2
    1   Hot Dog       3
    1   Salad         4
    1   Fries         5
    2   Burger        1
    2   Hot Dog       2
    2   Pizza         3
    2   Fries         4
    3   Hot Dog       1
    3   Salad         2
    3   Soup          3
    4   Lobster       1
    4   Hot Dog       2
    4   Burger        3

数据集#2:有关膳食可用性的数据

  Meal   | Units_available
  Hot Dog     2
  Burger      1
  Pizza       1

在SAS中,我想找到一种方法来派生如下所示的结果数据集(不更改数据集#1或#2中的任何内容):

   ID |  Assigned_Meal
    1   Hot Dog
    2   Burger
    3   Hot Dog
    4   Meal cannot be assigned (out of stock/unavailable)

结果由这样一个过程驱动:该过程依次遍历每个人(由其“ID”值确定)列出的餐点,直到满足以下任一条件:

  1. 找到一份仍有足够可用单位的餐点。
  2. 已根据可用性数据检查完该人列出的所有餐点。

值得注意的是:

    1. 有些人列出的餐点并不在可供应的餐点之中。
    2. 我实际使用的数据集远大于此示例(数千行)。

      以下是用于创建两个样本数据集的SAS代码:

          /* Sample data: one row per person (ID) and meal, with that person's rank for the meal. */
          proc sql;
              /* ID is stored as text, Meal_rank as a number. */
              create table work.ppl_meal_pref (
                  ID        char(4),
                  Meal      char(20),
                  Meal_rank num
              );

              /* Each SET clause inserts one row. */
              insert into work.ppl_meal_pref
                  set ID = '1', Meal = 'Lobster', Meal_rank = 1
                  set ID = '1', Meal = 'Cake',    Meal_rank = 2
                  set ID = '1', Meal = 'Hot Dog', Meal_rank = 3
                  set ID = '1', Meal = 'Salad',   Meal_rank = 4
                  set ID = '1', Meal = 'Fries',   Meal_rank = 5
                  set ID = '2', Meal = 'Burger',  Meal_rank = 1
                  set ID = '2', Meal = 'Hot Dog', Meal_rank = 2
                  set ID = '2', Meal = 'Pizza',   Meal_rank = 3
                  set ID = '2', Meal = 'Fries',   Meal_rank = 4
                  set ID = '3', Meal = 'Hot Dog', Meal_rank = 1
                  set ID = '3', Meal = 'Salad',   Meal_rank = 2
                  set ID = '3', Meal = 'Soup',    Meal_rank = 3
                  set ID = '4', Meal = 'Lobster', Meal_rank = 1
                  set ID = '4', Meal = 'Hot Dog', Meal_rank = 2
                  set ID = '4', Meal = 'Burger',  Meal_rank = 3
              ;
          quit;
          /* RUN has nothing left to execute after QUIT; kept as in the original listing. */
          run;
      
          /* Sample data: the menu, one row per meal with the number of units on hand. */
          proc sql;
              create table work.lunch_menu (
                  FoodName        char(14),
                  Units_available num
              );

              /* Each SET clause inserts one row. */
              insert into work.lunch_menu
                  set FoodName = 'Hot Dog', Units_available = 2
                  set FoodName = 'Burger',  Units_available = 1
                  set FoodName = 'Pizza',   Units_available = 1
              ;
          quit;
          /* RUN has nothing left to execute after QUIT; kept as in the original listing. */
          run;
      

      我试图实现循环来执行此任务,但无济于事(见下文)。

              /* Asker's attempt: load work.lunch_menu into a hash, then read every row of
                 work.ppl_meal_pref, look up Units_available and write the row out.
                 It writes one row per preference row; it never picks a single meal per ID
                 and never decrements the stock. */
              data work.assign_meals;
      
          /* Create the hash host variables in the PDV before the hash is defined. */
          length FoodName $ 14 Units_available 8;
          /* The step body executes only once (explicit loop below plus STOP), so the hash is built once. */
          if (_n_ = 1) then do;
              declare hash lookup(dataset:'work.lunch_menu', duplicate: 'error', ordered: 'ascending', multidata: 'NO');
                  lookup.defineKey('FoodName');
                  lookup.defineData('Units_available');
                  lookup.defineDone();
          end;
      
          /* Read all preference rows inside a single execution of the step. */
          do until (eof_pref);
              set work.ppl_meal_pref END = eof_pref;
              /* NOTE(review): FIND() with no arguments searches on the current value of the key
                 variable FoodName. work.ppl_meal_pref has columns ID, Meal and Meal_rank but no
                 FoodName, so nothing in this step ever assigns FoodName - the lookup is not
                 driven by the meal on the current row. */
              rc = lookup.FIND();
              /* Lookup failed: report zero units for this row. */
              IF rc ne 0 THEN DO;
                  Units_available = 0;
              end;
          /* One output row per preference row read. */
          output;
          end;
          /* All input has been consumed by the loop above; end the step explicitly. */
          stop;
          run;
      

3 个答案:

答案 0 :(得分:1)

另一种方法:使用 modify 语句——在分配的同时就地更新膳食可用性数据集。这比哈希方法稍微简洁一些,但性能可能不如哈希方法。另一方面,即使您的 lunch_menu 数据集太大、无法方便地载入内存,它仍然可以工作,并且分配完成之后您还能得到剩余膳食数量的记录。我已重命名变量,以确保各输入数据集之间的变量名一致:

/* Sample data: one row per person (ID) and food, with that person's rank for the food. */
data work.ppl_meal_pref;
    /* Column order, types and lengths: ID char(4), Food char(20), Meal_rank numeric. */
    length ID $ 4 Food $ 20 Meal_rank 8;
    /* Comma-delimited in-stream records, so values such as "Hot Dog" keep their embedded blank. */
    infile datalines dsd dlm = ',' truncover;
    input ID Food Meal_rank;
    datalines;
1,Lobster,1
1,Cake,2
1,Hot Dog,3
1,Salad,4
1,Fries,5
2,Burger,1
2,Hot Dog,2
2,Pizza,3
2,Fries,4
3,Hot Dog,1
3,Salad,2
3,Soup,3
4,Lobster,1
4,Hot Dog,2
4,Burger,3
;
run;

/* Sample data: the menu, one row per food with the number of units on hand. */
data work.lunch_menu;
    /* Column order, types and lengths: Food char(20), Units_available numeric. */
    length Food $ 20 Units_available 8;
    /* Comma-delimited in-stream records, so "Hot Dog" keeps its embedded blank. */
    infile datalines dsd dlm = ',' truncover;
    input Food Units_available;
    datalines;
Hot Dog,2
Burger,1
Pizza,1
;
run;

/* Create a unique simple index named Food on work.lunch_menu so rows can be fetched by key. */
proc sql;
    create unique index Food
        on work.lunch_menu (Food);
quit;

/*
 * Output to assigned_meals and update lunch_menu.
 *
 * For each ID, walk that ID's rows of ppl_meal_pref and assign the first food that
 * still has stock, decrementing lunch_menu in place as meals are handed out.
 *
 * Inputs : ppl_meal_pref (ID, Food, Meal_rank), read with BY ID, so it must be
 *          grouped/sorted by ID; within an ID the rows are tried in stored order.
 *          lunch_menu (Food, Units_available), fetched through its index on Food.
 * Outputs: assigned_meals - one row per ID; AssignedFood and AssignedFoodRank stay
 *          missing when no food could be assigned.
 *          lunch_menu - Units_available reduced by one for every assigned meal.
 */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    /* One execution of the step handles all rows of one ID. */
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        /* Keep searching only while this ID has no meal yet. */
        if missing(AssignedFood) then do;
            /* UNIQUE makes every keyed search start at the top of the index. Without it,
               two consecutive lookups with the same Food value (for example one person's
               last lookup and the next person's first) continue from the current index
               position, and on a unique index the second lookup reports "not found". */
            modify lunch_menu key = Food / unique;
            /* Non-zero _IORC_: the lookup failed (food not on the menu).
               Clear the error flag so the step carries on with the next preference. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                /* Take one unit out of stock and write the change back immediately. */
                units_available + -1;
                replace lunch_menu;
            end;
        end;
    end;
    /* Write the per-ID result, whether or not a food was assigned. */
    output assigned_meals;
run;

答案 1 :(得分:1)

这是使用 ealfons1 提供的示例数据、基于哈希的代码。两个数据集中键变量的名称不同(Meal 与 FoodName),这意味着您必须在 FIND() 中使用额外的语法(或者您可以在 SET 语句或 DATASET 说明符中重命名变量)。

它还会输出更新后的库存水平数据集。如果要跟踪未能分配的原因——即对于每个未获得膳食分配的 ID,其哪些偏好已经售罄、哪些根本不在菜单上——则需要额外的代码和输出数据集。

/*
 * Assign meals from a hash copy of the stock table.
 * For each ID, the preference rows are tried in BY order (ID, Meal_rank) and the first
 * meal that is on the menu with at least one unit left is assigned; the unit is then
 * taken out of the hash. IDs that get nothing receive one row whose Meal is
 * 'Ran out' (some preference was on the menu) or 'Not stocked' (none was).
 * The final stock levels are written to meals_stock_after_assignments.
 *
 * NOTE(review): meals_stock is not created in the code shown here; it is presumably the
 * stock/menu table with FoodName and Units_available - confirm before running.
 */
data meal_assignments;
  /* Only ID and Meal reach the output; work flags and hash host variables are dropped. */
  keep ID Meal;

  /* Never executed; compiling it puts the stock table's variables into the PDV. */
  if 0 then set meals_stock;

  declare hash inventory (dataset:'meals_stock');
  inventory.defineKey('FoodName');
  inventory.defineData('FoodName', 'Units_available');
  inventory.defineDone();

  /* Outer loop: one pass per ID, until the last preference row has been read. */
  do until (no_more_rows);
    got_meal = 0;
    on_menu = 0;

    /* Inner loop: every preference row of the current ID. */
    do until (last.ID);
      set ppl_meal_pref end=no_more_rows;
      /* An error is raised if the rows are not in ID / Meal_rank order. */
      by ID Meal_rank;

      /* Rows after an assignment are read but ignored. */
      if not got_meal then do;
        /* Look the preference up by its Meal value; a non-zero return means it is off the menu. */
        if inventory.find(key:Meal) = 0 then do;
          on_menu = 1;
          /* A missing or zero count fails this test and is treated as out of stock. */
          if Units_available >= 1 then do;
            Units_available + (-1);
            /* Write the reduced count back; the row is output only if that worked. */
            if inventory.replace() = 0 then do;
              got_meal = 1;
              output;
            end;
            else put 'WARNING: Problem with stock hash ' Meal=;
          end;
        end;
      end;
    end;

    /* Nothing assigned for this ID: emit a single row that says why. */
    if not got_meal then do;
      Meal = ifc(on_menu, 'Ran out', 'Not stocked');
      output;
    end;
  end;

  /* Save the stock levels that remain after all assignments. */
  inventory.output(dataset:'meals_stock_after_assignments');

  stop;
run;

/* Report: the assignments, the stock left over, and how many units of each meal were used. */
options nocenter;
title "Meals report";

title2 "Assignments";
proc print data=meal_assignments noobs;
run;

title2 "New stock levels";
proc print data=meals_stock_after_assignments noobs;
run;

/* Usage per meal = units before the assignments minus units afterwards. */
title2 "Usage summary";
proc sql;
  select post.FoodName                              as Meal,
         post.Units_available                       as have_count,
         pre.Units_available                        as had_count,
         pre.Units_available - post.Units_available as use_count
  from meals_stock_after_assignments as post
       inner join
       meals_stock as pre
       on post.FoodName = pre.FoodName
  ;
quit;

这里“想要”的结果是基于队列的:

  • 先到先得、按偏好等级依次满足的解决方案。
    • 对 ID 采用随机的排队顺序,可以提供一定程度的“感知上的”公平性

更困难的解决方案将基于全局规划,例如:

  • 让尽可能多的人得到其最高偏好的餐点
  • 以最低的成本为尽可能多的人提供餐点
  • 等等……

答案 2 :(得分:0)

我之前从未使用过哈希表的替换功能而且我没有测试过这段代码,但根据我的理解,这应该可以完成这项工作:

/*
 * Build work.assign_meals with variables ID and Assigned_Meal.
 *
 * For every ID, the rows of work.ppl_meal_pref are tried in stored order and the first
 * meal that is on work.lunch_menu with at least one unit left is assigned; the unit is
 * then removed from the in-memory copy of the menu. An ID for which no meal can be
 * assigned gets one row with an explanatory message instead.
 *
 * Requirements: work.ppl_meal_pref must be grouped/sorted by ID (BY ID), with each ID's
 * rows in the order in which they should be tried.
 */
data work.assign_meals (keep=ID Assigned_Meal);

    /* Wide enough for any Meal value (char(20)) and for the "cannot be assigned" message. */
    length Assigned_Meal $ 60;

    /* Read the preferences; BY ID provides first.ID / last.ID. */
    set work.ppl_meal_pref;
    by ID;

    /* Remember across rows of the same ID whether this person has been served. */
    retain served;
    if first.ID then served = 0;

    /* Load lunch_menu into memory once, on the first row. */
    length FoodName $ 14 Units_available 8;
    if (_n_ = 1) then do;
        declare hash lookup(dataset:'work.lunch_menu',
            duplicate: 'error',
            ordered: 'ascending',
            multidata: 'NO');

        lookup.defineKey('FoodName');
        lookup.defineData('Units_available');
        lookup.defineDone();
    end;

    if not served then do;
        /* The preference table calls the meal "Meal" while the hash key is FoodName,
           so the key value has to be passed explicitly. */
        rc = lookup.FIND(key: Meal);
        if rc eq 0 and Units_available gt 0 then do;
            /* Take one unit out of stock, again addressing the entry by Meal. */
            Units_available = Units_available - 1;
            rc = lookup.REPLACE(key: Meal, data: Units_available);

            /* Set the result before writing the row, otherwise it would be output blank. */
            Assigned_Meal = Meal;
            served = 1;
            output;
        end;
    end;

    /* Every preference of this ID was tried without success: still report the ID. */
    if last.ID and not served then do;
        Assigned_Meal = 'Meal cannot be assigned (out of stock/unavailable)';
        output;
    end;
run;

我目前没有时间对其进行测试。如果它不起作用,请告诉我,这样我可以稍后再来测试并修正。