在SAS中,我有以下两个数据集:
数据集#1:人们用餐偏好的数据
ID | Meal | Meal_rank
1 Lobster 1
1 Cake 2
1 Hot Dog 3
1 Salad 4
1 Fries 5
2 Burger 1
2 Hot Dog 2
2 Pizza 3
2 Fries 4
3 Hot Dog 1
3 Salad 2
3 Soup 3
4 Lobster 1
4 Hot Dog 2
4 Burger 3
数据集#2:有关膳食可用性的数据
Meal | Units_available
Hot Dog 2
Burger 1
Pizza 1
在SAS中,我想找到一种方法来派生如下所示的结果数据集(不更改数据集#1或#2中的任何内容):
ID | Assigned_Meal
1 Hot Dog
2 Burger
3 Hot Dog
4 Meal cannot be assigned (out of stock/unavailable)
结果由这样一个过程得出:按顺序遍历每个人(由其“ID”值标识)的膳食偏好,直到找到仍有可用份数的膳食,或者该人的所有膳食偏好都已检查完毕为止。
值得注意的是:
我正在使用的数据集远远大于此示例(数千行)。
以下是用于创建两个样本数据集的SAS代码:
/* Sample data: one row per (person, meal) preference.                  */
/* Meal_rank numbers each person's preferences; in the expected result  */
/* shown in the question, lower ranks are tried first.                  */
proc sql;
  create table work.ppl_meal_pref
    (
      ID        char(4),
      Meal      char(20),
      Meal_rank num
    );

  insert into work.ppl_meal_pref (ID, Meal, Meal_rank)
    /* person 1 */
    values ('1', 'Lobster', 1)
    values ('1', 'Cake',    2)
    values ('1', 'Hot Dog', 3)
    values ('1', 'Salad',   4)
    values ('1', 'Fries',   5)
    /* person 2 */
    values ('2', 'Burger',  1)
    values ('2', 'Hot Dog', 2)
    values ('2', 'Pizza',   3)
    values ('2', 'Fries',   4)
    /* person 3 */
    values ('3', 'Hot Dog', 1)
    values ('3', 'Salad',   2)
    values ('3', 'Soup',    3)
    /* person 4 */
    values ('4', 'Lobster', 1)
    values ('4', 'Hot Dog', 2)
    values ('4', 'Burger',  3)
  ;
quit;
run;
/* Sample data: units in stock for each meal that is on the menu. */
proc sql;
  create table work.lunch_menu
    (
      FoodName        char(14),
      Units_available num
    );

  insert into work.lunch_menu (FoodName, Units_available)
    values ('Hot Dog', 2)
    values ('Burger',  1)
    values ('Pizza',   1)
  ;
quit;
run;
我试图实现循环来执行此任务,但无济于事(见下文)。
/* Asker's attempt: load lunch_menu into a hash and look every preference row up in it. */
/* As written it emits one row per row of ppl_meal_pref (the OUTPUT below is            */
/* unconditional), never picks a single meal per ID and never decrements the stock.     */
data work.assign_meals;
/* Host variables for the hash key (FoodName) and data item (Units_available). */
length FoodName $ 14 Units_available 8;
/* Build the in-memory lookup table once, on the first iteration. */
if (_n_ = 1) then do;
declare hash lookup(dataset:'work.lunch_menu', duplicate: 'error', ordered: 'ascending', multidata: 'NO');
lookup.defineKey('FoodName');
lookup.defineData('Units_available');
lookup.defineDone();
end;
/* Read all of ppl_meal_pref inside a single data step iteration. */
do until (eof_pref);
set work.ppl_meal_pref END = eof_pref;
/* NOTE(review): FIND() without arguments takes its key from the FoodName variable. */
/* ppl_meal_pref supplies Meal, not FoodName, and nothing here copies Meal into     */
/* FoodName, so this lookup presumably never matches - confirm in the log.          */
rc = lookup.FIND();
/* No match: report zero units for this row. */
IF rc ne 0 THEN DO;
Units_available = 0;
end;
output;
end;
/* The loop above consumed the whole input, so end the step explicitly. */
stop;
run;
答案 0 :(得分:1)
另一种方法:使用 modify 语句,在分配过程中即时更新膳食可用性数据集。这比哈希方法稍微简洁一些,但性能可能不如哈希方法。另一方面,即使您的 lunch_menu 数据集太大而无法方便地装入内存,它仍然可以工作,并且您还能得到分配之后剩余膳食的记录。我已重命名变量,以确保两个输入数据集之间的一致性:
/* Sample preferences, with the meal column named Food so that it matches */
/* the column name used in lunch_menu.                                    */
proc sql;
  create table work.ppl_meal_pref
    (
      ID        char(4),
      Food      char(20),
      Meal_rank num
    );

  insert into work.ppl_meal_pref (ID, Food, Meal_rank)
    /* person 1 */
    values ('1', 'Lobster', 1)
    values ('1', 'Cake',    2)
    values ('1', 'Hot Dog', 3)
    values ('1', 'Salad',   4)
    values ('1', 'Fries',   5)
    /* person 2 */
    values ('2', 'Burger',  1)
    values ('2', 'Hot Dog', 2)
    values ('2', 'Pizza',   3)
    values ('2', 'Fries',   4)
    /* person 3 */
    values ('3', 'Hot Dog', 1)
    values ('3', 'Salad',   2)
    values ('3', 'Soup',    3)
    /* person 4 */
    values ('4', 'Lobster', 1)
    values ('4', 'Hot Dog', 2)
    values ('4', 'Burger',  3)
  ;
quit;
run;
/* Sample stock levels; the key column is named Food, as in ppl_meal_pref. */
proc sql;
  create table work.lunch_menu
    (
      Food            char(20),
      Units_available num
    );

  insert into work.lunch_menu (Food, Units_available)
    values ('Hot Dog', 2)
    values ('Burger',  1)
    values ('Pizza',   1)
  ;
quit;
run;
/* Create a unique simple index on Food in work.lunch_menu; the assignment */
/* step looks rows up through it with MODIFY ... KEY = Food.               */
proc datasets library = work nolist nowarn nodetails;
  modify lunch_menu;
    index create Food / unique;
  run;
quit;
/*Output to assigned_meals and update lunch_menu*/
/* Give each ID the first of its listed foods that still has stock, and     */
/* decrement lunch_menu in place as meals are handed out.                   */
/*   Inputs : ppl_meal_pref - read in stored order, grouped by ID           */
/*            lunch_menu    - needs the index on Food for KEY= lookups      */
/*   Outputs: assigned_meals - one row per ID; AssignedFood is left blank   */
/*                             when none of the ID's foods could be given   */
/*            lunch_menu     - Units_available reduced by 1 per assignment  */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
  length AssignedFood $ 20;
  /* One pass of this loop handles all rows of one ID. AssignedFood is not */
  /* retained, so it starts out missing again for the next ID.             */
  do until(last.ID);
    set ppl_meal_pref;
    by ID;
    /* Stop looking things up once this ID has been given a meal. */
    if missing(AssignedFood) then do;
      /* /unique makes every lookup start at the top of the index. Without */
      /* it, two consecutive lookups for the same Food value continue from */
      /* the current index position, and on a unique index the second one  */
      /* reports "no match" even though the row exists.                    */
      modify lunch_menu key = Food / unique;
      /* Food is not on the menu: clear the error flag and move on. */
      if _iorc_ then _error_ = 0;
      else if units_available > 0 then do;
        AssignedFood = Food;
        AssignedFoodRank = Meal_Rank;
        units_available + -1;
        /* Write the reduced stock level back to lunch_menu. */
        replace lunch_menu;
      end;
    end;
  end;
  /* Exactly one row per ID, assigned or not. */
  output assigned_meals;
run;
答案 1 :(得分:1)
这是基于哈希的代码,使用了 ealfons1 提供的示例数据。由于两个数据集中键变量的名称不同(Meal 与 FoodName),您必须在 FIND() 中使用额外的语法(或者也可以在 SET 语句或 DATASET 说明符中重命名变量)。
它还将输出更新的库存水平数据集。跟踪未分配的条件,即对于未获得膳食分配的每个ID,哪些偏好已经用完/没有库存,将需要额外的代码和输出数据。
/* Hash-based assignment: IDs are processed in input order and each one    */
/* receives the first of its preferences that still has stock.             */
/*   Outputs: meal_assignments - ID plus Meal (the assigned meal, or       */
/*                               'Ran out' / 'Not stocked')                */
/*            meals_stock_after_assignments - hash contents after the run  */
data meal_assignments;
  /* Never executed; only brings the meals_stock variables into the PDV. */
  if 0 then set meals_stock;

  /* FoodName is also a data item, so a successful find() loads it and the */
  /* argument-less replace() further down updates that same entry.         */
  declare hash inventory (dataset: 'meals_stock');
  inventory.defineKey('FoodName');
  inventory.defineData('FoodName', 'Units_available');
  inventory.defineDone();

  do until (no_more_rows);
    got_meal = 0;   /* has this ID been given a meal yet?                 */
    on_menu  = 0;   /* was any meal this ID looked up found in the hash?  */

    do until (last.ID);
      set ppl_meal_pref end=no_more_rows;
      /* The step fails if the input is not sorted by ID and Meal_rank. */
      by ID Meal_rank;

      if not got_meal then do;
        if inventory.find(key: Meal) = 0 then do;
          on_menu = 1;
          /* Skip meals that are out of stock or have a missing count. */
          if not (Units_available < 1) then do;
            Units_available + (-1);
            if inventory.replace() = 0 then do;
              got_meal = 1;
              output;
            end;
            else put 'WARNING: Problem with stock hash ' Meal=;
          end;
        end;
      end;
    end;

    /* Nothing could be assigned: say why in the Meal column. */
    if not got_meal then do;
      if on_menu then Meal = 'Ran out';
      else Meal = 'Not stocked';
      output;
    end;
  end;

  /* Save the updated stock levels held in the hash. */
  inventory.output(dataset: 'meals_stock_after_assignments');
  stop;

  keep ID Meal;
run;
/* Report: the assignments, the stock left afterwards, and units used per meal. */
options nocenter;
title "Meals report";

proc print data=meal_assignments noobs;
  title2 "Assignments";
run;

proc print data=meals_stock_after_assignments noobs;
  title2 "New stock levels";
run;

proc sql;
  title2 "Usage summary";
  /* Units used = stock before the run minus stock after it. */
  select cur.FoodName                                as Meal,
         cur.Units_available                         as have_count,
         init.Units_available                        as had_count,
         init.Units_available - cur.Units_available  as use_count
    from meals_stock_after_assignments as cur
         inner join
         meals_stock as init
         on cur.FoodName = init.FoodName
  ;
quit;
这里的“想要”(期望结果)是基于队列的:
更困难的解决方案将基于全球规划,例如:
答案 2 :(得分:0)
我之前从未使用过哈希表的替换功能而且我没有测试过这段代码,但根据我的理解,这应该可以完成这项工作:
/* build a dataset assign_meals with variables ID and Assigned_Meal */
/* Build work.assign_meals with one row per ID: the first preferred meal   */
/* that is still in stock, or a fixed message when none can be assigned.   */
/*   Inputs : work.ppl_meal_pref - grouped by ID, each ID's rows in the    */
/*                                 order the preferences should be tried   */
/*            work.lunch_menu    - FoodName, Units_available               */
/*   Output : work.assign_meals  - ID, Assigned_Meal                       */
data work.assign_meals (keep=ID Assigned_Meal);
  /* Read the preferences; BY provides first.ID and last.ID. */
  set work.ppl_meal_pref;
  by ID;

  /* Host variables for the hash, plus an Assigned_Meal wide enough for */
  /* the "cannot be assigned" message.                                  */
  length FoodName $ 14 Units_available 8 Assigned_Meal $ 60;

  /* served must survive from one observation of an ID to the next. */
  retain served;

  /* Load lunch_menu into memory once. */
  if (_n_ = 1) then do;
    declare hash lookup(dataset:'work.lunch_menu',
                        duplicate: 'error',
                        ordered: 'ascending',
                        multidata: 'NO');
    lookup.defineKey('FoodName');
    lookup.defineData('Units_available');
    lookup.defineDone();
    /* FoodName is never assigned directly; avoid the "uninitialized" note. */
    call missing(FoodName, Units_available);
  end;

  if first.ID then served = 0;

  if not served then do;
    /* The hash key is FoodName but the preference column is Meal, so the */
    /* key value has to be passed explicitly.                             */
    rc = lookup.find(key: Meal);
    if rc eq 0 and Units_available gt 0 then do;
      /* Serve this customer: set the result before writing the row. */
      Assigned_Meal = Meal;
      served = 1;
      /* Record that one unit has been used. */
      Units_available = Units_available - 1;
      rc = lookup.replace(key: Meal, data: Units_available);
      output;
    end;
  end;

  /* All preferences checked and nothing available. */
  if last.ID and not served then do;
    Assigned_Meal = 'Meal cannot be assigned (out of stock/unavailable)';
    output;
  end;
run;
我目前没有时间对其进行测试。如果它不起作用,请告诉我,以便我稍后再进行测试。