在Base SAS中,我有一个带有哈希对象的脚本来进行表查找。条件如下。 表A是原始的主表,它将使用表B进行查找。 查找关键字是AssetName和Voltage。 电压始终为33或11。
也就是说,表A以AssetName和Voltage作为键在表B中进行查找,并从表B取回一些数据。
让我们看看我拥有的示例代码。
/*
 * First-pass PMU lookup.
 *
 * Reads asset_re, builds the lookup key (assetname = mnemonic_tnbt,
 * voltage = '132/' followed by the incoming voltage) and looks it up in a
 * hash loaded from ncpdm.ncp_asset_pmu.
 *   - rows that are found go to ncpdm.ncp_load_re
 *   - rows that are not found go to work.excp_ncp_load_re with excp_code set
 */
data ncpdm.ncp_load_re (drop=excp_code re_state re_supply_zone)
     work.excp_ncp_load_re;
  /* The exception message assigned below is 66 characters long; the former
     $50 length silently truncated it, so excp_code is widened to $80. */
  length excp_code $80 re_state re_supply_zone $30;

  if _n_=1 then do;
    /* Load the PMU master list once, keyed by assetname + voltage. */
    declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
    pmu_list.definekey('assetname','voltage');
    pmu_list.definedata('region','zone','state_code','state',
                        'business_area_code','business_area',
                        'supply_zone_code','supply_zone',
                        'sub_supply_zone_code','sub_supply_zone',
                        'pmu_name','substation_name_tnbt','functional_location');
    pmu_list.definedone();
    /* Reference the hash data variables so they exist in the PDV.
       NOTE(review): CALL MISSING does not declare type or length; confirm
       these variables are typed to match ncpdm.ncp_asset_pmu. */
    call missing(region,zone,state_code,state,
                 business_area_code,business_area,
                 supply_zone_code,supply_zone,
                 sub_supply_zone_code,sub_supply_zone,
                 pmu_name,substation_name_tnbt,functional_location);
  end;

  /* pmu and voltage are renamed on input so that assetname and voltage can
     be rebuilt below as the hash key variables. */
  set asset_re (RENAME=(pmu=assetname voltage=voltage_));
  data_dttm=datetime();

  /* Build the voltage key in the '132/nn' form. */
  voltage_=strip(voltage_);
  voltage=cats('132/',voltage_);

  /* The lookup is driven by mnemonic_tnbt; this overwrites the value that
     arrived in assetname from the renamed pmu column. */
  mnemonic_tnbt=strip(mnemonic_tnbt);
  assetname=mnemonic_tnbt;

  rc=pmu_list.find();
  if (rc^=0) then do;
    excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
    output work.excp_ncp_load_re;
  end;
  else do;
    output ncpdm.ncp_load_re;
  end;

  /* Duplicate entries (re_state, voltage) removed from the list.
     NOTE(review): pmu is renamed to assetname on input, so no variable
     named pmu exists in this step - confirm whether assetname or pmu_name
     should be kept instead. */
  keep mnemonic_tnbt excp_code re_state re_supply_zone
       region zone state_code state business_area_code business_area
       supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
       pmu_name substation_name_tnbt functional_location voltage
       re_station
       re_ca_no
       re_customer_name
       re_capacity
       re_commission_date
       re_technology
       pmu
       ppu
       ssu_pe
       re_switch_no
       period
       data_dttm
       active_flag
       program
       scod_date
       kick_off_date
       iom_date
       geo_longitude
       geo_latitude;
run;
在上面的代码中,我把那些无法映射/查找到的记录输出到excp表。然后,我使用相同的哈希对象代码,但以excp表作为数据源,再次对同一张查找表进行查找,代码如下。(我将电压改为33或11中与现有电压相反的那个值。)
/*2nd round lookup for failed record*/
/*
 * Re-reads the rows that failed the first lookup (work.excp_ncp_load_re),
 * swaps the voltage key between '132/11' and '132/33', and tries the PMU
 * hash lookup again.
 *   - rows that are found now go to ncpdm.ncp_load_rev2
 *   - rows that still fail are written back to work.excp_ncp_load_re
 */
data ncpdm.ncp_load_rev2 (drop=excp_code re_state re_supply_zone)
     /* No DROP= on the exceptions table: dropping excp_code here would
        discard the reason the row failed. */
     work.excp_ncp_load_re;
  /* The exception message assigned below is 66 characters long; the former
     $50 length silently truncated it, so excp_code is widened to $80. */
  length excp_code $80 re_state re_supply_zone $30;

  if _n_=1 then do;
    /* Load the PMU master list once, keyed by assetname + voltage. */
    declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
    pmu_list.definekey('assetname','voltage');
    pmu_list.definedata('region','zone','state_code','state',
                        'business_area_code','business_area',
                        'supply_zone_code','supply_zone',
                        'sub_supply_zone_code','sub_supply_zone',
                        'pmu_name','substation_name_tnbt','functional_location');
    pmu_list.definedone();
    call missing(region,zone,state_code,state,
                 business_area_code,business_area,
                 supply_zone_code,supply_zone,
                 sub_supply_zone_code,sub_supply_zone,
                 pmu_name,substation_name_tnbt,functional_location);
  end;

  set work.excp_ncp_load_re;
  data_dttm=datetime();

  /* Try the other voltage level; any other value is left unchanged. */
  if voltage='132/11' then voltage='132/33';
  else if voltage='132/33' then voltage='132/11';

  mnemonic_tnbt=strip(mnemonic_tnbt);
  assetname=mnemonic_tnbt;

  /* The re_* and geo_* columns are carried forward from the first pass.
     They are NOT re-derived from station, ca_no, applicant_name, capacity,
     commission_date, technology, lat or lng: the first pass does not keep
     those columns in work.excp_ncp_load_re, so assigning from them here
     would overwrite the carried-forward values with missing values. */

  rc=pmu_list.find();
  if (rc^=0) then do;
    excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
    output work.excp_ncp_load_re;
  end;
  else do;
    output ncpdm.ncp_load_rev2;
  end;

  /* Duplicate entries (re_state, voltage) removed from the list.
     NOTE(review): confirm that pmu exists in work.excp_ncp_load_re;
     otherwise SAS warns that it was never referenced. */
  keep mnemonic_tnbt excp_code re_state re_supply_zone
       region zone state_code state business_area_code business_area
       supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
       pmu_name substation_name_tnbt functional_location voltage
       re_station
       re_ca_no
       re_customer_name
       re_capacity
       re_commission_date
       re_technology
       pmu
       ppu
       ssu_pe
       re_switch_no
       period
       data_dttm
       active_flag
       program
       scod_date
       kick_off_date
       iom_date
       geo_longitude
       geo_latitude;
run;
问题是:对于那些在第一次哈希查找中因电压不匹配而失败的记录,我在第二段哈希代码中再次进行了查找,但仍有记录无法映射。把第一次和第二次哈希查找生成的表追加在一起之后,得到的记录数仍然少于预期。
我不知道该如何改进这段逻辑。我觉得用第二个哈希对象再查找一次的做法其实并不必要,只是不知道哪种方法更好。
有更好的方法吗?
答案 0 :(得分:0)
您可以在一个步骤中完成两次查找:第一次find()失败后,将key2的值改为另一个备选值,然后再执行一次find()。注意:如果最终没有匹配,应将查找得到的卫星变量显式设置为缺失值,这样输出数据集才准确——这些变量由set语句引入、会被自动保留,若不重置,它们将保留最近一次成功匹配时的值。
示例代码:
此示例为主表和查找表生成数据。每个表都有一些卫星变量,这些变量将被携带到输出数据集中。还创建了一个状态变量来指示发生的匹配或不匹配的类型。
/* Demo master table: 5000 rows keyed by key1/key2, each carrying three
   numeric satellite variables have1-have3. */
data have(keep=key1 key2 have:);
  length key1 $6 key2 $2;
  array have(3);            /* satellite (non-key) variables have1-have3 */
  format have: 8.;

  do row = 1 to 5000;
    /* a single character at a random offset from 'A', repeated to fill
       all six positions of key1 */
    key1 = repeat(byte(26*ranuni(123) + rank('A')), 5);

    /* key2 is '11' when the draw is below 0.35, otherwise '33' */
    if ranuni(123) < 0.35 then key2 = '11';
    else key2 = '33';

    /* a further draw below 0.02 replaces key2 with '22' */
    if ranuni(123) < 0.02 then key2 = '22';

    /* satellite values encode the row number and the slot number */
    do slot = 1 to 3;
      have(slot) = row * 10000 + slot;
    end;

    output;
  end;
run;
/* Demo lookup table: candidate keys are every letter A-Z (repeated six
   times) crossed with key2 '11' and '33'; each candidate is written only
   when its random draw is below 0.40. Five numeric satellite variables
   look1-look5 accompany each row. */
data lookup(keep=key1 key2 look:);
  length key1 $6 key2 $2;
  array look(5);            /* satellite (non-key) variables look1-look5 */
  format look: 8.;

  do letter = 1 to 26;
    key1 = repeat(byte(rank('A') + letter - 1), 5);

    do pair = 1 to 2;
      if pair = 1 then key2 = '11';
      else key2 = '33';

      /* satellite values encode letter, pair and slot numbers */
      do slot = 1 to 5;
        look(slot) = 1000*letter + 100*pair + slot;
      end;

      /* one random draw per key1/key2 candidate decides whether it is kept */
      if ranuni(123) < 0.40 then output;
    end;
  end;
run;
/* Single-step, two-try lookup of HAVE rows against LOOKUP.
   The first try uses key1/key2 as read. If that fails and key2 is '11' or
   '33', key2 is switched to the other value and the lookup is tried once
   more. match_status records which case applied. Rows that are found go to
   MATCH, all other rows go to NO_MATCH. */
data match no_match;
  length key1 $6 key2 $2 match_status $30;

  if _n_ = 1 then do;
    /* compile-time only: puts the lookup variables into the PDV */
    if 0 then set lookup;
    declare hash lookup(dataset:'lookup');
    lookup.defineKey('key1', 'key2');
    lookup.defineData('look1', 'look2', 'look3', 'look4', 'look5');
    lookup.defineDone();
  end;

  set have;

  /* classify the row; rc ends up holding the result of the last find() */
  rc = lookup.find();
  if rc = 0 then
    match_status = 'match on actual keys';
  else if key2 = '11' or key2 = '33' then do;
    /* first lookup failed: retry with the alternate key2 */
    if key2 = '11' then key2 = '33';
    else key2 = '11';
    rc = lookup.find();
    if rc = 0 then match_status = 'match after key2 swap';
    else match_status = 'no match with 11 or 33';
  end;
  else
    match_status = 'no match, key2 invalid';

  /* route the row */
  if rc = 0 then output match;
  else do;
    /* clear lookup values left over from the last prior successful find() */
    call missing (of look:);
    output no_match;
  end;
run;