我不知道如何正确执行查找(SAS)

时间:2018-12-11 20:33:02

标签: sas

在Base SAS中,我有一个带有哈希对象的脚本来进行表查找。条件如下。 表A是原始的主表,它将使用表B进行查找。 查找关键字是AssetName和Voltage。 电压始终为33或11。

也就是说,表A以AssetName和Voltage作为键到表B中查找,并从表B取回相应的数据。

让我们看看我拥有的示例代码。

/* Round-1 lookup: enrich each row of ASSET_RE with attributes from the PMU
   master list (ncpdm.ncp_asset_pmu).
   Hash key : assetname (copied from mnemonic_tnbt) and
              voltage   ('132/' concatenated with the source voltage).
   Outputs  : ncpdm.ncp_load_re     - rows whose key was found
              work.excp_ncp_load_re - rows whose key was not found; excp_code
                                      carries the reason. */
data ncpdm.ncp_load_re (drop=excp_code re_state re_supply_zone)
     work.excp_ncp_load_re;
  /* $80 so that the 66-character exception message below is not truncated. */
  length excp_code $80 re_state re_supply_zone $30;

  /* Never executed. At compile time it gives the hash key and data host
     variables the same type and length they have in the lookup table, so
     they do not default to numeric on first reference. */
  if 0 then set ncpdm.ncp_asset_pmu
       (keep=assetname voltage
             region zone state_code state
             business_area_code business_area
             supply_zone_code supply_zone
             sub_supply_zone_code sub_supply_zone
             pmu_name substation_name_tnbt functional_location);

  if _n_=1 then do;
     declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
     pmu_list.definekey('assetname','voltage');
     pmu_list.definedata('region','zone','state_code','state',
                         'business_area_code','business_area',
                         'supply_zone_code','supply_zone',
                         'sub_supply_zone_code','sub_supply_zone',
                         'pmu_name','substation_name_tnbt','functional_location');
     pmu_list.definedone();
  end;

  /* Variables defined through a SET statement are retained across
     iterations. Clear the lookup columns BEFORE the next row is read so an
     unmatched row never shows values left over from an earlier match; any
     of these columns that also exist in ASSET_RE are then reloaded by the
     SET below. */
  call missing(region,zone,state_code,state,
               business_area_code,business_area,
               supply_zone_code,supply_zone,
               sub_supply_zone_code,sub_supply_zone,
               pmu_name,substation_name_tnbt,functional_location);

  /* Only voltage is renamed. pmu keeps its own name so the KEEP list below
     can carry it to the outputs (renaming it to assetname discarded it,
     because assetname is overwritten with mnemonic_tnbt right after). */
  set asset_re (rename=(voltage=voltage_));
  data_dttm=datetime();

  /* Build the lookup key; cats() already removes leading/trailing blanks. */
  voltage=cats('132/',voltage_);
  mnemonic_tnbt=strip(mnemonic_tnbt);
  assetname=mnemonic_tnbt;

  rc=pmu_list.find();
  if (rc^=0) then do;
     excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
     output work.excp_ncp_load_re;
  end;
  else do;
     output ncpdm.ncp_load_re;
  end;

  keep mnemonic_tnbt excp_code re_state re_supply_zone
       region zone state_code state business_area_code business_area
       supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
       pmu_name substation_name_tnbt functional_location voltage
       re_station
       re_ca_no
       re_customer_name
       re_capacity
       re_commission_date
       re_technology
       pmu
       ppu
       ssu_pe
       re_switch_no
       period
       data_dttm
       active_flag
       program
       scod_date
       kick_off_date
       iom_date
       geo_longitude
       geo_latitude;
run;

在上面的代码中,我把无法映射(查找失败)的记录输出到excp表。然后,我使用相同的哈希对象代码,以excp表作为数据源,对同一张查找表再查找一次,代码如下。(这一次我把电压换成另一个值:原来是11的改为33,原来是33的改为11。)

/* Round-2 lookup for records that failed round 1.
   Reads work.excp_ncp_load_re, flips the voltage part of the key
   ('132/11' <-> '132/33') and probes the PMU master list again.
   Outputs : ncpdm.ncp_load_rev2    - rows matched with the alternate voltage
             work.excp_ncp_load_re  - rows still unmatched; written with their
                                      original voltage and with excp_code. */
data ncpdm.ncp_load_rev2 (drop=excp_code re_state re_supply_zone)
     work.excp_ncp_load_re;
  /* $80 so that the 66-character exception message below is not truncated. */
  length excp_code $80 re_state re_supply_zone $30;

  /* Never executed. At compile time it gives the hash key and data host
     variables the same type and length they have in the lookup table. */
  if 0 then set ncpdm.ncp_asset_pmu
       (keep=assetname voltage
             region zone state_code state
             business_area_code business_area
             supply_zone_code supply_zone
             sub_supply_zone_code sub_supply_zone
             pmu_name substation_name_tnbt functional_location);

  if _n_=1 then do;
     declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
     pmu_list.definekey('assetname','voltage');
     pmu_list.definedata('region','zone','state_code','state',
                         'business_area_code','business_area',
                         'supply_zone_code','supply_zone',
                         'sub_supply_zone_code','sub_supply_zone',
                         'pmu_name','substation_name_tnbt','functional_location');
     pmu_list.definedone();
  end;

  /* SET variables are retained; clear the lookup columns before each read.
     Columns that exist in the exception table are reloaded by the SET. */
  call missing(region,zone,state_code,state,
               business_area_code,business_area,
               supply_zone_code,supply_zone,
               sub_supply_zone_code,sub_supply_zone,
               pmu_name,substation_name_tnbt,functional_location);

  set work.excp_ncp_load_re;
  data_dttm=datetime();

  /* Remember the voltage as received so a second failure can restore it. */
  _voltage_in=voltage;
  if voltage='132/11' then voltage='132/33';
  else if voltage='132/33' then voltage='132/11';

  mnemonic_tnbt=strip(mnemonic_tnbt);
  assetname=mnemonic_tnbt;

  /* Capture the incoming state before find() can overwrite it. */
  re_state=state;

  /* The re_station ... re_technology and geo_* columns arrive already
     populated from round 1. Their raw sources (station, ca_no,
     applicant_name, capacity, commission_date, technology, lat, lng) are not
     part of the round-1 KEEP list, so re-deriving them here would only
     overwrite them with missing values. */

  rc=pmu_list.find();
  if (rc^=0) then do;
     voltage=_voltage_in;   /* report the asset's own voltage, not the probe value */
     excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
     output work.excp_ncp_load_re;
  end;
  else do;
     output ncpdm.ncp_load_rev2;
  end;

  /* _voltage_in, assetname and rc are not listed, so they are not written. */
  keep mnemonic_tnbt excp_code re_state re_supply_zone
       region zone state_code state business_area_code business_area
       supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
       pmu_name substation_name_tnbt functional_location voltage
       re_station
       re_ca_no
       re_customer_name
       re_capacity
       re_commission_date
       re_technology
       pmu
       ppu
       ssu_pe
       re_switch_no
       period
       data_dttm
       active_flag
       program
       scod_date
       kick_off_date
       iom_date
       geo_longitude
       geo_latitude;
run;

问题是:对于那些在第一次哈希查找中因电压不匹配而失败的记录,我在第二段哈希代码中又查找了一次,但仍然有记录无法映射。把第一次和第二次哈希查找生成的表追加在一起之后,得到的记录数仍然少于预期。

我不知道怎样才能写出更好的逻辑。我觉得用第二个哈希对象再查找一次的做法其实并没有必要,只是不知道哪种方法更好。

有更好的方法吗?

1 个答案:

答案 0 :(得分:0)

您可以在一个步骤中完成两次查找。第一次find()失败后,将key2的值改为另一个备选值,然后再执行一次find()。注意:如果没有找到匹配项,应将查找得到的卫星变量显式设置为缺失值(missing),以保证输出数据集准确无误——如果不重置,这些卫星变量将保留最近一次成功匹配时的值。

示例代码:

此示例为主表和查找表生成数据。每个表都有一些卫星变量,这些变量将被携带到输出数据集中。还创建了一个状态变量来指示发生的匹配或不匹配的类型。

/* Demo master table: 5000 rows.
   key1       - one letter repeated six times
   key2       - '11' or '33', occasionally replaced by the invalid '22'
   have1-have3 - satellite (non-key) values derived from the row number. */
data have(keep=key1 key2 have:);
  length key1 $6 key2 $2;
  array have(3);          * satellite (non-key) variables in have;
  format have: 8.;

  do _n_ = 1 to 5000;
    /* Keep the three ranuni() calls in this order so the generated
       values are unchanged. */
    key1 = repeat(byte(26*ranuni(123)+rank('A')), 5);

    if ranuni(123) < 0.35 then key2 = '11';
    else key2 = '33';

    if ranuni(123) < 0.02 then key2 = '22';

    do i = 1 to 3;
      have(i) = _n_ * 10000 + i;
    end;

    output;
  end;
run;

/* Demo lookup table: candidate keys are every letter A-Z (repeated six
   times) combined with key2 '11' and '33'; each candidate is written only
   when the random draw is below 0.40.
   look1-look5 are satellite values encoding the letter index, the key2
   index and the satellite index. */
data lookup(keep=key1 key2 look:);
  length key1 $6 key2 $2;
  array look(5);                               * satellite (non-key) variables in lookup;
  array key2s(2) $2 _temporary_ ('11','33');   * the two valid key2 values;
  format look: 8.;

  do i = 1 to 26;
    key1 = repeat(byte(rank('A') + i - 1), 5);

    do j = 1 to 2;
      key2 = key2s(j);

      do k = 1 to 5;
        look(k) = 1000*i + 100*j + k;
      end;

      if ranuni(123) < 0.40 then output;
    end;
  end;
run;

/* Single-pass two-try lookup of HAVE against LOOKUP.
   Try 1 : find() on (key1, key2) as read.
   Try 2 : if key2 is '11' or '33', swap it to the other value and find() again.
   Outputs:
     match    - rows found on either try; after a swap key2 holds the key
                that actually matched.
     no_match - rows found on neither try; look1-look5 are cleared and key2
                is restored to the value read from HAVE, so the row can still
                be traced back to its source.
   match_status records which of the four outcomes occurred. */
data match no_match;

  length key1 $6 key2 $2 match_status $30;

  if _n_ = 1 then do;

    if 0 then set lookup; * prep pdv at compilation time, set is never executed at runtime;

    declare hash lookup(dataset:'lookup');
    lookup.defineKey('key1', 'key2');
    do i = 1 to 5; drop i;
      lookup.defineData(cats('look',i));
    end;
    lookup.defineDone();
  end;

  set have;

  rc = lookup.find();

  if rc = 0 then do;
    match_status = 'match on actual keys';
    output match;
    return; * to top of step;
  end;

  * first lookup failed, try the alternate key2;

  _key2_in = key2; drop _key2_in; * original key2, restored if the retry also fails;

  if key2 = '11' then
    key2 = '33';
  else
  if key2 = '33' then
    key2 = '11';
  else do;
    call missing (of look:); * clear lookup values that find() loaded at last prior match;
    match_status = 'no match, key2 invalid';
    output no_match;
    return; * to top of step;
  end;

  rc = lookup.find();

  if rc = 0 then do;
    match_status = 'match after key2 swap';
    output match;
    return; * to top of step;
  end;
  else do;
    key2 = _key2_in; * report the key as it was read from have, not the swapped probe value;
    call missing (of look:); * clear lookup values that find() loaded at last prior match;
    match_status = 'no match with 11 or 33';
    output no_match;
    return; * to top of step;
  end;
run;