
时间:2017-09-28 22:44:16

标签: sql postgresql interpolation linear-interpolation

CDC 生长曲线图(growth charts)数据集很好地说明了我想要实现的目标:  http://www.cdc.gov/growthcharts/html_charts/statage.htm



-- Inline sample of chart rows: ten tau levels (0.03 .. 0.97) for each of two
-- ages (2 and 2.041667), all for chart 'bmi for age' and sex 'F'.
-- NOTE(review): val is presumably the chart value at percentile tau -- confirm.
with tmp (chart_label, sex, age, tau, val) as (
    values
        -- age 2
        ('bmi for age', 'F', 2, 0.03, 14.14735),
        ('bmi for age', 'F', 2, 0.05, 14.39787),
        ('bmi for age', 'F', 2, 0.1, 14.80134),
        ('bmi for age', 'F', 2, 0.25, 15.52808),
        ('bmi for age', 'F', 2, 0.5, 16.4234),
        ('bmi for age', 'F', 2, 0.75, 17.42746),
        ('bmi for age', 'F', 2, 0.85, 18.01821),
        ('bmi for age', 'F', 2, 0.9, 18.44139),
        ('bmi for age', 'F', 2, 0.95, 19.10624),
        ('bmi for age', 'F', 2, 0.97, 19.56411),
        -- age 2.041667
        ('bmi for age', 'F', 2.041667, 0.03, 14.13226),
        ('bmi for age', 'F', 2.041667, 0.05, 14.38019),
        ('bmi for age', 'F', 2.041667, 0.1, 14.77965),
        ('bmi for age', 'F', 2.041667, 0.25, 15.49976),
        ('bmi for age', 'F', 2.041667, 0.5, 16.38804),
        ('bmi for age', 'F', 2.041667, 0.75, 17.38582),
        ('bmi for age', 'F', 2.041667, 0.85, 17.97371),
        ('bmi for age', 'F', 2.041667, 0.9, 18.39526),
        ('bmi for age', 'F', 2.041667, 0.95, 19.05824),
        ('bmi for age', 'F', 2.041667, 0.97, 19.51534)
)
select
    chart_label,
    sex,
    age,
    tau,
    val
from tmp;



-- Example of the desired call.
-- NOTE(review): interp is not defined in this file; the arguments presumably
-- mirror (chart_label, sex, age, val) from the sample data -- confirm.
select
    interp(
        'bmi for age',
        'F',
        2.02,
        15
    );


('bmi for age','F',2,0.1,14.80134),
('bmi for age','F',2,0.25,15.52808),


我目前唯一能参考的就是 this post,以及 SO 上的其他类似问题(link 1、link 2)。

1 个答案:

答案 0 :(得分:0)




-- Estimate the tau (percentile) of a measurement by linear interpolation
-- between the two chart values that bracket it, using the chart rows whose
-- age is closest to the requested age.
--
-- _valtype  chart label, matched against cdc_chart_value.chart_label;
--           should be one of either 'bmi for age', 'wt for age', or 'ht for age'
-- _insex    should be one of either 'M' or 'F'
-- _inage    age to look up; the rows nearest in age are used
-- _inval    measured value to locate among the chart's val column
--
-- Returns the interpolated tau. A value below every chart value yields the
-- lowest tau, a value above every chart value yields the highest tau, and
-- NULL is returned when no chart rows match.
create or replace function cdcInterp(_valtype text,
                                     _insex character(1),
                                     _inage numeric,
                                     _inval numeric)
returns numeric as
$$
-- make a lookup table
with lkup as (
  select tau, val
  from cdc_chart_value
  where chart_label = _valtype
    and sex = _insex
  order by abs(age - _inage) asc, age, tau
  -- order by ensures that I am using the closest age,
  -- with ties defaulting to the younger age
  -- 10 is a magic number: it is the number of taus for each age
  -- (0.03, 0.05, 0.10, 0.25, 0.50, 0.75, 0.85, 0.90, 0.95, 0.97)
  limit 10
),
-- closest chart row at or below the input value; val and tau are read from
-- the same row so they cannot disagree when several rows share a val
lo as (
  select val, tau
  from lkup
  where val <= _inval
  order by val desc, tau desc
  limit 1
),
-- closest chart row at or above the input value
hi as (
  select val, tau
  from lkup
  where val >= _inval
  order by val asc, tau asc
  limit 1
),
-- find high and low values needed to do interpolation;
-- scalar subqueries give exactly one row, with NULLs where a bound is missing
vals as (
  select
    (select lo.val from lo) as x1, -- x1 is the lower value
    (select hi.val from hi) as x2, -- x2 is the upper value
    (select lo.tau from lo) as y1, -- y1 is the lower tau
    (select hi.tau from hi) as y2  -- y2 is the upper tau
)
-- interpolate, or not, as needed
select case
     when vals.x1 = vals.x2 then vals.y1 -- if equal, then return the exact tau (also avoids dividing by zero)
     when vals.x1 is null then vals.y2 -- if the lower value is null, then return the lowest tau (.03)
     when vals.x2 is null then vals.y1 -- if the upper value is null, then return the highest tau (.97)
     else (vals.y1 + (_inval - vals.x1) / (vals.x2 - vals.x1) * (vals.y2 - vals.y1)) -- otherwise interpolate linearly
   end as y
from vals
$$
language sql stable;



-- PL/pgSQL variant of the tau interpolation that stages the nearest-age chart
-- rows in a temporary table and then interpolates linearly between the two
-- chart values that bracket the input value.
--
-- _valtype  chart label, matched against cdc_chart_value.chart_label
-- _insex    sex code, matched against cdc_chart_value.sex
-- _inage    age to look up; the rows nearest in age are used
-- _inval    measured value to locate among the chart's val column
--
-- Returns the interpolated tau. A value below every chart value yields the
-- lowest tau, a value above every chart value yields the highest tau, and
-- NULL is returned when no chart rows match.
-- Side effect: drops and recreates the temp table _tmp_lkup on every call,
-- which is why the function is declared volatile.
create or replace function interp2(_valtype text,
                                   _insex character(1),
                                   _inage numeric,
                                   _inval numeric)
returns numeric as
$$
declare
  x1 numeric; -- lower bracketing value
  x2 numeric; -- upper bracketing value
  y1 numeric; -- tau of the lower bracketing value
  y2 numeric; -- tau of the upper bracketing value
  y numeric;  -- result
begin
  -- the overhead of creating/dropping a temporary table is bad
  drop table if exists _tmp_lkup;
  create temp table _tmp_lkup as
    (select *
      from cdc_chart_value
      where chart_label = _valtype
        and sex = _insex
      order by abs(age - _inage) asc, age, tau
      -- order by ensures that I am using the closest age,
      -- with ties defaulting to the younger age
      -- 10 is a magic number: it is the number of taus for each age
      -- (0.03, 0.05, 0.10, 0.25, 0.50, 0.75, 0.85, 0.90, 0.95, 0.97)
      limit 10);

  -- closest row at or below the input; val and tau are read from the same row.
  -- When no row qualifies, select into leaves both targets NULL.
  select val, tau
    into x1, y1
    from _tmp_lkup
   where val <= _inval
   order by val desc, tau desc
   limit 1;

  -- closest row at or above the input
  select val, tau
    into x2, y2
    from _tmp_lkup
   where val >= _inval
   order by val asc, tau asc
   limit 1;

  -- interpolate, or not, as needed
  y := case
         when x1 = x2 then y1 -- if equal, then return the exact tau (also avoids dividing by zero)
         when x1 is null then y2 -- if the lower value is null, then return the lowest tau (.03)
         when x2 is null then y1 -- if the upper value is null, then return the highest tau (.97)
         else (y1 + (_inval - x1) / (x2 - x1) * (y2 - y1)) -- otherwise interpolate linearly
       end;
  return y;
end;
$$ language plpgsql volatile;


另一种可能的解决方案是使用 Python/SciPy 扩展中的 griddata 插值。