通过postgresql循环获取活动行程的数量

时间:2017-07-05 12:02:50

标签: postgresql loops gtfs

我正在寻找一种方法,通过postgresql查询获取一天中每分钟的GTFS Feed中的活动行程数。

对于每个行程,非规范化表中都存有其开始时间和结束时间(以秒为单位)。该表如下所示:

denormalized_trips table

下面的查询返回在给定时间范围内(此处为 46800 到 47100 秒,即 13:00 至 13:05)活动的行程数:

-- Counts the trips whose trip_start_time lies in the window 46800..47100
-- (both bounds inclusive) and whose gtfs_calendar row has wednesday = 1 and a
-- start_date..end_date range containing 2017-07-03, excluding services that
-- have an exception_type = 2 row in gtfs_calendar_dates for that date.
-- The LEFT JOIN on exception_type = 1 rows is kept as written; none of its
-- columns are referenced elsewhere in the query.
-- NOTE(review): 2017-07-03 is a Monday, yet the weekday flag tested below is
-- calendar.wednesday -- confirm which of the two is intended.
SELECT COUNT(trips.trip_id)
FROM denormalized_trips AS trips
LEFT JOIN gtfs_calendar_dates AS calendar_dates
    ON  calendar_dates.agency_key = trips.agency_key
    AND calendar_dates.service_id = trips.service_id
    AND calendar_dates.exception_type = 1
    AND calendar_dates.date = '2017-07-03'
INNER JOIN gtfs_calendar AS calendar
    ON  calendar.agency_key = trips.agency_key
    AND calendar.service_id = trips.service_id
    AND calendar.wednesday = 1
WHERE trips.trip_start_time >= 46800
  AND trips.trip_start_time <= 47100
  AND calendar.start_date <= '2017-07-03'
  AND calendar.end_date >= '2017-07-03'
  AND NOT EXISTS (
      SELECT 1
      FROM gtfs_calendar_dates AS date_exceptions
      WHERE date_exceptions.agency_key = trips.agency_key
        AND date_exceptions.service_id = calendar.service_id
        AND date_exceptions.exception_type = 2
        AND date_exceptions.date = '2017-07-03'
  );

该查询返回 12,即有 12 个行程在 13:00 至 13:05 之间开始。

现在我想对一整天都做同样的统计:按固定的时间间隔(例如每 1 分钟,或者每 5 分钟)获取该间隔内活动的行程数。我尝试了一个循环,但它似乎只返回一个结果。以下是我目前写出的函数:

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS INTEGER AS
$BODY$
-- Runs the trip count once per 60-second step from 43130 up to 50000 and
-- returns a single integer.  Every iteration overwrites the previous result,
-- so only the count computed for the last window reaches the caller.
-- The parameter n is not used in the body.
-- NOTE(review): 2017-07-03 is a Monday, yet the weekday flag tested below is
-- calendar.wednesday -- confirm which of the two is intended.
DECLARE
  trips_in_window INTEGER;
BEGIN
  FOR window_start IN 43130..50000 BY 60 LOOP
    SELECT COUNT(trips.trip_id)
    INTO trips_in_window
    FROM denormalized_trips AS trips
    LEFT JOIN gtfs_calendar_dates AS calendar_dates
        ON  calendar_dates.agency_key = trips.agency_key
        AND calendar_dates.service_id = trips.service_id
        AND calendar_dates.exception_type = 1
        AND calendar_dates.date = '2017-07-03'
    INNER JOIN gtfs_calendar AS calendar
        ON  calendar.agency_key = trips.agency_key
        AND calendar.service_id = trips.service_id
        AND calendar.wednesday = 1
    -- Window is inclusive on both ends: window_start .. window_start + 60.
    WHERE trips.trip_start_time >= window_start
      AND trips.trip_start_time <= window_start + 60
      AND calendar.start_date <= '2017-07-03'
      AND calendar.end_date >= '2017-07-03'
      AND NOT EXISTS (
          SELECT 1
          FROM gtfs_calendar_dates AS date_exceptions
          WHERE date_exceptions.agency_key = trips.agency_key
            AND date_exceptions.service_id = calendar.service_id
            AND date_exceptions.exception_type = 2
            AND date_exceptions.date = '2017-07-03'
      );
  END LOOP;

  RETURN trips_in_window;
END;
$BODY$ LANGUAGE plpgsql STABLE;

调用 SELECT get_active_trips(1); 的结果如下:

sum of entries

现在我希望得到类似于表或结果数组的内容,而不仅仅是1个条目。我该怎么做?

非常感谢任何帮助。

2 个答案:

答案 0 :(得分:1)

有两种语法可以让函数返回一组值:`returns setof <type>` 和 `returns table(<columns definition>)`(参见文档)。

在 plpgsql 函数中也有两种方式返回这些值:`return next` 和 `return query`(参见文档)。

因此,如果你想获得一系列整数,你可以用这种方式重写你的函数:

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
-- Set-returning variant using RETURN NEXT: emits one integer per loop
-- iteration instead of a single value.
-- The parameter n is not referenced in the visible body.
DECLARE 
 count INTEGER;  -- receives the count produced by the current iteration
BEGIN 
  -- Step through 43130..50000 in increments of 60.
  FOR counter IN 43130..50000 BY 60 LOOP
    SELECT
    COUNT(trips.trip_id)
    INTO count
    FROM denormalized_trips AS trips
    <rest of query here>
    ;
    -- Append the current value of count to the function's result set.
    RETURN NEXT count;
  END LOOP;
  -- Bare RETURN ends the function; the rows added by RETURN NEXT form the result.
  RETURN;
END; 
$BODY$ LANGUAGE plpgsql STABLE;

或使用RETURN QUERY

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
-- Set-returning variant using RETURN QUERY: appends the result of one count
-- query per loop iteration to the function's result set.
-- The parameter n is not referenced in the visible body.
BEGIN
  -- Step through 43130..50000 in increments of 60.
  FOR counter IN 43130..50000 BY 60 LOOP
    RETURN QUERY
      SELECT
      -- COUNT() yields bigint; RETURN QUERY requires the query's column type
      -- to match the declared SETOF INTEGER, hence the explicit cast.
      CAST(COUNT(trips.trip_id) AS INTEGER)
      FROM denormalized_trips AS trips
      <rest of query here>
      ;
  END LOOP;
  -- Bare RETURN ends the function; the rows added by RETURN QUERY form the result.
  RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;

如果您想要返回多于一列:

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
-- Two-column variant using RETURN NEXT: one row per loop iteration holding
-- the loop value and the count computed for it.
-- The parameter n is not referenced in the visible body.
BEGIN 
  -- Step through 43130..50000 in increments of 60.
  FOR counter IN 43130..50000 BY 60 LOOP
    -- Store the count directly in the output column active_trips_count.
    SELECT
    COUNT(trips.trip_id)
    INTO active_trips_count
    FROM denormalized_trips AS trips
    <rest of query here>
    ;
    counter_value := counter;
    RETURN NEXT; -- No argument: the current values of counter_value and active_trips_count are emitted as one row
  END LOOP;
  -- Bare RETURN ends the function; the rows added by RETURN NEXT form the result.
  RETURN;
END; 
$BODY$ LANGUAGE plpgsql STABLE;

或使用RETURN QUERY

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
-- Two-column variant using RETURN QUERY: one row per loop iteration holding
-- the loop value and the count computed for it.
-- The parameter n is not referenced in the visible body.
BEGIN
  -- Step through 43130..50000 in increments of 60.
  FOR counter IN 43130..50000 BY 60 LOOP
    RETURN QUERY
      SELECT
        counter,
        -- COUNT() yields bigint; RETURN QUERY requires the query's column
        -- types to match the declared int column, hence the explicit cast.
        CAST(COUNT(trips.trip_id) AS INTEGER)
      FROM denormalized_trips AS trips
      <rest of query here>
      ;
  END LOOP;
  -- Bare RETURN ends the function; the rows added by RETURN QUERY form the result.
  RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;

最后还有returns table的替代声明:

-- Declares the same two output columns (counter_value, active_trips_count)
-- through OUT parameters combined with RETURNS SETOF RECORD.
-- Only the function header is shown here; the body is omitted from this snippet.
CREATE OR REPLACE FUNCTION get_active_trips(
  n int,
  out counter_value int, 
  out active_trips_count int)
RETURNS SETOF RECORD AS

**更新**

但是(!)我想说,你的任务可以用单个查询来简化,无需循环。

考虑以下查询(我使用了来自sqlfiddle的简化查询):

-- One row per 60-second window start between 43130 and 50000, together with
-- the number of trips whose trip_start_time falls inside that window.
-- The left join keeps windows without any trip; count(trips.trip_id) is 0
-- for those because it only counts non-null values.
-- Windows are half-open, [counter, counter + 60), so a trip that starts
-- exactly on a window boundary is counted in one window only.
select
  counter,
  count(trips.trip_id) as active_trips_count
from
  generate_series(43130, 50000, 60) as counter
  left join denormalized_trips as trips
    on trips.trip_start_time >= counter
   and trips.trip_start_time < counter + 60
group by counter
order by counter;

答案 1 :(得分:0)

创建类似下面的类型

-- Composite type with a single integer attribute, used as the row type of the
-- set returned by get_active_trips.  Dropped first so the script can be rerun.
DROP TYPE IF EXISTS get_active_trips_out;
CREATE TYPE get_active_trips_out AS (count int);

从函数返回数据时使用该表类型,如下所示

CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS setof get_active_trips_out AS
$BODY$
-- Returns one get_active_trips_out row per 60-second step from 43130 up to
-- 50000, each holding the trip count computed for that window.
-- Rows are emitted directly with RETURN NEXT, in loop order.  No temporary
-- table is used, so the function can be called repeatedly within one
-- transaction.
-- The parameter n is not used in the body.
-- NOTE(review): 2017-07-03 is a Monday, yet the weekday flag tested below is
-- calendar.wednesday -- confirm which of the two is intended.
DECLARE
  r get_active_trips_out%rowtype;
BEGIN
  FOR counter IN 43130..50000 BY 60 LOOP
    -- The single count column is assigned to the single attribute of r.
    SELECT
      COUNT(trips.trip_id)
    INTO r
    FROM denormalized_trips AS trips

    LEFT JOIN gtfs_calendar_dates AS calendar_dates
      ON calendar_dates.service_id = trips.service_id
      AND calendar_dates.agency_key = trips.agency_key
      AND date = '2017-07-03'
      AND exception_type = 1

    INNER JOIN gtfs_calendar AS calendar
      ON trips.service_id = calendar.service_id
      AND calendar.agency_key = trips.agency_key
      AND calendar.wednesday = 1

    WHERE (
      trip_start_time BETWEEN counter AND counter + 60
      AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
    )
    AND NOT EXISTS (
      SELECT 0
      FROM gtfs_calendar_dates AS date_exceptions
      WHERE date = '2017-07-03'
      AND date_exceptions.agency_key = trips.agency_key
      AND date_exceptions.service_id = calendar.service_id
      AND exception_type = 2
    );

    -- Append the row for this window to the result set.
    RETURN NEXT r;
  END LOOP;
  RETURN;
END;