因此,在Postgres中,我创建了一个函数,该函数创建一个表vehicletracks_tracks并填写从原始表中计算出的信息。原始表格包含以下属性:id bigint NOT NULL DEFAULT, session character varying(32), client_id character varying(32) NOT NULL, vehicle_type smallint NOT NULL, geolocation geography(Geometry,4326) NOT NULL and "timestamp" timestamp without time zone NOT NULL.
在函数中,我通过观察原始表中两个连续行之间的时间和速度来计算它们之间的轨迹和边界。创建曲目后,我会计算每个曲目的距离和持续时间。
该函数大约需要6分30秒,然后创建输出表并用行填充。原始表大约有500万行。当您查看该过程时,是否有可能减少其执行时间?
这是我的功能:
CREATE OR REPLACE FUNCTION vehicletracks()
RETURNS void AS
$BODY$
DECLARE
vrstica1 RECORD;
vrstica2 RECORD;
track_number INT;
BEGIN
SET TIME ZONE 'UTC';
DROP TABLE IF EXISTS vehicletracks_tracks;
CREATE TABLE vehicletracks_tracks (
session character varying(32),
client_id character varying(32) NOT NULL,
vehicle_type smallint NOT NULL,
track_number SERIAL PRIMARY KEY,
track_distance DOUBLE PRECISION,
track_duration INTERVAL,
track_start TIMESTAMP WITH TIME ZONE,
track_stop TIMESTAMP WITH TIME ZONE
);
DROP VIEW IF EXISTS group_by_client_session_vehicle;
CREATE VIEW group_by_client_session_vehicle AS
SELECT DISTINCT client_id, session, vehicle_type
FROM roaduserspositions
GROUP BY client_id, session, vehicle_type
ORDER BY session, client_id, vehicle_type;
FOR vrstica1 IN (SELECT * FROM group_by_client_session_vehicle)
LOOP
DROP VIEW IF EXISTS velocity_difference_view;
DROP TABLE IF EXISTS difference_table;
CREATE TABLE difference_table AS
SELECT id, session, client_id, vehicle_type, geolocation, timestamp, time_difference, distance_difference
FROM (SELECT t.*, t.timestamp - LAG(t.timestamp) OVER (ORDER BY t.session, t.client_id, t.vehicle_type, t.timestamp ASC) AS time_difference,
ST_Distance(geolocation, LAG(geolocation) OVER (ORDER BY t.session, t.client_id, t.vehicle_type, t.timestamp ASC)) AS distance_difference
FROM (SELECT *
FROM timonwww_roaduserspositions
WHERE client_id = vrstica1.client_id AND session = vrstica1.session AND vehicle_type = vrstica1.vehicle_type) AS t) AS tab
ORDER BY session, client_id, vehicle_type, timestamp;
CREATE VIEW velocity_difference_view AS (
SELECT *, distance_difference / EXTRACT(EPOCH FROM time_difference) AS velocity
FROM difference_table
WHERE EXTRACT(EPOCH FROM time_difference) > 0
UNION
SELECT *, 0 AS velocity
FROM difference_table
WHERE EXTRACT(EPOCH FROM time_difference) = 0
ORDER BY session, client_id, vehicle_type, timestamp);
DROP TABLE IF EXISTS new_time_difference;
CREATE TABLE new_time_difference (
track_id INTEGER,
LIKE velocity_difference_view INCLUDING ALL);
track_number := 1;
INSERT INTO new_time_difference(track_id, id, session, client_id, vehicle_type, geolocation, timestamp, time_difference)
SELECT
CASE
WHEN EXTRACT(EPOCH FROM time_difference) < 300 AND velocity < 121 THEN track_number
ELSE track_number + 1
END AS track_id, id, session, client_id, vehicle_type, geolocation, timestamp, time_difference
FROM velocity_difference_view OFFSET 1;
track_number := 1;
FOR vrstica2 IN (SELECT * FROM velocity_difference_view OFFSET 1)
LOOP
IF EXTRACT(EPOCH FROM vrstica2.time_difference) < 300 AND vrstica2.velocity < 121 THEN
INSERT INTO new_time_difference VALUES(track_number, vrstica2.id, vrstica2.session, vrstica2.client_id, vrstica2.vehicle_type, vrstica2.geolocation, vrstica2.timestamp, vrstica2.time_difference);
ELSE
track_number := track_number + 1;
END IF;
END LOOP;
DROP TABLE IF EXISTS geolocation_difference_table;
CREATE TABLE geolocation_difference_table AS
SELECT *
FROM (SELECT *, ST_Distance(geolocation, LAG(geolocation) over (ORDER BY timestamp ASC)) as geolocation_difference
FROM new_time_difference
WHERE track_id IN (SELECT * FROM generate_series((SELECT MIN(new_time_difference.track_id) FROM new_time_difference), (SELECT MAX(track_id) FROM new_time_difference)) num)) AS tab;
DROP TABLE IF EXISTS track_distance_table;
CREATE TABLE track_distance_table AS
SELECT track_id, SUM(geolocation_difference) AS track_distance, SUM(time_difference) AS track_duration, MIN(timestamp) AS track_start, MAX(timestamp) AS track_stop
FROM geolocation_difference_table
GROUP BY track_id
ORDER BY track_id ASC;
INSERT INTO vehicletracks_tracks(session, client_id, vehicle_type, track_distance, track_duration, track_start, track_stop)
SELECT vrstica1.session, vrstica1.client_id, vrstica1.vehicle_type, track_distance, track_duration, track_start, track_stop
FROM (SELECT * FROM track_distance_table WHERE track_distance > 0 AND track_distance IS NOT NULL) AS t;
END LOOP;
DROP TABLE IF EXISTS vehicletracks_binding;
CREATE TABLE vehicletracks_binding AS
SELECT id, track_id
FROM geolocation_difference_table
ORDER BY track_id ASC;
DROP VIEW IF EXISTS group_by_client_session_vehicle;
DROP VIEW IF EXISTS velocity_difference_view;
DROP TABLE IF EXISTS difference_table;
DROP TABLE IF EXISTS new_time_difference;
DROP TABLE IF EXISTS geolocation_difference_table;
DROP TABLE IF EXISTS track_distance_table;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;