/**
STEP 1: Get all the valid information and store in temp historic data ---- DONE
STEP 2: If any values are 0 or missing
STEP 3: Reduce the count of data-points from default(8)
STEP 4: Calculate Average of all the datapoints
STEP 5: Calculate Sigma
*/
/*
 * Local variables. Only days_in_week receives a value in the statements
 * visible in this section; the remaining variables are declared but not
 * assigned here.
 */
DECLARE prev_year_date, prev_prev_year_date DATE;
DECLARE data_points, calculation_weeks, prev_year, prev_prev_year INT;
/* A week always has seven days, so the value is fixed instead of being looked up. */
DECLARE days_in_week INT DEFAULT 7;

/*
 * Empty every working table before the calculation starts, so that the
 * INSERT ... SELECT statements further down write into empty tables.
 */
TRUNCATE TABLE ro_temp_historic_data;
TRUNCATE TABLE average_volume;
TRUNCATE TABLE sigma;
TRUNCATE TABLE range1;
TRUNCATE TABLE range2;
TRUNCATE TABLE expected_volume;
TRUNCATE TABLE mean_result;
TRUNCATE TABLE previous_week_volume;
TRUNCATE TABLE current_week_volume;
TRUNCATE TABLE final_paf_data;
TRUNCATE TABLE paf_score;
TRUNCATE TABLE market_ms;
TRUNCATE TABLE change_in_ms;
/*
 * STEP 1: copy the relevant volume_trackers rows into ro_temp_historic_data.
 *
 * A row is kept when its date_inserted falls into one of three windows,
 * each (8 * 7) days long and inclusive at both ends:
 *   - the window ending on ro.historic_end_date,
 *   - the same window shifted back 364 days (52 weeks),
 *   - the same window shifted back 728 days (104 weeks).
 * Only outlets with pricing_module = 1 and with the ms/hsd flag that matches
 * fuel_type set to 1 are considered.
 *
 * The join is written as INNER JOIN: every branch of the date filter needs a
 * non-NULL r.date_inserted, so the former LEFT OUTER JOIN could never return
 * an outlet without tracker rows anyway.
 *
 * NOTE(review): fuel_type, user_id and start_date are referenced without a
 * table qualifier; presumably they are parameters of the enclosing procedure,
 * whose header is outside this section - confirm.
 */
INSERT INTO ro_temp_historic_data
SELECT
    r.ro_id,
    date_inserted,
    CASE
        WHEN fuel_type = 'ms' THEN actual_volume_ms
        ELSE actual_volume_hsd
    END AS actual_volume,
    YEAR(date_inserted),
    user_id,
    start_date
FROM retail_outlets AS ro
INNER JOIN volume_trackers AS r
    ON ro.ro_id = r.ro_id
WHERE (
        /* window ending on the historic end date */
        (
            r.date_inserted >= DATE_SUB(ro.historic_end_date, INTERVAL ((8 * 7) - 1) DAY)
            AND r.date_inserted <= ro.historic_end_date
        )
        OR
        /* same window, 364 days earlier */
        (
            r.date_inserted >= DATE_SUB(DATE_SUB(ro.historic_end_date, INTERVAL 364 DAY), INTERVAL ((8 * 7) - 1) DAY)
            AND r.date_inserted <= DATE_SUB(ro.historic_end_date, INTERVAL 364 DAY)
        )
        OR
        /* same window, 728 days earlier */
        (
            r.date_inserted >= DATE_SUB(DATE_SUB(ro.historic_end_date, INTERVAL 728 DAY), INTERVAL ((8 * 7) - 1) DAY)
            AND r.date_inserted <= DATE_SUB(ro.historic_end_date, INTERVAL 728 DAY)
        )
    )
    AND ro.pricing_module = 1
    AND (CASE WHEN fuel_type = 'ms' THEN ro.ms ELSE ro.hsd END) = 1;
/*
 * STEP 4: for every (ro_id, year) group in ro_temp_historic_data, store the
 * mean of the non-zero volumes and the number of rows that produced it.
 * Rows whose actual_volume is 0 are left out of both the mean and the count.
 *
 * NOTE(review): the unqualified user_id and start_date are presumably
 * parameters of the enclosing procedure (header not visible here) - confirm.
 */
INSERT INTO average_volume
SELECT
    hist.ro_id,
    SUM(hist.actual_volume) / COUNT(hist.ro_id) AS average_volume,
    COUNT(hist.ro_id) AS datapoints,
    user_id,
    start_date,
    hist.year
FROM ro_temp_historic_data AS hist
WHERE hist.actual_volume <> 0
    AND hist.generated_by = user_id
    AND hist.start_date = start_date
GROUP BY
    hist.ro_id,
    hist.year;
/*
 * STEP 5: sample standard deviation of the non-zero volumes per outlet and
 * year: sqrt( sum( (average - volume)^2 / (datapoints - 1) ) ).
 *
 * NULLIF guards the divisor: a group with exactly one data point has
 * datapoints - 1 = 0. The guarded expression yields a NULL sigma for such a
 * group instead of a division by zero, which strict SQL modes report as an
 * error that aborts the whole INSERT.
 *
 * The two tables are combined with an explicit INNER JOIN on outlet and year
 * rather than a comma join with the join keys hidden in WHERE.
 *
 * NOTE(review): the schema listing in this file shows
 * ro_temp_historic_data.year as varchar(10) and average_volume.cur_year as
 * int(11). Comparing them requires an implicit conversion and may keep the
 * index on year from being used - consider aligning the column types.
 * NOTE(review): the unqualified user_id and start_date are presumably
 * parameters of the enclosing procedure (header not visible here) - confirm.
 */
INSERT INTO sigma
SELECT
    a.ro_id,
    SQRT(
        SUM(
            POWER(a.average_volume - d.actual_volume, 2)
            / NULLIF(a.datapoints - 1, 0)
        )
    ) AS sigma,
    a.average_volume,
    user_id,
    start_date,
    a.cur_year
FROM average_volume AS a
INNER JOIN ro_temp_historic_data AS d
    ON d.ro_id = a.ro_id
    AND d.year = a.cur_year
WHERE a.start_date = start_date
    AND a.user_id = user_id
    AND d.actual_volume <> 0
GROUP BY
    a.ro_id,
    a.cur_year,
    a.average_volume;
END
我有一个存储过程,运行一次需要 3-4 分钟。每条 INSERT 的结果都会被后面的 INSERT 查询用到,因此我必须把这些中间结果保存下来。它要扫描大约 500 万条数据,最终结果大约为 70 000 行。
我已经为这些表建立了索引,甚至创建了分区,但都没有任何帮助。
Ro_temp_historic_data:Contains index on ro_id and year
'ro_id', 'int(11)', 'YES', 'MUL', NULL, ''
'date_inserted', 'date', 'YES', '', NULL, ''
'actual_volume', 'float', 'YES', '', NULL, ''
'year', 'varchar(10)', 'YES', '', NULL, ''
'generated_by', 'int(11)', 'YES', '', NULL, ''
Average_volume contains indexes on ro_id
'ro_id', 'int(11)', 'YES', 'MUL', NULL, ''
'average_volume', 'float', 'YES', '', NULL, ''
'datapoints', 'int(11)', 'YES', '', NULL, ''
'user_id', 'int(11)', 'YES', '', NULL, ''
'start_date', 'date', 'YES', '', NULL, ''
'cur_year', 'int(11)', 'YES', '', NULL, ''
'start_date', 'date', 'YES', '', NULL, ''
Sigma contains index on ro_id
'ro_id', 'int(11)', 'YES', 'MUL', NULL, ''
'sigma', 'float', 'YES', '', NULL, ''
'average_volume', 'float', 'YES', '', NULL, ''
'user_id', 'int(11)', 'YES', '', NULL, ''
'start_date', 'date', 'YES', '', NULL, ''
'year', 'int(11)', 'YES', '', NULL, ''