Snowflake 中的 JavaScript UDF 未按预期计算

时间:2021-04-26 11:44:05

标签: javascript user-defined-functions snowflake-cloud-data-platform

我正在尝试计算已记录作业已运行的分钟数。 每个作业都有开始时间和结束时间。

在这种特殊情况下,工作时间在 01:00 到 10:00 之间,并且只有工作日(周末除外)

为了计算这个,我尝试制作了一个基于 JavaScript 的 UDF,如下所示:

-- JobRuns(f, t): JavaScript UDF that walks from f to t in one-minute steps and
-- returns how many of the sampled instants fall inside the configured work hours.
-- NOTE(review): this is the code under discussion, kept exactly as posted; the
-- comments below only point out where its behaviour differs from the stated intent
-- (working hours 01:00-10:00, working days only).
CREATE OR REPLACE FUNCTION JobRuns(f datetime, t datetime)
RETURNS DOUBLE
LANGUAGE JAVASCRIPT
AS
$$
    // Based on the Calculation of Business Hours in JavaScript
    // https://www.c-sharpcorner.com/UploadFile/36985e/calculating-business-hours-in-javascript/
    
    // Counts the one-minute samples between startDate and endDate (both inclusive)
    // whose local hour lies in [workHoursStart, workHoursEnd].
    // Returns 0 when endDate is earlier than startDate.
    function workingMinutesBetweenDates(startDate, endDate) {    
        // Running count of samples that matched the work-hours test
        var minutesWorked = 0;    
    
        // Validate input: an inverted range yields 0    
        if (endDate < startDate) {    
            return 0;    
        }    
        
        // Cursor for the walk. This is an alias, not a copy: the setTime() call
        // below therefore advances the caller's startDate object in place.
        var current = startDate;    
        
        // Define work range (hours of the day, as returned by getHours())    
        var workHoursStart = 1;    
        var workHoursEnd = 10;    
        var includeWeekends = false;    
        
        // Step one minute at a time. The bound is inclusive, so both end points are
        // sampled: an exact 60-minute span inside work hours yields 61, not 60.
        while (current <= endDate) {    
            // Hour test: `<= workHoursEnd` also accepts every minute of hour 10
            // (10:00-10:59), i.e. one hour past a 10:00 end of day.
            // Weekend test: with includeWeekends === false the ternary evaluates to
            // `true`, so Saturdays (6) and Sundays (0) are NOT filtered out; the
            // exclusion only runs when includeWeekends is true. The two branches
            // look swapped relative to the flag's name.
            // getHours()/getDay() report local time of the JavaScript runtime.
            // NOTE(review): how the SQL timestamp maps to that local time inside
            // Snowflake is not visible here - confirm.
            if (current.getHours() >= workHoursStart && current.getHours() <= workHoursEnd && (includeWeekends ? current.getDay() !== 0 && current.getDay() !== 6 : true)) {    
                minutesWorked++;    
            }    
        
            // Advance the cursor by 60 000 ms (one minute)
            current.setTime(current.getTime() + 1000 * 60);    
        }    
        
        // Return the number of matching one-minute samples
        return minutesWorked;    
    }   
    // The SQL arguments f and t are referenced here as F and T
    // (Snowflake upper-cases unquoted argument names for JavaScript UDFs).
    return workingMinutesBetweenDates(F,T);
$$
;

但我得到的结果在某些情况下与我的预期相差甚远。

enter image description here

JS 逻辑取自这里:https://www.c-sharpcorner.com/UploadFile/36985e/calculating-business-hours-in-javascript/ 。我查看了代码,但看不出有任何可能导致这些差异的缺陷。

我正在使用这些测试数据

-- Fixture for the JobRuns UDF: one row per job with its start ("From"), end ("To")
-- and the number of working minutes the author expects ("ExpectedTime").
CREATE OR REPLACE TABLE "SLA_Test" (
    "DocumentID" VARCHAR(16777216),
    "From" TIMESTAMP_NTZ(9),
    "To" TIMESTAMP_NTZ(9),
    "ExpectedTime" INT
);

-- Explicit column list so the load does not depend on the table's column order.
INSERT INTO "SLA_Test" ("DocumentID", "From", "To", "ExpectedTime")
VALUES
    ('ACD7EFC1-8D17-46E3-84DB-C08067466866','2021-03-03 07:12:34.567','2021-03-03 08:12:34.567',60),
    ('C41FB599-D1EC-4461-BBAF-1AFF67D2F3C2','2021-03-03 09:55:00.000','2021-03-04 01:05:00.000',10),
    ('B741C663-732B-4FD3-839D-E70330C58990','2021-03-03 09:55:00.000','2021-03-04 00:05:00.000',5),
    ('C5893C51-F5CE-40E4-85F7-775515BC3E3D','2021-03-03 19:55:00.000','2021-03-04 01:05:00.000',5),
    ('BAF4ED57-8184-4CDF-8875-DFDA6EAC2033','2021-03-03 09:55:00.000','2021-03-05 01:05:00.000',550),
    ('F325059E-E78F-4DCE-B675-CC1C59669B3C','2021-03-05 09:55:00.000','2021-03-08 01:05:00.000',10),
    ('F325059E-E78F-4DCE-B675-CC1C59669B3C','2021-03-05 09:55:00.000','2021-03-07 01:05:00.000',5);

-- Compare raw elapsed time, the expected working minutes and the UDF result side by side.
-- Output column names are kept exactly as posted.
SELECT
    "DocumentID",
    "From",
    "To",
    DATEDIFF(second, "From", "To") AS "TotalElapsedTimeSecond",
    DATEDIFF(second, "From", "To")/60 AS "TotalElapsedTimeMinut",
    "ExpectedTime",
    JobRuns("From","To") AS "ElapsedTimeMinut"
FROM "SLA_Test";

对于 UDF 为什么没有返回预期时间,有什么想法吗?

4 个答案:

答案 0 :(得分:2)

如果您创建工作时间表,则可以运行以下查询:

-- Working hours per interval: for every working day that overlaps an interval,
-- take the overlap [max(starts), min(ends)] in seconds, sum the overlaps and
-- convert the total to hours.
-- NOTE(review): `start` / `end` are keywords in some engines and may need quoting
-- there - confirm against the target database.
select
  t.id
  , sum(datediff('second',
               -- later of the two start times
               (case when t.start <=
                          w.working_day_start_timestamp
                     then w.working_day_start_timestamp
                     else t.start
                end),
               -- earlier of the two end times
               (case when t.end >=
                          w.working_day_end_timestamp
                     then w.working_day_end_timestamp
                     else t.end
                end)
               )) / 3600 -- convert seconds to hours
  as working_hour_diff
from
  working_days_times w
  cross join time_intervals t
where -- select all intersecting intervals
  (
   t.start <= w.working_day_end_timestamp
   and
   t.end >= w.working_day_start_timestamp
  )
and -- select only working days
  w.is_working_day
group by
  t.id

本文还详细介绍了如何将其实现为 Javascript UDF:https://medium.com/dandy-engineering-blog/how-to-calculate-the-number-of-working-hours-between-two-timestamps-in-sql-b5696de66e51

答案 1 :(得分:1)

您是否在 Snowflake 之外进行了测试?我刚刚创建了以下文件并运行 node /tmp/dates.js 会产生与 Snowflake 匹配的输出

// Col1: function return, Col2: Expected
61 60
71 10
65 5
6 5
671 550
1271 10
671 5
// Counts the one-minute samples between startDate and endDate (both inclusive)
// whose local hour lies in [workHoursStart, workHoursEnd]; returns 0 when
// endDate is earlier than startDate.
// NOTE(review): kept exactly as posted so the printed output stays reproducible;
// the comments flag where the behaviour departs from "01:00-10:00, weekdays only".
function workingMinutesBetweenDates(startDate, endDate) {    
    // Running count of samples that matched the work-hours test
    var minutesWorked = 0;    

    // Validate input: an inverted range yields 0    
    if (endDate < startDate) {    
        return 0;    
    }    
    
    // Cursor for the walk. This is an alias, not a copy, so the setTime() call
    // below advances the caller's startDate object in place.
    var current = startDate;    
    
    // Define work range (hours of the day, as returned by getHours())    
    var workHoursStart = 1;    
    var workHoursEnd = 10;    
    var includeWeekends = false;    
    
    // Step one minute at a time; the inclusive bound samples both end points,
    // so an exact 60-minute span inside work hours yields 61.
    while (current <= endDate) {    
        // `<= workHoursEnd` also accepts all of hour 10 (10:00-10:59).
        // With includeWeekends === false the ternary is simply `true`, so weekend
        // days are counted; the Saturday/Sunday exclusion only runs when the flag
        // is true. getHours()/getDay() use the runtime's local time zone.
        if (current.getHours() >= workHoursStart && current.getHours() <= workHoursEnd && (includeWeekends ? current.getDay() !== 0 && current.getDay() !== 6 : true)) {    
            minutesWorked++;    
        }    
    
        // Advance the cursor by 60 000 ms (one minute)
        current.setTime(current.getTime() + 1000 * 60);    
    }    
    
    // Return the number of matching one-minute samples
    return minutesWorked;    
} 


// Each entry is [start, end, expected minutes]; one line "<actual> <expected>"
// is printed per entry, in the order listed.
var sampleRuns = [
    ['2021-03-03 07:12:34.567', '2021-03-03 08:12:34.567', 60],
    ['2021-03-03 09:55:00.000', '2021-03-04 01:05:00.000', 10],
    ['2021-03-03 09:55:00.000', '2021-03-04 00:05:00.000', 5],
    ['2021-03-03 19:55:00.000', '2021-03-04 01:05:00.000', 5],
    ['2021-03-03 09:55:00.000', '2021-03-05 01:05:00.000', 550],
    ['2021-03-05 09:55:00.000', '2021-03-08 01:05:00.000', 10],
    ['2021-03-05 09:55:00.000', '2021-03-07 01:05:00.000', 5]
];

sampleRuns.forEach(function (run) {
    // Fresh Date objects per call: the function advances its start argument in place.
    var begin = new Date(Date.parse(run[0]));
    var finish = new Date(Date.parse(run[1]));
    console.log(workingMinutesBetweenDates(begin, finish), run[2]);
});

答案 2 :(得分:1)

我发现代码至少有 2 个问题,我认为:

  1. 它总是会多计数至少 1。在 WHILE 语句的第一轮循环中,minutesWorked 递增,但此时没有实际工作 - 第一分钟直到 StartDate + 1 分钟才工作
  2. 您的工作日在 10 点结束,但您的逻辑包括小时部分 <= 10 的任何时间,因此直到 10:59:59 都会继续递增 minutesWorked。我认为逻辑应该是小于,而不是小于或等于: ... && current.getHours() < workHoursEnd

答案 3 :(得分:1)

这一切都可以在 SQL 中完成,

-- Working minutes per job in pure SQL: expand each job into one row per calendar
-- day it touches, clip the job to that day's 01:00-10:00 window, and sum the
-- clipped seconds of Monday-Friday slices only.
WITH SLA_Test(DocumentID, FromTime, ToTime, ExpectedTime) AS (
  SELECT column1, column2::timestamp_ntz, column3::timestamp_ntz, column4
  FROM
  VALUES
    ('ACD7EFC1-8D17-46E3-84DB-C08067466866','2021-03-03 07:12:34.567','2021-03-03 08:12:34.567',60),
    ('C41FB599-D1EC-4461-BBAF-1AFF67D2F3C2','2021-03-03 09:55:00.000','2021-03-04 01:05:00.000',10),
    ('B741C663-732B-4FD3-839D-E70330C58990','2021-03-03 09:55:00.000','2021-03-04 00:05:00.000',5),
    ('C5893C51-F5CE-40E4-85F7-775515BC3E3D','2021-03-03 19:55:00.000','2021-03-04 01:05:00.000',5),
    ('BAF4ED57-8184-4CDF-8875-DFDA6EAC2033','2021-03-03 09:55:00.000','2021-03-05 01:05:00.000',550),
    ('F325059E-E78F-4DCE-B675-CC1C59669B3C','2021-03-05 09:55:00.000','2021-03-08 01:05:00.000',10),
    ('F325059E-E78F-4DCE-B675-CC1C59669B3C','2021-03-05 09:55:00.000','2021-03-07 01:05:00.000',5)
), days AS (
    -- Day offsets 0..29. Jobs spanning more than 30 calendar days are only
    -- covered for their first 30 days; raise rowcount if longer jobs can occur.
    SELECT row_number() OVER (ORDER BY seq8()) - 1 AS num
    FROM table(GENERATOR(rowcount => 30))
), enriched AS (
    -- Number of midnight boundaries crossed = highest day offset needed per job.
    SELECT
        s.DocumentID
        ,s.FromTime
        ,s.ToTime
        ,s.ExpectedTime
        ,datediff('day', s.FromTime, s.ToTime) AS tot_days
    FROM SLA_Test AS s
), day_sliced AS (
    -- One row per (job, calendar day). slice_start/slice_end clamp the job to
    -- the day's work window, so a day with no overlap contributes 0 seconds.
    SELECT
        s.DocumentID
        ,s.FromTime
        ,s.ToTime
        ,s.ExpectedTime
        ,dateadd('day', d.num, date_trunc('day', s.FromTime)) AS clip_day
        ,dateadd('hour', 1, clip_day) AS clip_start
        ,dateadd('hour', 10, clip_day) AS clip_end
        ,dayofweekiso(clip_day) AS dowi          -- ISO weekday: 1 = Monday .. 7 = Sunday
        ,(dowi >= 1 AND dowi <= 5) AS work_day
        ,least(greatest(s.FromTime, clip_start), clip_end) AS slice_start
        ,greatest(least(s.ToTime, clip_end), clip_start) AS slice_end
        ,datediff('second', slice_start, slice_end) AS slice_sec
    FROM enriched AS s
    INNER JOIN days AS d
        ON d.num <= s.tot_days
)
SELECT
    DocumentID
    ,FromTime
    ,ToTime
    ,ExpectedTime
    -- Weekend slices are zeroed here instead of being filtered out beforehand,
    -- so a job that touches no working day still appears, with 0 minutes.
    -- The alias spelling is kept as posted because the printed result uses it.
    ,round(sum(CASE WHEN work_day THEN slice_sec ELSE 0 END) / 60, 0) AS elasped_time_minutes
FROM day_sliced
GROUP BY DocumentID, FromTime, ToTime, ExpectedTime
ORDER BY DocumentID, FromTime, ToTime;

它给出了预期的结果:

DOCUMENTID                              FROMTIME                TOTIME     EXPECTEDTIME ELASPED_TIME_MINUTES
ACD7EFC1-8D17-46E3-84DB-C08067466866    2021-03-03 07:12:34.567 2021-03-03 08:12:34.567 60  60
B741C663-732B-4FD3-839D-E70330C58990    2021-03-03 09:55:00.000 2021-03-04 00:05:00.000 5   5
BAF4ED57-8184-4CDF-8875-DFDA6EAC2033    2021-03-03 09:55:00.000 2021-03-05 01:05:00.000 550 550
C41FB599-D1EC-4461-BBAF-1AFF67D2F3C2    2021-03-03 09:55:00.000 2021-03-04 01:05:00.000 10  10
C5893C51-F5CE-40E4-85F7-775515BC3E3D    2021-03-03 19:55:00.000 2021-03-04 01:05:00.000 5   5
F325059E-E78F-4DCE-B675-CC1C59669B3C    2021-03-05 09:55:00.000 2021-03-07 01:05:00.000 5   5
F325059E-E78F-4DCE-B675-CC1C59669B3C    2021-03-05 09:55:00.000 2021-03-08 01:05:00.000 10  10
相关问题