在文本中查找Unix时间戳并替换为sql

时间:2012-08-09 20:07:33

标签: oracle datetime pattern-matching unix-timestamp

我有大型文本字符串,其中包含10位数的unix日期戳。我正在尝试编写一个查询来搜索这些10位数的文本字符串,并用常规日期格式替换它们。我有转换语句:

TRUNC(DATE '1970-01-01' + [timestamp]/86400),在输入值时效果很好。

实施例:     从双重选择TRUNC(日期'1970-01-01'+ 1022089483/86400); = 22-may-02

但我很难找到合适的方法来寻找和替换。另外,我不能使用正则表达式。所以,这是我的理论SQL:

replace([column],'[sql to find 10-digit number]'
          ,TRUNC(DATE '1970-01-01' + [10-digit number]/86400))

以下是一些示例文本:

  1022089483等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等等   blah blah 1022095218 blah blah blah blah

1 个答案:

答案 0 :(得分:2)

只是为了好玩,我想我会努力尝试。一个功能是使用instr查找10位数字的开头,然后重复调用该数字并替换任何使用格式化日期找到的数字。完全不确定这是一种明智的方法,还是以任何有效的方式......

create or replace function epoch_offset(p_value in varchar2, p_start number)
return number is
    l_value varchar2(4000);
    offset number := 0;
    prev_offset number := 0;
    digit_count number := 0;
    epoch_start number := 0;
    pos number := p_start;
begin
    -- replace all digits with a single one, to make searching with instr
    -- simpler
    l_value := translate(p_value, '1234567890', '9999999999');

    while true loop
        -- find the next digit, starting as pos; first time through, pos
        -- will be the p_start we were given, then it tracks where we have
        -- got to
        offset := instr(l_value, '9', pos);

        if offset = 0 then
            -- we didn't find a digit, check if we already had a 10-digit
            -- number and have just reached the end
            if digit_count = 10 then -- and pos > length(p_value) then
                -- original value ends with a timestamp; so we have a 10-digit
                -- number
                exit;
            else
                -- no more digits, and last set we saw was short than 10; so
                -- l_value does not contain any 10-digit numbers (at least,
                -- after p_start)
                epoch_start := 0;
                exit;
            end if;
        end if;

        if prev_offset > 0 and offset != prev_offset + 1 then
            -- we've found a digit, but there's a gap since the last one
            if digit_count = 10 then
                -- the gap denotes the end of a 10-digit number, which is
                -- what we're looking for
                exit;
            end if;

            -- we've potentially started a new 10-digit number, so reset
            epoch_start := offset;
            digit_count := 0;
            prev_offset := 0;
        else
            -- we've found a sequential digit
            prev_offset := offset;
        end if;

        -- mark where we are
        if digit_count = 0 then
            -- start of a potential digit-sequence, make a note
            epoch_start := offset;
        end if;
        digit_count := digit_count + 1;
        pos := offset + 1;
    end loop;

    return epoch_start;
end epoch_offset;
/

create or replace function epoch_replace(p_value in varchar2,
    p_start in number default 1)
return varchar2 as
    l_pos number;
    l_time number;
    l_value varchar2(4000);
begin
    -- for this iteration, find the start of a 10-digit number, starting
    -- from p_start (1 on first iteration, by default)
    l_pos := epoch_offset(p_value, p_start);
    if l_pos > 0 then
        -- found a 10-digit number; call this recursively before modifying -
        -- this means we'll replace numbers with dates working from the end,
        -- so the positions don't need to be adjusted for the difference
        -- between the number and date lengths
        l_value := epoch_replace(p_value, l_pos + 10);
        -- get the 10-digit number...
        l_time := to_number(substr(l_value, l_pos, 10));
        -- ... and convert it to a date, with the rest of the original value
        -- around it
        return substr(l_value, 1, l_pos - 1)
            || to_char(trunc(DATE '1970-01-01' + l_time/86400), 'DD-mon-RR')
            || substr(l_value, l_pos + 10);
    else
        -- didn't find a 10-digit number, so return what we started with
        return p_value;
    end if;
end epoch_replace;
/

可能会有其他边缘情况绊倒,但用一些明显的情况来阻止它:

with tmp_tab as (
    select '1022089483 blah blah blah blah blah 1022094450 blah blah blah blah blah blah 1022095218 blah blah blah blah' as value from dual
    union all
    select 'blah 1022089483 blah 1022094450' from dual
    union all
    select 'blah 1022089483 98765 1022094450 1234' from dual
    union all
    select 'blah 1022089483 98765 1022094450 1022095218 1234 123456789 12345678901 123' from dual
)
select epoch_replace(value) from tmp_tab;

EPOCH_REPLACE(VALUE)
------------------------------------------------------------------------------------------------------------------------
22-may-02 blah blah blah blah blah 22-may-02 blah blah blah blah blah blah 22-may-02 blah blah blah blah
blah 22-may-02 blah 22-may-02
blah 22-may-02 98765 22-may-02 1234
blah 22-may-02 98765 22-may-02 22-may-02 1234 123456789 12345678901 123