我编写了一个函数,它接受一个输入字符串并将输入文本包装到作为参数传入的指定宽度。现在,使用4000个字符串运行此功能需要大约290毫秒1,000次。
有人可以建议改善此功能的方法吗?我将尝试在网上找到的一些想法here,但我想知道这里是否有任何建议。
这是我的功能:
CREATE OR REPLACE FUNCTION SPLIT_STRING (
p_str VARCHAR2, --String input to split
p_del VARCHAR2 DEFAULT NULL, --Delimiter (If NULL, will only split on length [p_len NOT NULL])
p_len INTEGER DEFAULT NULL, --Length of each chunk (If NULL, will only split on delimiter)
p_force INTEGER DEFAULT NULL)
RETURN array_type IS
--
v_str_array array_type;
v_line_array array_type;
v_len PLS_INTEGER; -- Total string length
v_str VARCHAR2(32767); -- String being built
v_chunk VARCHAR2(32767); -- Current chunk
v_char VARCHAR2(1 CHAR); -- Current character
v_cur_len PLS_INTEGER; -- Current chuk length
v_idx PLS_INTEGER DEFAULT 1; -- For loop counter
v_start PLS_INTEGER DEFAULT 1;
v_stop PLS_INTEGER;
v_line PLS_INTEGER DEFAULT 0;
v_word binary_integer;
BEGIN
--Determine total string length
v_len:= LENGTH(p_str);
-- Split into individual word chunks by delimiter
IF p_del IS NOT NULL
THEN
FOR pos IN 1..v_len
LOOP
v_char:= SUBSTR(p_str, pos, 1);
IF (v_char = p_del) OR (pos = v_len)
THEN
v_stop:= pos+1;
v_chunk:=SUBSTR(p_str, v_start, v_stop-v_start);
v_str_array(v_idx):= v_chunk;
v_start:=v_stop;
v_line:=v_idx;
v_idx:=v_idx+1;
END IF;
END LOOP;
END IF;
--Split the string into chunks and add to an array
IF p_del IS NOT NULL AND p_len IS NOT NULL
THEN
-- Wrap words into each line
v_idx:=1;
v_cur_len:=0;
v_line:=0;
v_word:=v_str_array.FIRST;
WHILE v_str_array(v_word) IS NOT NULL
LOOP
IF(v_cur_len+LENGTH(v_str_array(v_word)) <= p_len)
THEN
IF(v_word <= 1)
THEN
v_chunk:=v_str_array(v_word);
ELSE
v_chunk:=v_chunk||v_str_array(v_word);
END IF;
IF v_line=0
THEN
v_line:=1;
END IF;
ELSIF (p_force = 1)
THEN
-- Force line width to match specification
v_chunk:=SUBSTR(v_str_array(v_word),1,p_len);
IF v_str_array.NEXT(v_word) IS NOT NULL
THEN
v_str_array(v_word+1) := SUBSTR(v_str_array(v_word),p_len+1,LENGTH(v_str_array(v_word)))|| v_str_array(v_word+1);
ELSE
v_str_array(v_word+1) := SUBSTR(v_str_array(v_word),p_len+1,LENGTH(v_str_array(v_word)));
END IF;
v_line:=v_line+1;
ELSE
-- Prevent words from being split if they are larger than width specification
v_chunk:=v_str_array(v_word);
v_line:=v_line+1;
END IF;
v_line_array(v_line):= v_chunk;
v_cur_len:= LENGTH(v_chunk);
v_word:=v_str_array.NEXT(v_word);
IF v_word IS NULL
THEN
EXIT;
END IF;
END LOOP;
ELSIF p_del IS NULL AND p_len IS NOT NULL
THEN
-- Chop into exact length lines
v_line:= CEIL(v_len/p_len);
v_start:=1;
v_stop:= p_len;
FOR line IN 1..v_line
LOOP
v_line_array(line):= SUBSTR(p_str, v_start, p_len);
v_start:=v_stop+1;
v_stop:=v_stop+p_len;
END LOOP;
ELSIF p_len IS NULL AND p_del IS NOT NULL
THEN
-- Split into word chunks by deliminator if length is not specified
v_line_array:= v_str_array;
ELSE
-- Do nothing to the string if no delimiter OR length is specified
v_line_array(1):= p_str;
RETURN v_line_array;
END IF;
RETURN v_line_array;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Exception in function "SPLIT_STRING": '||SQLERRM);
v_line_array(1):= p_str;
RETURN v_line_array;
END SPLIT_STRING;
/
这就是我在模拟表加载环境中“测试”函数的方法,在该环境中,对表中的每个条目调用该函数。
DECLARE
type array_type is table of varchar2(32767) index by binary_integer;
v_str_array array_type;
P_STR VARCHAR2(32767);
P_DEL VARCHAR2(32767);
P_LEN NUMBER;
P_FORCE NUMBER;
V_ITTERATIONS NUMBER;
V_EXECUTIONTIME NUMBER;
V_TIMESTART NUMBER;
BEGIN
P_STR := 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia. Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi. Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis. Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id purus. Ut varius tincidunt libero. Phasellus dolor. Maecenas vestibulum mollis diam. Pellentesque ut neque. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac felis quis tortor malesuada pretium. Pellentesque auctor neque nec urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis pretium feugiat. Vivamus quis mi. Phasellus a est. Phas';
P_DEL := ' ';
P_LEN := 60;
P_FORCE := 1;
V_ITTERATIONS := 10;
--Get Start Time
V_TIMESTART:= DBMS_UTILITY.Get_time();
--Run Testing
FOR stress_test_loop IN 1..V_ITTERATIONS
LOOP
v_str_array:= SPLIT_STRING ( P_STR, P_DEL, P_LEN, P_FORCE );
END LOOP;
--Calculate run time
SELECT (dbms_utility.get_time() - V_TIMESTART) INTO V_EXECUTIONTIME FROM DUAL;
--Display output
DBMS_OUTPUT.Put_Line('Completed sucessfully.'||CHR(10)||'Itterations: '||V_ITTERATIONS||CHR(10)||'Execution Time: '||V_EXECUTIONTIME||'ms');
DBMS_OUTPUT.Put_Line('Function output: '||CHR(10));
FOR line IN 1..v_str_array.LAST
LOOP
DBMS_OUTPUT.Put_Line(v_str_array(line));
END LOOP;
COMMIT;
END;
该函数只是包装这样的文本(使用force = 0):
1---------n
This is a
test
testing
123...
然后用force = 1:
这样1---------n
This is a
test testi
g 123...
到目前为止,该函数在大量的sequntial调用中执行得非常糟糕:
对于至少500个thuosand,我想在1秒内达到理想状态(虽然这可能是一厢情愿的想法)。
SOLUTION:
CREATE OR REPLACE FUNCTION SPLIT_STRINGV2 (
p_str VARCHAR2, --String input to split
p_len INTEGER, --Length of each chunk (If NULL, will only split on delimiter)
p_del VARCHAR2 DEFAULT ' ', --Delimiter (If NULL, will only split on length [p_len NOT NULL])
p_force INTEGER DEFAULT 0)
RETURN T_VAR_32K IS
--
v_line_array T_VAR_32K;
v_len PLS_INTEGER; -- Total string length
v_chunk VARCHAR2(32767); -- Current chunk
v_char VARCHAR2(1 CHAR); -- Current character
v_line PLS_INTEGER DEFAULT 1;
v_start PLS_INTEGER DEFAULT 1;
v_pos PLS_INTEGER DEFAULT 1;
v_prev PLS_INTEGER;
v_next PLS_INTEGER;
v_numb PLS_INTEGER;
BEGIN
--Determine total string length
v_len:= LENGTH(p_str);
WHILE (v_pos<=v_len)
LOOP
v_chunk:= SUBSTR(p_str, v_start, p_len);
IF p_force = 0 AND p_del IS NOT NULL
THEN
v_prev:=INSTR(SUBSTR(p_str, v_start, p_len), p_del, -1);
v_next:=INSTR(SUBSTR(p_str, v_prev+1, v_len), p_del, 1)+v_prev;
v_char:=SUBSTR(v_chunk, LENGTH(v_chunk)-1, 1);
IF v_char=p_del
THEN
v_chunk:=SUBSTR(v_chunk, 1, p_len);
ELSE
v_numb:=LENGTH(SUBSTR(v_chunk, 1, p_len)) - length(replace(SUBSTR(v_chunk, 1, p_len), p_del));
IF v_prev != 0 AND v_numb <= 1
THEN
v_chunk:=SUBSTR(p_str, v_start, v_next);
ELSIF v_prev !=0 AND v_numb > 1
THEN
v_chunk:=SUBSTR(p_str, v_start, v_prev);
ELSE
v_chunk:=SUBSTR(v_chunk, 1, p_len);
END IF;
END IF;
ELSE
v_chunk:=SUBSTR(v_chunk, 1, p_len);
END IF;
IF v_chunk IS NOT NULL
THEN
v_line_array(v_line):=v_chunk;
END IF;
v_pos:= v_pos+LENGTH(v_chunk);
v_line:= v_line+1;
v_start := v_pos+1;
END LOOP;
RETURN v_line_array;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Exception in function "SPLIT_STRING": '||SQLERRM);
v_line_array(1):= p_str;
RETURN v_line_array;
END SPLIT_STRINGV2;
/
最后,对于1,000,000个函数调用,我的执行速度从大约20分钟降低到10秒。这是99%的改进!
答案 0 :(得分:2)
见这里:https://community.oracle.com/thread/3700483 主要想法是:
SELECT rn, txt
FROM ( SELECT ROWNUM AS rn, TRIM (REGEXP_SUBSTR (:p_txt, '.{1,' || :v_linesize || '}( |$)', 1, ROWNUM)) AS txt
FROM DUAL
CONNECT BY ROWNUM <= CEIL (LENGTH (:p_txt) / :v_linesize) + 10)
WHERE txt IS NOT NULL
ORDER BY rn
这里的分隔符是&#39; &#39;