在 Oracle 中上传 CSV 时,去除输入行中的回车符

时间:2016-03-21 13:38:07

标签: oracle csv plsql

有没有办法在上传 CSV 时去除行内的回车符?

我的示例csv中的单元格如下:

"Administrator,, admin (admin): Mon Jan 25 09:48:22 GMT 2016
please fix this issue ASAP
---
Zaeem,, Muhammad (mzaeem): Mon Jan 25 14:22:05 PKT 2016
What you said has been added
---
."

当我上传此 CSV 时,该单元格中的每一行文本都会生成一条新记录,我不希望出现这种情况。我使用 UTL_FILE 上传数据,并为此编写了一个存储过程。

存储过程是:

CREATE OR REPLACE PROCEDURE Import_DATA_File (P_FILENAME   IN     VARCHAR2,
                                                P_QUERY_REF IN VARCHAR2,
                                              o_msg           OUT VARCHAR2)
IS
   -- Loads the CSV file P_FILENAME from directory object FILE_UPLOAD_DIR into
   -- DATA_Q127_CT, inserting one row per physical line returned by
   -- UTL_FILE.Get_Line.
   --
   -- Parameters:
   --   P_FILENAME   name of the file to open for reading
   --   P_QUERY_REF  value stored in the query_ref column of every inserted row
   --   o_msg        set to SQLERRM only when an unexpected error reaches the
   --                outer WHEN OTHERS handler; it is not assigned on any other path
   --
   -- Processing per line: '","' is appended, the line is split on the '","'
   -- delimiter with Plsql_Delimited_String.Getstring, and fields 1..50 are
   -- inserted after every double quote has been removed from them.
   --
   -- NOTE: Get_Line returns one physical line at a time, so a quoted field that
   -- contains line breaks is split across several reads and each fragment is
   -- inserted as a separate record.
   Infile                UTL_FILE.File_Type;
   Linebuf               VARCHAR2 (4000);
   -- NOTE(review): V_Getstring is declared but never referenced in this procedure.
   V_Getstring           VARCHAR2 (100);

   -- Field values of the current line, indexed by field number.
   -- NOTE: each element holds at most 100 characters. Assigning a longer field
   -- raises an error in the split loop, which is outside the per-row handler,
   -- so it ends the whole load through the outer WHEN OTHERS handler.
   TYPE Fieldvalue IS TABLE OF VARCHAR2 (100)
                         INDEX BY BINARY_INTEGER;

   Field_Position        Fieldvalue;

   -- Lines for which an insert was attempted / inserts that succeeded.
   Total_Rec_Count       NUMBER := 0;
   Total_Rec_Processed   NUMBER := 0;
BEGIN
   Infile := UTL_FILE.Fopen ('FILE_UPLOAD_DIR', P_FILENAME, 'R');

   -- The loop has no EXIT; it ends only when Get_Line raises NO_DATA_FOUND at
   -- end of file, which transfers control to the handler at the bottom.
   LOOP
      -- Read the next physical line of the file.
      UTL_FILE.Get_Line (Infile,Linebuf);

      -- Terminate the last field so that it is followed by a delimiter too.
      Linebuf := Linebuf || '","';

      -- Split the line; field 51 is extracted but never used by the INSERT below.
      FOR I IN 1 .. 51
      LOOP
         Field_Position (I) :=
            Plsql_Delimited_String.Getstring (Linebuf,
                                              I,
                                              FALSE,
                                              '","');
      END LOOP;

      BEGIN
         Total_Rec_Count := Total_Rec_Count + 1;

         -- Insert the 50 fields; REPLACE strips every double quote from each value.
         INSERT INTO DATA_Q127_CT (query_ref,
                                   col_v1,
                                   col_v2,
                                   col_v3,
                                   col_v4,
                                   col_v5,
                                   col_v6,
                                   col_v7,
                                   col_v8,
                                   col_v9,
                                   col_v10,
                                   col_v11,
                                   col_v12,
                                   col_v13,
                                   col_v14,
                                   col_v15,
                                   col_v16,
                                   col_v17,
                                   col_v18,
                                   col_v19,
                                   col_v20,
                                   col_v21,
                                   col_v22,
                                   col_v23,
                                   col_v24,
                                   col_v25,
                                   col_v26,
                                   col_v27,
                                   col_v28,
                                   col_v29,
                                   col_v30,
                                   col_v31,
                                   col_v32,
                                   col_v33,
                                   col_v34,
                                   col_v35,
                                   col_v36,
                                   col_v37,
                                   col_v38,
                                   col_v39,
                                   col_v40,
                                   col_v41,
                                   col_v42,
                                   col_v43,
                                   col_v44,
                                   col_v45,
                                   col_v46,
                                   col_v47,
                                   col_v48,
                                   col_v49,
                                   col_v50)
              VALUES (
              P_QUERY_REF,
              REPLACE (field_position (1),'"',''),
                      REPLACE (field_position (2),'"',''),
                      REPLACE (field_position (3),'"',''),
                      REPLACE (field_position (4),'"',''),
                      REPLACE (field_position (5),'"',''),
                      REPLACE (field_position (6),'"',''),
                      REPLACE (field_position (7),'"',''),
                      REPLACE (field_position (8),'"',''),
                      REPLACE (field_position (9),'"',''),
                      REPLACE (field_position (10),'"',''),
                      REPLACE (field_position (11),'"',''),
                      REPLACE (field_position (12),'"',''),
                      REPLACE (field_position (13),'"',''),
                      REPLACE (field_position (14),'"',''),
                      REPLACE (field_position (15),'"',''),
                      REPLACE (field_position (16),'"',''),
                      REPLACE (field_position (17),'"',''),
                      REPLACE (field_position (18),'"',''),
                      REPLACE (field_position (19),'"',''),
                      REPLACE (field_position (20),'"',''),
                      REPLACE (field_position (21),'"',''),
                      REPLACE (field_position (22),'"',''),
                      REPLACE (field_position (23),'"',''),
                      REPLACE (field_position (24),'"',''),
                      REPLACE (field_position (25),'"',''),
                      REPLACE (field_position (26),'"',''),
                      REPLACE (field_position (27),'"',''),
                      REPLACE (field_position (28),'"',''),
                      REPLACE (field_position (29),'"',''),
                      REPLACE (field_position (30),'"',''),
                      REPLACE (field_position (31),'"',''),
                      REPLACE (field_position (32),'"',''),
                      REPLACE (field_position (33),'"',''),
                      REPLACE (field_position (34),'"',''),
                      REPLACE (field_position (35),'"',''),
                      REPLACE (field_position (36),'"',''),
                      REPLACE (field_position (37),'"',''),
                      REPLACE (field_position (38),'"',''),
                      REPLACE (field_position (39),'"',''),
                      REPLACE (field_position (40),'"',''),
                      REPLACE (field_position (41),'"',''),
                      REPLACE (field_position (42),'"',''),
                      REPLACE (field_position (43),'"',''),
                      REPLACE (field_position (44),'"',''),
                      REPLACE (field_position (45),'"',''),
                      REPLACE (field_position (46),'"',''),
                      REPLACE (field_position (47),'"',''),
                      REPLACE (field_position (48),'"',''),
                      REPLACE (field_position (49),'"',''),
                      REPLACE (field_position (50),'"','')
                      );


         Total_Rec_Processed := Total_Rec_Processed + 1;
      EXCEPTION
         WHEN OTHERS
         THEN
            -- Any error raised by the insert is discarded: the row is skipped
            -- and only Total_Rec_Count reflects that it was attempted.
            NULL;
      END;
   END LOOP;

   -- NOTE(review): not reachable, because the loop above never exits normally.
   IF UTL_FILE.is_open (infile)
   THEN
      UTL_FILE.Fclose (Infile);
   END IF;
EXCEPTION
   -- Normal end-of-file path: Get_Line raised NO_DATA_FOUND.
   WHEN NO_DATA_FOUND
   THEN
      IF UTL_FILE.is_open (infile)
      THEN
         UTL_FILE.Fclose (Infile);
      END IF;

      IF total_rec_processed > 0
      THEN
         COMMIT;

         -- Remove rows whose values equal the literal column names
         -- (presumably a header line of the file - TODO confirm).
         DELETE FROM DATA_Q127_CT
               WHERE QUERY_REF = 'QUERY_REF'
               OR COL_V50 = 'COL_V50';
               COMMIT;
      END IF;
   -- Any other error: close the file and hand the error text back to the caller.
   -- No COMMIT or ROLLBACK is issued on this path.
   WHEN OTHERS
   THEN
      IF UTL_FILE.is_open (infile)
      THEN
         UTL_FILE.Fclose (Infile);
      END IF;

      o_msg := SQLERRM;
END;

函数Plsql_Delimited_String.Getstring的DDL是:

   FUNCTION GetString (Source_string    IN VARCHAR2,
                       Field_position   IN NUMBER,
                       UnTerminated     IN BOOLEAN DEFAULT FALSE,
                       Delimiter        IN VARCHAR2 DEFAULT ',')
      RETURN VARCHAR2
   IS
      -- Returns the Field_position-th field (1-based) of Source_string, where
      -- every field is expected to be FOLLOWED by Delimiter.
      --
      --   Source_string   delimited text to split
      --   Field_position  number of the field to extract, starting at 1
      --   UnTerminated    TRUE when the last field has no trailing delimiter;
      --                   one is then appended to the working copy
      --   Delimiter       separator text; may be longer than one character
      --
      -- Returns NULL when the requested field is not followed by a delimiter
      -- (INSTR yields 0, so the SUBSTR length is not positive).
      l_text         VARCHAR2 (32000) := Source_string;
      l_field_from   PLS_INTEGER := 1;   -- field 1 always starts at position 1
      l_field_to     PLS_INTEGER;
   BEGIN
      IF UnTerminated
      THEN
         l_text := l_text || Delimiter;
      END IF;

      -- Fields after the first start right behind the preceding delimiter.
      IF Field_position > 1
      THEN
         l_field_from :=
            INSTR (l_text, Delimiter, 1, Field_position - 1) + LENGTH (Delimiter);
      END IF;

      -- The field ends just before its own terminating delimiter.
      l_field_to := INSTR (l_text, Delimiter, 1, Field_position);

      RETURN SUBSTR (l_text, l_field_from, l_field_to - l_field_from);
   END GetString;

我想知道,有什么方法可以消除来自传入数据的回车,并将值(如上所述)作为一行。

根据我的分析,行是在调用 UTL_FILE.Get_Line (Infile,Linebuf) 时读取的,而这个函数只读取了字符串的第一行。换句话说,只读入了这一行:"Administrator,, admin (admin): Mon Jan 25 09:48:22 GMT 2016,字符串的其余部分没有被读入,而是被当作新的行处理。

我想知道如何编写这样一个循环来继续读取该行,直到找到整个字符串。

请帮忙!

3 个答案:

答案 0 :(得分:2)

如果我对问题的理解正确:在循环顶部调用 UTL_FILE.Get_Line (Infile,Linebuf); 时,你想读入一段用户输入的文本(逻辑上是一个字段,对应一列),但这段文本中包含 UTL_FILE.Get_Line 无法处理的回车符,需要把它们去除。

能否让源系统在你拿到文件之前就去掉这些回车符,或者在读取之前先对文件做预处理?既然你用了 "cell"(单元格)这个词,我猜数据来自电子表格。数据源那边能否先做一次查找/替换?那样会更好。

否则,您就必须增加逻辑,检测当前是否处在多行文本字段的开头并加以处理。目前您是读入一行后直接放进 linebuf。一种处理办法是先把这一行读入 tmplinebuf,然后检查它:如果它以双引号开头并以双引号结尾(这里只是根据您展示的唯一字段,假设文本字段都用引号括起来),那么该字段是完整的,把它追加到主 linebuf,再读取下一行。但如果它以双引号开头却不以双引号结尾,说明这是一个多行字段,此时继续向 tmplinebuf 读入后续行,直到遇到一条不以双引号开头但以双引号结尾的行。这时整个多行字段都在 tmplinebuf 中,把 tmplinebuf 追加到主 linebuf 后继续处理。可以把它看作一个子循环:先在单独的缓冲区里拼出多行文本字段的所有行,再把结果加入主 linebuf。从技术上讲,只要格式处理得当,也可以把多行文本直接读入 linebuf,并不一定需要 tmplinebuf。

重要提醒:文本本身不能包含双引号 :-/ 我强烈建议在读取之前由源系统清理数据,或者先对文件做预处理。

编辑:另一个想法:源系统能否提供单元格(字段)中数据的长度?如果可以,GET_LINE() 的第三个参数就是要读取的数据长度,用它来代替让 GET_LINE() 在行尾字符处停止。先读入 tmplinebuf,用 REPLACE() 去掉其中的 CHR(10),再把 tmplinebuf 追加到 linebuf?

答案 1 :(得分:1)

如果字段都用引号括起来,并且要按分隔符拆分,则应使用

'"' || Delimiter || '"' 

作为 INSTR 函数要查找的子字符串。
您可以像下面这样用 REPLACE 去除回车符,并在 GetString 函数中用 TRIM 去掉双引号:

RETURN Trim( BOTH '"' FROM  REPLACE(SUBSTR (vcSourceStrCopy, iPtrStart, (iPtrEnd - iPtrStart)), '
', ' ')); -- the search string is a literal line break: its quote opens at the end of the previous line and closes at the start of this one

你也可以像这样使用 CHR:replace(str,chr(10),'')。如果同时存在换行符和回车符,请使用 chr(10)|| chr(13) 或 chr(13)|| CHR(10)。
要确认文件中用来换行的究竟是哪个字符,您可以使用十六进制编辑器查看,或者像下面这样输出该字符的编码:
-- Print the numeric code of the unknown character (13 = carriage return, 10 = line feed).
-- Oracle's character-code function is ASCII; there is no ASC function.
dbms_output.put_line(ASCII(unknown_char));

您可以使用translate删除所有换行符:

-- TRANSLATE maps CHR(13), CHR(10) and CHR(9) one-for-one onto the three spaces
-- of the last argument, so each of those characters becomes a space (it is not deleted).
RETURN Trim( BOTH '"' FROM  TRANSLATE(SUBSTR (vcSourceStrCopy, iPtrStart, (iPtrEnd - iPtrStart)), chr(13) || chr(10) || chr(9), '   '));

编辑:看了 Gary_W 的回答后,我明白了你的问题。为了确保读到的是包含所有字段的完整一行,你应该检查最后读到的字符:如果它是双引号,说明这一行已经结束;否则继续读取下一行并追加到缓冲区。可以尝试这样读取行:

-- Assemble one logical CSV row in Linebuf from as many physical lines as it spans.
-- Infile, Linebuf and part_Linebuf are assumed to be declared by the enclosing procedure.
Linebuf := '';
LOOP
  UTL_FILE.Get_Line (Infile,part_Linebuf);
  -- A file with CRLF line endings can leave a trailing CHR(13) on every line;
  -- remove it, otherwise the closing-quote test below never matches.
  part_Linebuf := RTRIM(part_Linebuf, CHR(13));
  -- Join fragments with a space so the words on both sides of a line break stay separated.
  Linebuf := CASE
               WHEN Linebuf IS NULL THEN part_Linebuf
               ELSE Linebuf || ' ' || part_Linebuf
             END;
  EXIT WHEN SUBSTR(part_Linebuf, -1, 1) = '"'; -- if end of row is: "
  --EXIT WHEN SUBSTR(part_Linebuf, -2, 2) = '",'; -- if end of row is: ",
END LOOP;

答案 2 :(得分:0)

感谢所有花时间帮助我的人。下面是我的解决方案,它有点慢,但能完成任务。

CREATE OR REPLACE PROCEDURE ARENA.Import_DATA_File5 (P_FILENAME   IN     VARCHAR2,
                                                P_QUERY_REF IN VARCHAR2,
                                              o_msg           OUT VARCHAR2)
IS
   -- Loads the CSV file P_FILENAME from directory object FILE_UPLOAD_DIR into
   -- DATA_Q127_CT, re-assembling records whose quoted fields contain line breaks.
   --
   -- Parameters:
   --   P_FILENAME   name of the file to read
   --   P_QUERY_REF  value stored in query_ref for every inserted row
   --   o_msg        NULL when every record was inserted; otherwise either a
   --                summary of rejected records or, after an unexpected error,
   --                SQLERRM followed by the error backtrace
   --
   -- A record is considered complete once it holds c_record_delims occurrences
   -- of the '","' delimiter. Physical lines are concatenated until that count
   -- is reached, and CR/LF characters are then removed from the assembled
   -- record. Records that arrive complete on one line are passed on untouched.
   --
   -- Field values are stored exactly as returned by the splitter: the opening
   -- quote of the first field and the closing quote of the last field are NOT
   -- stripped.
   --
   -- Each successful insert is committed immediately, so rows loaded before an
   -- unexpected error remain in the table.
   c_delimiter       CONSTANT VARCHAR2 (3) := '","';
   -- Number of delimiters that marks a record as complete.
   -- presumably the delimiter count of a full record in the input file - TODO confirm
   c_record_delims   CONSTANT PLS_INTEGER := 7;

   Infile                UTL_FILE.File_Type;
   Linebuf               VARCHAR2 (32000);
   v_last_error          VARCHAR2 (4000);

   -- Field values of the current record, indexed by field number
   TYPE Fieldvalue IS TABLE OF VARCHAR2 (4000)
                         INDEX BY BINARY_INTEGER;

   Field_Position        Fieldvalue;

   Total_Rec_Count       NUMBER := 0;   -- records for which an insert was attempted
   Total_Rec_Processed   NUMBER := 0;   -- records inserted successfully

   -- Reads the next logical record into p_record. Returns FALSE at end of file;
   -- a record still incomplete when the file ends is discarded.
   FUNCTION Next_Record (p_record OUT VARCHAR2)
      RETURN BOOLEAN
   IS
      l_line        VARCHAR2 (32000);
      l_multiline   BOOLEAN := FALSE;
   BEGIN
      UTL_FILE.Get_Line (Infile, p_record);

      -- NVL: INSTR yields NULL for an empty line, which must count as "incomplete".
      WHILE NVL (INSTR (p_record, c_delimiter, 1, c_record_delims), 0) = 0
      LOOP
         UTL_FILE.Get_Line (Infile, l_line);
         p_record := p_record || l_line;
         l_multiline := TRUE;
      END LOOP;

      IF l_multiline
      THEN
         p_record := REPLACE (REPLACE (p_record, CHR (10)), CHR (13));
      END IF;

      RETURN TRUE;
   EXCEPTION
      WHEN NO_DATA_FOUND
      THEN
         RETURN FALSE;
   END Next_Record;
BEGIN
   -- Without an explicit max_linesize UTL_FILE only accepts lines up to 1024 characters.
   Infile := UTL_FILE.Fopen ('FILE_UPLOAD_DIR', P_FILENAME, 'R', 32767);

   WHILE Next_Record (Linebuf)
   LOOP
      -- Terminate the last field so that it is followed by a delimiter too.
      Linebuf := Linebuf || c_delimiter;

      FOR I IN 1 .. 50
      LOOP
         Field_Position (I) :=
            Plsql_Delimited_String.Getstring (Linebuf,
                                              I,
                                              FALSE,
                                              c_delimiter);
      END LOOP;

      Total_Rec_Count := Total_Rec_Count + 1;

      BEGIN
         INSERT INTO DATA_Q127_CT (
                   query_ref,
                   col_v1,  col_v2,  col_v3,  col_v4,  col_v5,
                   col_v6,  col_v7,  col_v8,  col_v9,  col_v10,
                   col_v11, col_v12, col_v13, col_v14, col_v15,
                   col_v16, col_v17, col_v18, col_v19, col_v20,
                   col_v21, col_v22, col_v23, col_v24, col_v25,
                   col_v26, col_v27, col_v28, col_v29, col_v30,
                   col_v31, col_v32, col_v33, col_v34, col_v35,
                   col_v36, col_v37, col_v38, col_v39, col_v40,
                   col_v41, col_v42, col_v43, col_v44, col_v45,
                   col_v46, col_v47, col_v48, col_v49, col_v50)
              VALUES (
                   P_QUERY_REF,
                   Field_Position (1),  Field_Position (2),  Field_Position (3),
                   Field_Position (4),  Field_Position (5),  Field_Position (6),
                   Field_Position (7),  Field_Position (8),  Field_Position (9),
                   Field_Position (10), Field_Position (11), Field_Position (12),
                   Field_Position (13), Field_Position (14), Field_Position (15),
                   Field_Position (16), Field_Position (17), Field_Position (18),
                   Field_Position (19), Field_Position (20), Field_Position (21),
                   Field_Position (22), Field_Position (23), Field_Position (24),
                   Field_Position (25), Field_Position (26), Field_Position (27),
                   Field_Position (28), Field_Position (29), Field_Position (30),
                   Field_Position (31), Field_Position (32), Field_Position (33),
                   Field_Position (34), Field_Position (35), Field_Position (36),
                   Field_Position (37), Field_Position (38), Field_Position (39),
                   Field_Position (40), Field_Position (41), Field_Position (42),
                   Field_Position (43), Field_Position (44), Field_Position (45),
                   Field_Position (46), Field_Position (47), Field_Position (48),
                   Field_Position (49), Field_Position (50));

         -- Committing per row is kept so that partially loaded files stay
         -- persisted as before; it is also the main cost of this procedure.
         COMMIT;

         Total_Rec_Processed := Total_Rec_Processed + 1;
      EXCEPTION
         WHEN OTHERS
         THEN
            -- Best effort: skip the rejected record but remember why it failed.
            v_last_error := SQLERRM;
      END;
   END LOOP;

   UTL_FILE.Fclose (Infile);

   IF Total_Rec_Processed > 0
   THEN
      -- Remove rows whose values equal the literal column names
      -- (presumably a header line of the file - TODO confirm).
      DELETE FROM DATA_Q127_CT
            WHERE QUERY_REF = 'QUERY_REF'
               OR COL_V50 = 'COL_V50';

      COMMIT;
   END IF;

   IF Total_Rec_Processed < Total_Rec_Count
   THEN
      o_msg :=
            TO_CHAR (Total_Rec_Count - Total_Rec_Processed)
         || ' of '
         || TO_CHAR (Total_Rec_Count)
         || ' records rejected; last error: '
         || v_last_error;
   END IF;
EXCEPTION
   WHEN OTHERS
   THEN
      -- Capture the error first; the backtrace points at the line that raised it.
      o_msg :=
            SQLERRM
         || CHR (10)
         || 'Error in '
         || $$plsql_unit
         || ': '
         || DBMS_UTILITY.format_error_backtrace;

      IF UTL_FILE.is_open (Infile)
      THEN
         UTL_FILE.Fclose (Infile);
      END IF;
END;