oracle存储过程将csv文件导入目标表

时间:2015-03-05 03:20:06

标签: oracle csv stored-procedures plsql import

我有一个存储过程,它使用UTL函数将CSV数据插入到Oracle表中。我不理解的是:为什么在插入目标表之前必须使用regexp_substr。我在多个网站上查阅过regexp_substr的说明,但仍然没有完全弄清楚如何把它应用到这个导入程序中。

例如,假设我们有以下几行代码:

             -- REGEXP_SUBSTR (source, pattern, start_position, occurrence):
             -- '[^,]+' matches a run of one or more characters that are not
             -- commas; the 3rd argument is where the search starts and the 4th
             -- argument selects which match (1st, 2nd, ...) is returned.
             -- NOTE: because the pattern needs at least one character, an empty
             -- field (two adjacent commas) produces no match and is skipped.
             V_EMPNO := REGEXP_SUBSTR(V_LINE, '[^,]+', 1, 1);
             V_ENAME := REGEXP_SUBSTR(V_LINE, '[^,]+', 1, 2);

我的问题是:有人可以向我解释这行代码的逻辑是如何工作的吗?我的CSV文件和目标表都有75个字段,所以上面当然只是一个示例,我也知道这部分我用的是比较冗长的写法。我猜这行代码的最后一部分只是行号,所以我一直在递增它。

我的另一个问题是,我应该继续将所有记录定义为变量,并在导入的所有字段上单独执行regexp_substr吗?

1 个答案:

答案 0 :(得分:3)

您可以使用以下逻辑来代替regexp_substr,请查看下面的代码:

CREATE OR REPLACE PROCEDURE Import_Emp_File (P_FILENAME   IN     VARCHAR2,
                                             o_msg           OUT VARCHAR2)
IS
   /*
    * Loads a pipe-delimited text file into the EMP table, one row per line.
    *
    * Parameters
    *   P_FILENAME  name of the file inside the directory object
    *               'YOUR_DIRECTORY'.
    *   o_msg       NULL when every line was inserted.  Otherwise either a
    *               summary of the rejected lines (the import still ran to the
    *               end of the file) or the SQLERRM of the error that aborted
    *               the import.
    *
    * Behaviour
    *   - Each line is split into C_Field_Count fields with
    *     Plsql_Delimited_String.GetString and inserted as text; any
    *     conversion to the column types is left to the INSERT itself.
    *   - A line whose INSERT fails is skipped (best effort), but it is now
    *     counted and the first error is reported through o_msg instead of
    *     being discarded silently.
    *   - The rows are committed once, at end of file, when at least one row
    *     was inserted.
    *   - If an unexpected error aborts the import, the file is closed and
    *     o_msg is set; rows inserted so far are neither committed nor rolled
    *     back here.
    */
   C_Field_Count   CONSTANT PLS_INTEGER := 6;     -- fields expected per line
   C_Delimiter     CONSTANT VARCHAR2 (1) := '|';  -- field separator

   Infile                UTL_FILE.File_Type;
   Linebuf               VARCHAR2 (4000);
   End_Of_File           BOOLEAN := FALSE;

   -- Field Values Array
   TYPE Fieldvalue IS TABLE OF VARCHAR2 (100)
                         INDEX BY BINARY_INTEGER;

   Field_Position        Fieldvalue;

   Total_Rec_Count       NUMBER := 0;
   Total_Rec_Processed   NUMBER := 0;
   First_Insert_Error    VARCHAR2 (600);
BEGIN
   Infile := UTL_FILE.Fopen ('YOUR_DIRECTORY', P_FILENAME, 'R');

   LOOP
      -- UTL_FILE.Get_Line signals end of file with NO_DATA_FOUND.  Catching
      -- it right here keeps the close/commit code below reachable.
      BEGIN
         UTL_FILE.Get_Line (Infile, Linebuf);
      EXCEPTION
         WHEN NO_DATA_FOUND
         THEN
            End_Of_File := TRUE;
      END;

      EXIT WHEN End_Of_File;

      -- A file written with Windows line endings leaves a carriage return
      -- at the end of every line; drop it so it does not end up in the
      -- last field.
      Linebuf := RTRIM (Linebuf, CHR (13));

      -- The last field of a line has no trailing delimiter, so the line is
      -- read as an unterminated delimited string.
      FOR I IN 1 .. C_Field_Count
      LOOP
         Field_Position (I) :=
            Plsql_Delimited_String.Getstring (Linebuf,
                                              I,
                                              TRUE,
                                              C_Delimiter);
      END LOOP;

      BEGIN
         Total_Rec_Count := Total_Rec_Count + 1;

         -- an example table
         INSERT INTO EMP (EMPLOYEE_NUMBER,
                          FIRST_NAME,
                          LAST_NAME,
                          DATE_OF_JOIN,
                          EMP_TYPE,
                          DATE_OF_REGN)
             VALUES (Field_Position (1),
                     Field_Position (2),
                     Field_Position (3),
                     Field_Position (4),
                     Field_Position (5),
                     Field_Position (6));

         Total_Rec_Processed := Total_Rec_Processed + 1;
      EXCEPTION
         WHEN OTHERS
         THEN
            -- Best effort: a rejected line must not stop the import, but
            -- remember the first failure so the caller can be told.
            IF First_Insert_Error IS NULL
            THEN
               First_Insert_Error :=
                  'line ' || Total_Rec_Count || ': ' || SQLERRM;
            END IF;
      END;
   END LOOP;

   UTL_FILE.Fclose (Infile);

   IF Total_Rec_Processed > 0
   THEN
      COMMIT;
   END IF;

   IF Total_Rec_Processed < Total_Rec_Count
   THEN
      o_msg :=
         SUBSTR (
               (Total_Rec_Count - Total_Rec_Processed)
            || ' of '
            || Total_Rec_Count
            || ' record(s) rejected; first error at '
            || First_Insert_Error,
            1,
            512);
   END IF;
EXCEPTION
   WHEN OTHERS
   THEN
      IF UTL_FILE.Is_Open (Infile)
      THEN
         UTL_FILE.Fclose (Infile);
      END IF;

      o_msg := SQLERRM;
END;
/

下面一并给出上述过程所使用的分隔字符串实用程序包。请先执行以下脚本创建该包,然后再创建上面所示的过程:

CREATE OR REPLACE PACKAGE plsql_Delimited_String
IS
   /*
    * Helpers for working with the individual fields of a delimited string
    * such as 'a,b,c,'.
    *
    * Arguments shared by every subprogram:
    *   Source_string   the delimited string.
    *   Field_position  1-based position of the field concerned.
    *   UnTerminated    TRUE when there is no delimiter after the last field
    *                   ('a,b,c'); FALSE (default) when every field, including
    *                   the last, is followed by one ('a,b,c,').
    *   Delimiter       the field separator, ',' by default.
    */

   -- Number of fields in Source_string, derived from its delimiters.
   FUNCTION Counter (
      Source_string   IN VARCHAR2,
      UnTerminated    IN BOOLEAN DEFAULT FALSE,
      Delimiter       IN VARCHAR2 DEFAULT ','
   )
      RETURN NUMBER;

   -- Writes String_to_add into field Field_position of Source_string.
   PROCEDURE PutString (
      Source_string    IN OUT VARCHAR2,
      String_to_add    IN     VARCHAR2,
      Field_position   IN     NUMBER,
      UnTerminated     IN     BOOLEAN DEFAULT FALSE,
      Delimiter        IN     VARCHAR2 DEFAULT ','
   );

   -- As PutString; the number is converted with TO_CHAR first.
   PROCEDURE PutNumber (
      Source_string    IN OUT VARCHAR2,
      number_to_add    IN     NUMBER,
      Field_position   IN     NUMBER,
      UnTerminated     IN     BOOLEAN DEFAULT FALSE,
      Delimiter        IN     VARCHAR2 DEFAULT ','
   );

   -- As PutString; the date is stored as 'DD-MON-YYYY HH24:MI:SS' text.
   PROCEDURE PutDate (
      Source_string    IN OUT VARCHAR2,
      Date_to_add      IN     DATE,
      Field_position   IN     NUMBER,
      UnTerminated     IN     BOOLEAN DEFAULT FALSE,
      Delimiter        IN     VARCHAR2 DEFAULT ','
   );

   -- Text of field Field_position.
   FUNCTION GetString (
      Source_string    IN VARCHAR2,
      Field_position   IN NUMBER,
      UnTerminated     IN BOOLEAN DEFAULT FALSE,
      Delimiter        IN VARCHAR2 DEFAULT ','
   )
      RETURN VARCHAR2;

   -- Field Field_position converted with TO_NUMBER.
   FUNCTION GetNumber (
      Source_string    IN VARCHAR2,
      Field_position   IN NUMBER,
      UnTerminated     IN BOOLEAN DEFAULT FALSE,
      Delimiter        IN VARCHAR2 DEFAULT ','
   )
      RETURN NUMBER;

   -- Field Field_position converted with TO_DATE, using the format
   -- 'DD-MON-YYYY HH24:MI:SS'.
   FUNCTION GetDate (
      Source_string    IN VARCHAR2,
      Field_position   IN NUMBER,
      UnTerminated     IN BOOLEAN DEFAULT FALSE,
      Delimiter        IN VARCHAR2 DEFAULT ','
   )
      RETURN DATE;

   -- Position of the first field equal to Search_String, or 0 when no field
   -- matches.
   FUNCTION Locate (
      Source_string   IN VARCHAR2,
      Search_String   IN VARCHAR2,
      UnTerminated    IN BOOLEAN DEFAULT FALSE,
      Delimiter       IN VARCHAR2 DEFAULT ','
   )
      RETURN NUMBER;

   -- Date overload: searches for the date rendered as
   -- 'DD-MON-YYYY HH24:MI:SS'.
   FUNCTION Locate (
      Source_string   IN VARCHAR2,
      Search_date     IN DATE,
      UnTerminated    IN BOOLEAN DEFAULT FALSE,
      Delimiter       IN VARCHAR2 DEFAULT ','
   )
      RETURN NUMBER;

   -- Number overload: searches for TO_CHAR (Search_number).
   FUNCTION Locate (
      Source_string   IN VARCHAR2,
      Search_number   IN NUMBER,
      UnTerminated    IN BOOLEAN DEFAULT FALSE,
      Delimiter       IN VARCHAR2 DEFAULT ','
   )
      RETURN NUMBER;
END plsql_Delimited_String;
/

CREATE OR REPLACE PACKAGE BODY plsql_Delimited_String
IS
   /*
    * Implementation notes
    *   - A "terminated" string has a delimiter after every field, including
    *     the last one ('a,b,c,'); an "unterminated" string has none after
    *     the last field ('a,b,c').  Internally every routine works on the
    *     terminated form and converts on the way in and out.
    *   - Oracle treats the empty string as NULL, and LENGTH (NULL) is NULL,
    *     so every length used in arithmetic or comparisons is guarded.
    *   - Dates are exchanged as text in the format 'DD-MON-YYYY HH24:MI:SS';
    *     the MON element is rendered in the session's date language.
    */

   /*
    * Returns the number of fields in Source_string: the number of delimiter
    * occurrences, plus one when the string is unterminated.  An empty
    * (NULL) Source_string holds no fields and yields 0.
    */
   FUNCTION Counter (Source_string   IN VARCHAR2,
                     UnTerminated    IN BOOLEAN DEFAULT FALSE,
                     Delimiter       IN VARCHAR2 DEFAULT ',')
      RETURN NUMBER
   IS
      iDelimLen     PLS_INTEGER := NVL (LENGTH (Delimiter), 0);
      iFieldCount   PLS_INTEGER := 0;
   BEGIN
      IF Source_string IS NULL
      THEN
         RETURN 0;
      END IF;

      IF iDelimLen > 0
      THEN
         -- Characters removed by REPLACE divided by the delimiter length
         -- gives the number of occurrences.  REPLACE returns NULL when the
         -- string consists of delimiters only, hence the NVL.
         iFieldCount :=
              (  LENGTH (Source_string)
               - NVL (LENGTH (REPLACE (Source_string, Delimiter)), 0))
            / iDelimLen;
      END IF;

      IF UnTerminated
      THEN
         -- the last field has no delimiter of its own
         iFieldCount := iFieldCount + 1;
      END IF;

      RETURN iFieldCount;
   END Counter;

   /*
    * Stores String_to_add as field number Field_position of Source_string.
    * An existing field is replaced; when Source_string has fewer fields,
    * empty fields are added in between so that the new value lands at the
    * requested position.  The result is terminated or unterminated
    * according to UnTerminated.
    *
    * Raises VALUE_ERROR when Field_position > 1 and Delimiter is NULL (the
    * position of such a field cannot be determined).
    */
   PROCEDURE PutString (Source_string    IN OUT VARCHAR2,
                        String_to_add    IN     VARCHAR2,
                        Field_position   IN     NUMBER,
                        UnTerminated     IN     BOOLEAN DEFAULT FALSE,
                        Delimiter        IN     VARCHAR2 DEFAULT ',')
   IS
      -- a NULL flag is treated like FALSE (terminated)
      bOpenEnded    BOOLEAN := (UnTerminated IS NOT NULL AND UnTerminated);
      iDelimLen     PLS_INTEGER := NVL (LENGTH (Delimiter), 0);
      iStrLen       PLS_INTEGER;
      iDelimPos     PLS_INTEGER;
      iPtrStart     PLS_INTEGER := 0;   -- last character kept before the field
      iPtrEnd       PLS_INTEGER := 0;   -- start of the delimiter after the field
      vcSepBuffer   VARCHAR2 (32767);   -- delimiters added for missing fields
   BEGIN
      -- 1. Empty source: build the string from scratch.
      IF Source_string IS NULL
      THEN
         IF Field_Position > 1
         THEN
            FOR iPad IN 2 .. Field_Position
            LOOP
               vcSepBuffer := vcSepBuffer || Delimiter;
            END LOOP;
         END IF;

         Source_string := vcSepBuffer || String_to_add;

         IF NOT bOpenEnded
         THEN
            Source_string := Source_string || Delimiter;
         END IF;

         RETURN;
      END IF;

      -- 2. Work on the terminated form.
      IF bOpenEnded
      THEN
         Source_string := Source_string || Delimiter;
      END IF;

      -- Measured after step 2 so that "end of string" below really is the
      -- end of the string being edited.
      iStrLen := LENGTH (Source_string);

      -- 3. Find the text to keep in front of the field (iPtrStart) and the
      --    delimiter that closes the field (iPtrEnd).
      IF Field_Position > 1
      THEN
         IF Delimiter IS NULL
         THEN
            -- the search below could never succeed
            RAISE VALUE_ERROR;
         END IF;

         -- Append delimiters until the one preceding the field exists.
         LOOP
            iDelimPos :=
               INSTR (Source_string || vcSepBuffer,
                      Delimiter,
                      1,
                      Field_Position - 1);
            EXIT WHEN iDelimPos > 0;
            vcSepBuffer := vcSepBuffer || Delimiter;
         END LOOP;

         iPtrStart := (iDelimPos - 1) + iDelimLen;

         IF vcSepBuffer IS NOT NULL
         THEN
            -- the field lies beyond the current end: nothing follows it
            iPtrEnd := iStrLen;
         ELSE
            iPtrEnd :=
               INSTR (Source_string,
                      Delimiter,
                      1,
                      Field_Position);

            IF iPtrEnd = 0
            THEN
               iPtrEnd := iStrLen;
            END IF;
         END IF;
      ELSE
         iPtrStart := 0;
         iPtrEnd :=
            INSTR (Source_string,
                   Delimiter,
                   1,
                   Field_Position);

         IF iPtrEnd = 0
         THEN
            iPtrEnd := iStrLen;
         END IF;
      END IF;

      -- 4. Rebuild: prefix, padding, new value, its delimiter, remainder.
      Source_string :=
            SUBSTR (Source_string, 1, iPtrStart)
         || vcSepBuffer
         || String_to_add
         || Delimiter
         || SUBSTR (Source_string, iPtrEnd + iDelimLen);

      -- 5. Give back an unterminated string if that is what came in.
      IF bOpenEnded
      THEN
         Source_string :=
            SUBSTR (Source_string,
                    1,
                    LENGTH (Source_string) - iDelimLen);
      END IF;
   END PutString;

   /*
    * Number version of PutString: the value is converted with TO_CHAR.
    */
   PROCEDURE PutNumber (Source_string    IN OUT VARCHAR2,
                        number_to_add    IN     NUMBER,
                        Field_position   IN     NUMBER,
                        UnTerminated     IN     BOOLEAN DEFAULT FALSE,
                        Delimiter        IN     VARCHAR2 DEFAULT ',')
   IS
   BEGIN
      PutString (Source_string,
                 TO_CHAR (number_to_add),
                 Field_position,
                 UnTerminated,
                 Delimiter);
   END PutNumber;

   /*
    * Date version of PutString: the value is stored as text in the format
    * 'DD-MON-YYYY HH24:MI:SS'.
    */
   PROCEDURE PutDate (Source_string    IN OUT VARCHAR2,
                      Date_to_add      IN     DATE,
                      Field_position   IN     NUMBER,
                      UnTerminated     IN     BOOLEAN DEFAULT FALSE,
                      Delimiter        IN     VARCHAR2 DEFAULT ',')
   IS
   BEGIN
      PutString (Source_string,
                 TO_CHAR (date_to_add, 'DD-MON-YYYY HH24:MI:SS'),
                 Field_position,
                 UnTerminated,
                 Delimiter);
   END PutDate;

   /*
    * Returns the text of field number Field_position, or NULL when the
    * field is empty or Source_string has no such field.
    */
   FUNCTION GetString (Source_string    IN VARCHAR2,
                       Field_position   IN NUMBER,
                       UnTerminated     IN BOOLEAN DEFAULT FALSE,
                       Delimiter        IN VARCHAR2 DEFAULT ',')
      RETURN VARCHAR2
   IS
      iDelimLen     PLS_INTEGER := NVL (LENGTH (Delimiter), 0);
      iFieldStart   PLS_INTEGER := 1;
      iFieldEnd     PLS_INTEGER;
      -- sized to the PL/SQL VARCHAR2 maximum so that long lines can be read
      vcWork        VARCHAR2 (32767) := Source_string;
   BEGIN
      IF UnTerminated
      THEN
         vcWork := vcWork || Delimiter;
      END IF;

      IF Field_Position > 1
      THEN
         -- the field starts right after the preceding delimiter
         iFieldStart :=
            INSTR (vcWork,
                   Delimiter,
                   1,
                   Field_Position - 1);

         IF iFieldStart = 0
         THEN
            RETURN NULL;                               -- no such field
         END IF;

         iFieldStart := iFieldStart + iDelimLen;
      END IF;

      -- ... and ends just before its own delimiter
      iFieldEnd :=
         INSTR (vcWork,
                Delimiter,
                1,
                Field_Position);

      IF iFieldEnd = 0
      THEN
         RETURN NULL;                                  -- no such field
      END IF;

      RETURN SUBSTR (vcWork, iFieldStart, iFieldEnd - iFieldStart);
   END GetString;                                         /* String Version */

   /*
    * Returns field number Field_position converted with TO_NUMBER; the
    * conversion error is raised if the field is not numeric.
    */
   FUNCTION GetNumber (Source_string    IN VARCHAR2,
                       Field_position   IN NUMBER,
                       UnTerminated     IN BOOLEAN DEFAULT FALSE,
                       Delimiter        IN VARCHAR2 DEFAULT ',')
      RETURN NUMBER
   IS
   BEGIN
      RETURN TO_NUMBER (GetString (Source_string,
                                   Field_Position,
                                   UnTerminated,
                                   Delimiter));
   END GetNumber;                                         /* Number Version */

   /*
    * Returns field number Field_position converted with TO_DATE using the
    * format 'DD-MON-YYYY HH24:MI:SS'.
    */
   FUNCTION GetDate (Source_string    IN VARCHAR2,
                     Field_position   IN NUMBER,
                     UnTerminated     IN BOOLEAN DEFAULT FALSE,
                     Delimiter        IN VARCHAR2 DEFAULT ',')
      RETURN DATE
   IS
   BEGIN
      RETURN TO_DATE (GetString (Source_string,
                                 Field_Position,
                                 UnTerminated,
                                 Delimiter),
                      'DD-MON-YYYY HH24:MI:SS');
   END GetDate;                                             /* Date Version */

   /*
    * Returns the position of the first field equal to Search_String, or 0
    * when no field matches.  A NULL Search_String never matches, because a
    * comparison with NULL is never true.
    */
   FUNCTION Locate (Source_string   IN VARCHAR2,
                    Search_String   IN VARCHAR2,
                    UnTerminated    IN BOOLEAN DEFAULT FALSE,
                    Delimiter       IN VARCHAR2 DEFAULT ',')
      RETURN NUMBER
   IS
   BEGIN
      FOR iField IN 1 .. Counter (Source_String, UnTerminated, Delimiter)
      LOOP
         IF GetString (Source_String,
                       iField,
                       UnTerminated,
                       Delimiter) = Search_String
         THEN
            RETURN iField;
         END IF;
      END LOOP;

      RETURN 0;
   END Locate;

   /*
    * Date version of Locate: searches for the date rendered as
    * 'DD-MON-YYYY HH24:MI:SS'.
    */
   FUNCTION Locate (Source_string   IN VARCHAR2,
                    Search_date     IN DATE,
                    UnTerminated    IN BOOLEAN DEFAULT FALSE,
                    Delimiter       IN VARCHAR2 DEFAULT ',')
      RETURN NUMBER
   IS
   BEGIN
      RETURN Locate (Source_string,
                     TO_CHAR (Search_date, 'DD-MON-YYYY HH24:MI:SS'),
                     UnTerminated,
                     Delimiter);
   END Locate;                                              /* Date Version */

   /*
    * Number version of Locate: searches for TO_CHAR (Search_number).
    */
   FUNCTION Locate (Source_string   IN VARCHAR2,
                    Search_number   IN NUMBER,
                    UnTerminated    IN BOOLEAN DEFAULT FALSE,
                    Delimiter       IN VARCHAR2 DEFAULT ',')
      RETURN NUMBER
   IS
   BEGIN
      RETURN Locate (Source_string,
                     TO_CHAR (Search_number),
                     UnTerminated,
                     Delimiter);
   END Locate;                                            /* Number Version */
END;                                                                /* Body */
/