将数据从csv文件导入postgres表而不先创建表

时间:2015-05-07 22:35:24

标签: postgresql csv

我有几十个csv文件,每个文件都有一百多列。我需要把这些文件导入postgres表,以便对其进行处理,再把数据转存到关系表中。我不想手动处理每个文件来提取列名,因为这会是一个大量重复的过程。pgAdmin的导入工具和COPY命令都不会根据文件的第一行自动创建表的列。那么处理这个问题的最佳方法是什么?

1 个答案:

答案 0 :(得分:0)

我不确定这是否是一种合理的方法(因为它大量使用了动态sql,而且提取列名的方式也不太常规),但它对我有用:

-- Loads a delimited text file into a freshly created table whose column names
-- are taken from the first line of the file.
--
-- Parameters:
--   target_table  plain (unqualified) name for the resulting table; when NULL
--                 or empty the data is left in the staging table temp_table.
--                 An existing table with that name is dropped first.
--   csv_path      path of the file, as seen by the database server (COPY FROM
--                 reads the file on the server side).
--   col_count     number of columns in the file; must be >= 1.
--
-- Behaviour notes:
--   * Every column is created as varchar.
--   * Header labels and target_table are trimmed / lower-cased and then quoted
--     as identifiers, so simple names behave as they did when they were spliced
--     in unquoted (folded to lower case), while labels containing spaces or
--     reserved words no longer break the generated SQL.
--   * A header cell that is NULL or empty leaves the column named col_<n>.
--   * The header row is identified by its physical position (ctid), not by its
--     value, so data rows that repeat the first header label are preserved.
--
-- Raises an exception when col_count is not positive, csv_path is NULL, or the
-- file contains no rows.
create or replace function data.csv_to_table (in target_table text, in csv_path text, in col_count integer)
returns void as $$
declare
    header_ctid tid;     -- physical location of the first loaded row (the header line)
    header_label text;   -- raw header cell read from the header row
    new_name text;       -- normalised column name derived from header_label
    final_name text;     -- normalised target table name
begin
    if col_count is null or col_count < 1 then
        raise exception 'csv_to_table: col_count must be a positive integer, got %', col_count;
    end if;

    if csv_path is null then
        raise exception 'csv_to_table: csv_path must not be null';
    end if;

    -- NOTE(review): this is a session-level SET, so the search_path stays
    -- changed after the function returns. Kept as-is because callers may rely
    -- on it; consider a function-level "set search_path" clause instead.
    set schema 'data';

    drop table if exists temp_table;
    create table temp_table ();

    -- add exactly col_count placeholder columns: col_1 .. col_<col_count>
    for col_idx in 1..col_count loop
        execute format('alter table temp_table add column %I varchar', 'col_' || col_idx);
    end loop;

    -- Load the file, header line included. %L quotes the path as a literal so
    -- a quote character in it cannot terminate the string.
    -- NOTE(review): this is COPY's text format with a comma delimiter, not
    -- "format csv"; quoted fields containing commas are not parsed. Left
    -- unchanged to keep NULL / backslash handling identical for existing files.
    execute format('copy temp_table from %L with delimiter '',''', csv_path);

    -- The table was just created and filled by a single COPY, so the row with
    -- the lowest ctid is expected to be the first line of the file.
    execute 'select ctid from temp_table order by ctid limit 1' into header_ctid;

    if header_ctid is null then
        raise exception 'csv_to_table: % contains no rows, cannot read column names', csv_path;
    end if;

    -- rename each placeholder column to the label found in the header row
    for col_idx in 1..col_count loop
        execute format('select %I from temp_table where ctid = $1', 'col_' || col_idx)
            into header_label
            using header_ctid;

        new_name := btrim(lower(header_label));

        -- skip blank labels and labels that already equal the placeholder name
        -- (renaming a column to its own name is an error)
        if new_name is not null and new_name <> '' and new_name <> 'col_' || col_idx then
            execute format('alter table temp_table rename column %I to %I', 'col_' || col_idx, new_name);
        end if;
    end loop;

    -- remove only the header row itself
    execute 'delete from temp_table where ctid = $1' using header_ctid;

    -- give the staging table its final name, replacing any previous table
    final_name := btrim(lower(target_table));

    if length(final_name) > 0 and final_name <> 'temp_table' then
        execute format('drop table if exists %I', final_name);
        execute format('alter table temp_table rename to %I', final_name);
    end if;
end;
$$ language plpgsql;