我有几十个 CSV 文件,每个文件有一百多列。我需要将这些文件上传到 Postgres 表中,以便处理它们并将数据转移到关系表。我不想手动处理每个文件来提取列名,因为这是一个高度重复的过程。pgAdmin 的导入工具和 COPY 命令都不会根据第一行来创建表的列。那么处理这个问题的最佳方法是什么?
答案 0 :(得分:0)
我不确定这是否是一种合理的方法(因为它大量使用动态 SQL,而且提取列名的方式比较取巧),但它对我有用:
-- data.csv_to_table: load a server-side CSV file into a new table in schema
-- "data", naming the columns after the file's first (header) row.
--
-- Parameters:
--   target_table  final table name inside schema "data". When NULL or empty the
--                 data is left in data.temp_table. An existing table with the
--                 same name is dropped first.
--   csv_path      path of the CSV file as seen by the database server (COPY
--                 reads it on the server, not on the client).
--   col_count     number of columns in the file; every column is created as
--                 varchar.
--
-- Notes:
--   * Every object is schema-qualified, so the caller's search_path is no
--     longer changed as a side effect of calling this function.
--   * The file is read in COPY's CSV format, so quoted fields (embedded commas,
--     quotes, line breaks) are handled; empty unquoted fields load as NULL.
--   * Names that are valid unquoted identifiers are folded to lower case, as
--     PostgreSQL itself does for unquoted names; any other name is kept
--     verbatim and quoted. Header cells that are empty keep their col_N name.
--   * data.temp_table is a fixed staging name, so concurrent calls collide.
create or replace function data.csv_to_table (in target_table text, in csv_path text, in col_count integer)
returns void as $$
declare
    col_defs text;       -- generated "col_1 varchar, col_2 varchar, ..." list
    header_tid tid;      -- physical row id of the header row in the staging table
    col_idx integer;     -- 1-based position of the column being renamed
    col_name text;       -- header label read for the current column
    final_name text;     -- normalised target table name
begin
    if col_count is null or col_count < 1 then
        raise exception 'col_count must be a positive integer, got %', col_count;
    end if;

    if csv_path is null then
        raise exception 'csv_path must not be null';
    end if;

    drop table if exists data.temp_table;

    -- build the staging table in one statement instead of one ALTER per column
    select string_agg(format('%I varchar', 'col_' || n), ', ' order by n)
    into col_defs
    from generate_series(1, col_count) as s(n);

    execute format('create table data.temp_table (%s)', col_defs);

    -- %L quotes the path as a literal, so apostrophes in it cannot break out
    execute format('copy data.temp_table from %L with (format csv)', csv_path);

    -- The header is taken to be the first row returned from the freshly loaded
    -- table (the same assumption the previous "limit 1" lookup made). Its ctid
    -- is remembered so that exactly this row is read and later removed.
    execute 'select ctid from data.temp_table limit 1' into header_tid;

    if header_tid is null then
        raise exception 'csv file % contains no rows', csv_path;
    end if;

    -- rename each placeholder column to the label found in the header row
    for col_idx in 1..col_count
    loop
        execute format('select %I from data.temp_table where ctid = $1', 'col_' || col_idx)
        into col_name
        using header_tid;

        -- an empty header cell cannot become an identifier; keep col_N
        if col_name is null or col_name = '' then
            continue;
        end if;

        -- mimic PostgreSQL's case folding for plain, unquoted identifiers
        if col_name ~ '^[A-Za-z_][A-Za-z0-9_]*$' then
            col_name := lower(col_name);
        end if;

        -- renaming a column to its current name is an error, so skip that case
        if col_name <> 'col_' || col_idx then
            execute format('alter table data.temp_table rename column %I to %I',
                           'col_' || col_idx, col_name);
        end if;
    end loop;

    -- remove only the header row; data rows that happen to repeat the first
    -- header label are kept
    execute 'delete from data.temp_table where ctid = $1' using header_tid;

    -- give the staging table its final name when one was supplied
    if length(target_table) > 0 then
        final_name := target_table;

        if final_name ~ '^[A-Za-z_][A-Za-z0-9_]*$' then
            final_name := lower(final_name);
        end if;

        -- when the target is the staging table itself there is nothing to do
        if final_name <> 'temp_table' then
            execute format('drop table if exists data.%I', final_name);
            execute format('alter table data.temp_table rename to %I', final_name);
        end if;
    end if;
end;
$$ language plpgsql;