清理一些数据以便导入postgresql的最佳方法?

时间:2017-02-01 16:24:24

标签: postgresql

我有两列数据:YYMMDD格式的date和HHMMSS格式的time,它们都是类似150103和132244的字符串。这些记录接近25亿条。在导入PostgreSQL之前清理数据的最佳方法是什么?例如,有没有办法在导入时执行此操作?

1 个答案:

答案 0 :(得分:2)

您可以使用函数 to_timestamp() 将数据转换为带时区的时间戳:
-- Demo: join a YYMMDD date string and an HH24MISS time string and parse the
-- combined text with to_timestamp(), which yields a timestamp with time zone.
-- The sample row is supplied inline through a VALUES list.
select
    d,
    t,
    to_timestamp(concat(d, t), 'yymmddhh24miss')
from (
    values ('150103', '132244')
) as example(d, t);

   d    |   t    |      to_timestamp      
--------+--------+------------------------
 150103 | 132244 | 2015-01-03 13:22:44+01
(1 row)

您可以使用临时列(d,t)将文件导入到表格中:

-- Staging table: both raw fields are loaded as plain text first and are
-- converted to a real timestamp in a later step.
create table example(
    d text,  -- date string, YYMMDD
    t text   -- time string, HHMMSS
);
-- Bulk-load the file into the staging table (source path/options elided).
copy example from ....

添加带时区的时间戳列,转换数据并删除冗余文本列:

-- Convert the staged text columns (d, t) into a single timestamptz column.
--
-- All three steps run in ONE transaction. Without it, a client that keeps
-- going after an error (psql does by default) would still execute the final
-- DROP after a failed UPDATE -- e.g. one malformed d/t value -- and destroy
-- the only copy of the source data. Inside a transaction the failed UPDATE
-- aborts everything, so the original columns survive and can be repaired.
-- NOTE(review): the ALTER TABLE lock is held until COMMIT; fine for a staging
-- table during import, confirm nothing else needs the table meanwhile.
begin;

alter table example add tstamp_column timestamptz;

-- Intentionally no WHERE clause: every imported row must be converted.
update example
set tstamp_column = to_timestamp(concat(d, t), 'yymmddhh24miss');

-- The text columns are redundant once the conversion has succeeded.
alter table example drop d, drop t;

commit;