我有以下 JSON 数据,正在尝试将其迁移到 PostgreSQL。
JSON Data:
{
"wsgi.multiprocess": true,
"HTTP_REFERER": "http://localhost:9000/",
"SCRIPT_NAME": "",
"REQUEST_METHOD": "GET",
"PATH_INFO": "/api/impressions/i/",
"HTTP_ORIGIN": "http://localhost:9000",
"SERVER_PROTOCOL": "HTTP/1.1",
"QUERY_STRING": "",
"CONTENT_LENGTH": "",
"HTTP_USER_AGENT": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36",
"HTTP_CONNECTION": "keep-alive",
"HTTP_COOKIE": "_ga=GA1.3.1851235816.1425597711; sessionid=ihukujut48uhatb1rqtzaed78jszqsyk; csrftoken=8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj; customer=\"AmsrbY7bSj5wiDQPM7xcRa:1YdLVd:nKyRyZNx5aoLLmVRL4o9aN267vI\"",
"SERVER_NAME": "app.adomattic.com",
"REMOTE_ADDR": "182.186.59.228",
"HTTP_X_FIREPHP_VERSION": "0.0.6",
"wsgi.url_scheme": "http",
"SERVER_PORT": "80",
"uwsgi.node": "stage",
"HTTP_PUBLISHER_KEY": "ng2HM6ThZehtWHR2tgonBg",
"HTTP_DNT": "1",
"HTTP_HOST": "app.adomattic.com",
"wsgi.multithread": false,
"HTTP_CACHE_CONTROL": "max-age=0",
"REQUEST_URI": "/api/impressions/i/",
"HTTP_ACCEPT": "application/json, text/plain, */*",
"wsgi.run_once": false,
"REMOTE_PORT": "50740",
"HTTP_ACCEPT_LANGUAGE": "en-US,en;q=0.8,ur;q=0.6",
"uwsgi.version": "1.9.17.1-debian",
"CONTENT_TYPE": "",
"DOCUMENT_ROOT": "/usr/share/nginx/html",
"CSRF_COOKIE": "8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj",
"HTTP_ACCEPT_ENCODING": "gzip, deflate, sdch"
}
要迁移此数据,首先我要在我的数据库中创建一个表:
-- Target table for the request metadata: one column per field that the
-- load query in this file extracts from the raw JSON text.
-- Unquoted identifiers fold to lower case in PostgreSQL, so these names
-- resolve the same whether callers write them in upper or lower case.
CREATE TABLE filtered_data
(
    row_id               INTEGER,

    -- request line and routing
    multiprocess         VARCHAR(10),   -- stored as text, not BOOLEAN
    http_referer         VARCHAR(100),
    script_name          VARCHAR(20),
    request_method       VARCHAR(10),
    path_info            VARCHAR(40),
    http_origin          VARCHAR(100),
    server_protocol      VARCHAR(30),
    query_string         VARCHAR(50),
    content_length       VARCHAR(20),

    -- client headers
    http_user_agent      VARCHAR(400),
    http_connection      VARCHAR(30),
    http_cookie          VARCHAR(500),

    -- server / peer addressing
    server_name          VARCHAR(30),
    remote_addr          VARCHAR(30),
    firephp_version      VARCHAR(20),
    url_scheme           VARCHAR(10),
    server_port          INTEGER,
    node                 VARCHAR(20),
    publisher_key        VARCHAR(30),
    http_dnt             INTEGER,
    http_host            VARCHAR(30),
    multithread          VARCHAR(10),   -- stored as text, not BOOLEAN
    cache_control        VARCHAR(20),
    request_uri          VARCHAR(30),
    http_accept          VARCHAR(50),
    run_once             VARCHAR(10),   -- stored as text, not BOOLEAN
    remote_port          INTEGER,
    http_accept_language VARCHAR(30),
    uwsgi_version        VARCHAR(30),
    content_type         VARCHAR(20),
    document_root        VARCHAR(40),
    csrf_cookie          VARCHAR(50),
    http_accept_encoding VARCHAR(50)
);
创建此表后,我使用以下查询将JSON数据复制到此表中:
-- Load metadata.txt into raw_data.data, parsed as CSV with '#' as the
-- field separator.
COPY raw_data (data)
FROM 'metadata.txt'
WITH (FORMAT csv, DELIMITER '#');
此语句将整个文件加载到跨越多行的单个列中。
然后,我使用以下插入查询将JSON数据拆分为相应的列:
-- Split each raw_data.DATA string into the columns of filtered_data.
-- Every value is taken as the text between its own 'KEY:' label and the
-- ', NEXT_KEY:' label that follows it in DATA.
--
-- SPLIT_PART returns '' when the requested field does not exist, e.g. when
-- a row of raw_data does not contain the label being searched for.
-- CAST('' AS INT) raises
--   invalid input syntax for integer: ""
-- so the three integer columns map '' to NULL with NULLIF before casting.
INSERT INTO filtered_data
(
ROW_ID
,MULTIPROCESS
,HTTP_REFERER
,SCRIPT_NAME
,REQUEST_METHOD
,PATH_INFO
,HTTP_ORIGIN
,SERVER_PROTOCOL
,QUERY_STRING
,CONTENT_LENGTH
,HTTP_USER_AGENT
,HTTP_CONNECTION
,HTTP_COOKIE
,SERVER_NAME
,REMOTE_ADDR
,FIREPHP_VERSION
,URL_SCHEME
,SERVER_PORT
,NODE
,PUBLISHER_KEY
,HTTP_DNT
,HTTP_HOST
,MULTITHREAD
,CACHE_CONTROL
,REQUEST_URI
,HTTP_ACCEPT
,RUN_ONCE
,REMOTE_PORT
,HTTP_ACCEPT_LANGUAGE
,UWSGI_VERSION
,CONTENT_TYPE
,DOCUMENT_ROOT
,CSRF_COOKIE
,HTTP_ACCEPT_ENCODING
)
SELECT
row_id
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multiprocess:',2), ', HTTP_REFERER:',1)) AS MULTIPROCESS
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_REFERER:',2), ', SCRIPT_NAME:',1)) AS HTTP_REFERER
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SCRIPT_NAME:',2), ', REQUEST_METHOD:',1)) AS SCRIPT_NAME
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_METHOD:',2), ', PATH_INFO:',1)) AS REQUEST_METHOD
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'PATH_INFO:',2), ', HTTP_ORIGIN:',1)) AS PATH_INFO
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ORIGIN:',2), ', SERVER_PROTOCOL:',1)) AS HTTP_ORIGIN
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_PROTOCOL:',2), ', QUERY_STRING:',1)) AS SERVER_PROTOCOL
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'QUERY_STRING:',2), ', CONTENT_LENGTH:',1)) AS QUERY_STRING
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_LENGTH:',2), ', HTTP_USER_AGENT:',1)) AS CONTENT_LENGTH
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_USER_AGENT:',2), ', HTTP_CONNECTION:',1)) AS HTTP_USER_AGENT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CONNECTION:',2), ', HTTP_COOKIE:',1)) AS HTTP_CONNECTION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_COOKIE:',2), ', SERVER_NAME:',1)) AS HTTP_COOKIE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_NAME:',2), ', REMOTE_ADDR:',1)) AS SERVER_NAME
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REMOTE_ADDR:',2), ', HTTP_X_FIREPHP_VERSION:',1)) AS REMOTE_ADDR
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_X_FIREPHP_VERSION:',2), ', wsgi.url_scheme:',1)) AS FIREPHP_VERSION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.url_scheme:',2), ', SERVER_PORT:',1)) AS URL_SCHEME
-- integer column: '' -> NULL before the cast
,CAST(NULLIF(TRIM(SPLIT_PART(SPLIT_PART(DATA, ', SERVER_PORT:',2), ', uwsgi.node:',1)), '') AS INT) AS SERVER_PORT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.node:',2), ', HTTP_PUBLISHER_KEY:',1)) AS NODE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_PUBLISHER_KEY:',2), ', HTTP_DNT:',1)) AS PUBLISHER_KEY
-- integer column: '' -> NULL before the cast
,CAST(NULLIF(TRIM(SPLIT_PART(SPLIT_PART(DATA, ', HTTP_DNT:',2), ', HTTP_HOST:',1)), '') AS INT) AS HTTP_DNT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_HOST:',2), ', wsgi.multithread:',1)) AS HTTP_HOST
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multithread:',2), ', HTTP_CACHE_CONTROL:',1)) AS MULTITHREAD
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CACHE_CONTROL:',2), ', REQUEST_URI:',1)) AS CACHE_CONTROL
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_URI:',2), ', HTTP_ACCEPT:',1)) AS REQUEST_URI
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT:',2), ', wsgi.run_once:',1)) AS HTTP_ACCEPT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.run_once:',2), ', REMOTE_PORT:',1)) AS RUN_ONCE
-- integer column: '' -> NULL before the cast
,CAST(NULLIF(TRIM(SPLIT_PART(SPLIT_PART(DATA, ', REMOTE_PORT:',2), ', HTTP_ACCEPT_LANGUAGE:',1)), '') AS INT) AS REMOTE_PORT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT_LANGUAGE:',2), ', uwsgi.version:',1)) AS HTTP_ACCEPT_LANGUAGE
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.version:',2), ', CONTENT_TYPE:',1)) AS UWSGI_VERSION
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_TYPE:',2), ', DOCUMENT_ROOT:',1)) AS CONTENT_TYPE
-- terminator labels below use the same 'KEY:' form as every other line
-- (a stray double quote before the colon kept them from matching)
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'DOCUMENT_ROOT:',2), ', CSRF_COOKIE:',1)) AS DOCUMENT_ROOT
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CSRF_COOKIE:',2), ', HTTP_ACCEPT_ENCODING:',1)) AS CSRF_COOKIE
-- last key extracted: no following label to cut at, so keep the remainder
,TRIM(SPLIT_PART(DATA, 'HTTP_ACCEPT_ENCODING:',2)) AS HTTP_ACCEPT_ENCODING
FROM raw_data;
但是,当我运行插入查询时,收到以下错误:
错误:整数的输入语法无效:""
我只有三个字段是整数类型,而且它们都有有效值。为什么会收到此错误?
答案 0 :(得分:1)
如果您使用的是 PostgreSQL 9.3 或更高版本,则可以直接使用 JSON 函数 json_populate_record 来填充记录(参见 PostgreSQL 文档的 functions-json 一节)。
-- Build one filtered_data row per raw_data row by letting
-- json_populate_record map JSON keys onto the table's columns.
-- The function returns filtered_data's own row type, so parsed.* is already
-- in the table's column order.
-- raw_data is joined laterally so that DATA refers to an actual column;
-- without a FROM source the name cannot be resolved.
-- NOTE(review): assumes each raw_data.DATA value holds one complete JSON
-- object - confirm how the file was loaded.
-- NOTE(review): keys are matched against column names; keys with no
-- same-named column (e.g. "wsgi.multiprocess") are left NULL, and the
-- matching appears to be case-sensitive - verify against the lower-case
-- folded column names.
INSERT INTO filtered_data
SELECT parsed.*
FROM raw_data AS src
CROSS JOIN LATERAL json_populate_record(NULL::filtered_data, CAST(src.DATA AS json)) AS parsed;