我在vars中使用unicode char解析JSON时遇到了问题。 所以,我有下一个JSON(例子):
{
"SASJSONExport":"1.0",
"SASTableData+TEST":[
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":4,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0031"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":2,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0032"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":1,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":42,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0033"
}
]
}
要从JSON解析表,我使用SAS引擎:
libname jsonfl JSON fileref=injson ;
代码更高解码单元格中的字符,但vars的名称看起来像缺少的val:
+--------------+---------------------------+------------+---------+---------+
| ordinal_root | ordinal_SASTableData_TEST | __________ | _______ | ______ |
+--------------+---------------------------+------------+---------+---------+
| 1 | 1 | 2 | 4 | Что-то1 |
| 1 | 2 | 2 | 2 | Что-то2 |
| 1 | 3 | 1 | 42 | Что-то3 |
+--------------+---------------------------+------------+---------+---------+
标题必须如下:
+--------------+---------------------------+------------+---------+---------+
| ordinal_root | ordinal_SASTableData_TEST | Переменная | Среднее | Строка |
+--------------+---------------------------+------------+---------+---------+
所以我决定用这个DIM_N_
之类的名称替换unicoded变量字符。
为此,我必须找到所有符合下一个正则表达式的字符串:/([\s\w\d\\]+)\"\:/
但是,要从json获取字符串,我需要设置为delim下一个char '{','}','[',']',','
。
但是如果将那个字符设置为dlm,我将不会再次组装json。
所以我决定在char ~
之前粘贴以将其设置为dlm。
data delim;
infile injson lrecl=1073741823 nopad;
file delim;
input char1 $char1. @@;
if char1 in ('{','}','[',']',',') then
put '7E'x;
put char1 $CHAR1. @@;
run;
我收到了无效的json文件:
~
{"SASJSONExport":"1.0"~
,"SASTableData+TEST":~
[ ~
{"\u0056\u0061\u0072":2~
,"\u006d\u0065\u0061\u006e":4~
,"\u004e\u0061\u006d\u0065":"\u0073\u006d\u0074\u0068\u0031"~
}~
, ~
{"\u0056\u0061\u0072":2~
,"\u006d\u0065\u0061\u006e":2~
,"\u004e\u0061\u006d\u0065":"\u0073\u006d\u0074\u0068\u0032"~
}~
, ~
{"\u0056\u0061\u0072":1~
,"\u006d\u0065\u0061\u006e":42~
,"\u004e\u0061\u006d\u0065":"\u0073\u006d\u0074\u0068\u0033"~
} ~
]~
}
因此,下一步我要解析JSON并使用~
作为分隔符:
data transfer;
length column $2000;
retain r;
infile delim delimiter='7E'x nopad;
input char1 : $4000. @@;
r = prxparse('/([\s\w\d\\]+)\"\:/');
pos = prxmatch(r,char1);
column = prxposn(r,1,char1);
n= _n_;
run;
它有效...但我觉得那些 太糟糕的做法 ,而且它有限制。
UPD1
选项,
options vAlidfmtname=long VALIDMEMNAME=extend VALIDVARNAME=any;
返回:
+--------------+---------------------------+----------------------------+---------+--------------+
| ordinal_root | ordinal_SASTableData_TEST | __________ | _______ | ______ |
+--------------+---------------------------+----------------------------+---------+--------------+
| 1 | 1 | авфа2 фвафв = фвыа - тфвыа | 4 | Что-то1 ,,,, |
| 1 | 2 | авфа2 фвафв = фвыа - тфвыа | 2 | Что-то2 |
| 1 | 3 | авфа2 фвафв = фвыа - тфвыа | 2017 | Что-то3 |
+--------------+---------------------------+----------------------------+---------+--------------+
所以我的问题是:
infile
语句的情况下解码整个文件吗?infile delimiter
,但是设置smth选项以不删除分隔符吗?欢迎充分的批评。
答案 0 :(得分:1)
UPD
我来到解决方案时无需手动编辑json地图文件,而是使用正则表达式。
libname _all_ clear;
filename _all_ clear;
filename _PDFOUT temp;
filename _GSFNAME temp;
proc datasets lib=work kill memtype=data nolist; quit;
filename jsf '~/sasuser.v94/.json' encoding='utf-8';
data _null_;
file jsf;
length js varchar(*);
retain js;
input;
js=unicode(_infile_);
put js;
datalines;
{
"SASJSONExport":"1.0",
"SASTableData+TEST":[
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":4,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0031"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":2,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0032"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":1,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":42,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0033"
}
]
}
;
run;
filename jsm '~/sasuser.v94/.json.map' encoding='utf-8';
libname jsd json fileref=jsf map=jsm automap=replace;
libname jsm json fileref=jsm;
data jsmm;
merge jsm.datasets jsm.datasets_variables;
by ordinal_DATASETS;
run;
proc sort data=jsmm; by ordinal_root ordinal_DATASETS; run;
data _null_;
set work.jsmm end=last;
if _N_=1 then do;
length s varchar(*) ds varchar(*);
retain s ds prx;
s='{"DATASETS":[';
ds='';
prx=prxparse('/[^_]/');
end;
if ds=dsname then s=s||',';
else do;
ds=dsname;
if _N_^=1 then s=s||']},';
s=cats(s,'{"DSNAME":"',ds,'","TABLEPATH":"',tablepath,'","VARIABLES":[');
end;
s=cats(s,'{"NAME":"',name,'","TYPE":"',type,'","PATH":"',path,'"');
if prxmatch(prx,name) > length(name) then
s=cats(s,',"LABEL":"',scan(path,-1,'/'),'"');
s=s||'}';
if last then do;
s=s||']}]}';
file jsm;
put s;
end;
run;
libname jsd json fileref=jsf map=jsm;
proc print data=jsd.SASTableData_TEST label noobs; run;
解决方案的第一个变体
这是一个快速的解决方案。
首先准备输入数据:
libname _all_ clear;
filename _all_ clear;
filename jsf '~/sasuser.v94/.json' encoding='utf-8';
data _null_;
file jsf;
length js varchar(*);
input;
js=unicode(_infile_);
put js;
datalines;
{
"SASJSONExport":"1.0",
"SASTableData+TEST": [
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":4,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0031"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":2,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":2,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0032"
},
{
"\u041f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f":1,
"\u0421\u0440\u0435\u0434\u043d\u0435\u0435":42,
"\u0421\u0442\u0440\u043e\u043a\u0430":"\u0427\u0442\u043e\u002d\u0442\u043e\u0033"
}
]
}
;
run;
输出文件.json
:
{
"SASJSONExport":"1.0",
"SASTableData+TEST": [
{
"Переменная":2,
"Среднее":4,
"Строка":"Что-то1"
},
{
"Переменная":2,
"Среднее":2,
"Строка":"Что-то2"
},
{
"Переменная":1,
"Среднее":42,
"Строка":"Что-то3"
}
]
}
然后创建json地图文件.json.map
:
filename jsmf '~/sasuser.v94/.json.map' encoding='utf-8';
libname jsm json fileref=jsf map=jsmf automap=create;
.json.map
内容:
{
"DATASETS": [
{
"DSNAME": "root",
"TABLEPATH": "/root",
"VARIABLES": [
{
"NAME": "ordinal_root",
"TYPE": "ORDINAL",
"PATH": "/root"
},
{
"NAME": "SASJSONExport",
"TYPE": "CHARACTER",
"PATH": "/root/SASJSONExport",
"CURRENT_LENGTH": 3
}
]
},
{
"DSNAME": "SASTableData_TEST",
"TABLEPATH": "/root/SASTableData+TEST",
"VARIABLES": [
{
"NAME": "ordinal_root",
"TYPE": "ORDINAL",
"PATH": "/root"
},
{
"NAME": "ordinal_SASTableData_TEST",
"TYPE": "ORDINAL",
"PATH": "/root/SASTableData+TEST"
},
{
"NAME": "____________________",
"TYPE": "NUMERIC",
"PATH": "/root/SASTableData+TEST/Переменная"
},
{
"NAME": "______________",
"TYPE": "NUMERIC",
"PATH": "/root/SASTableData+TEST/Среднее"
},
{
"NAME": "____________",
"TYPE": "CHARACTER",
"PATH": "/root/SASTableData+TEST/Строка",
"CURRENT_LENGTH": 12
}
]
}
]
}
通过删除不必要的数据集的描述并添加标签,让我们稍微更改一下文件:
{
"DATASETS": [
{
"DSNAME": "SASTableData_TEST",
"TABLEPATH": "/root/SASTableData+TEST",
"VARIABLES": [
{
"NAME": "ordinal_root",
"TYPE": "ORDINAL",
"PATH": "/root"
},
{
"NAME": "ordinal_SASTableData_TEST",
"TYPE": "ORDINAL",
"PATH": "/root/SASTableData+TEST"
},
{
"NAME": "____________________",
"TYPE": "NUMERIC",
"PATH": "/root/SASTableData+TEST/Переменная",
"LABEL": "Переменная"
},
{
"NAME": "______________",
"TYPE": "NUMERIC",
"PATH": "/root/SASTableData+TEST/Среднее",
"LABEL": "Среднее"
},
{
"NAME": "____________",
"TYPE": "CHARACTER",
"PATH": "/root/SASTableData+TEST/Строка",
"LABEL": "Строка",
"CURRENT_LENGTH": 12
}
]
}
]
}
再试一次:
libname jsd json fileref=jsf map=jsmf;
proc print data=jsd.SASTableData_TEST label noobs; run;
结果:
+--------------+---------------------------+- ----------+---------+-----------+
| ordinal_root | ordinal_SASTableData_TEST | Переменная | Среднее | Строка |
+--------------+---------------------------+------------+---------+-----------+
| 1 | 1 | 2 | 4 | Что-то1 |
| 1 | 2 | 2 | 2 | Что-то2 |
| 1 | 3 | 1 | 42 | Что-то3 |
+--------------+---------------------------+------------+---------+-----------+
所有这些都是在SAS大学版中完成的。