我第一次尝试使用SAS解析Json文件。 我在Internet上找到了一些脚本,并在SAS上尝试了这个CODE,但无法使其在我的JSON数据文件上运行。
data parsed;
infile data lrecl=32000 truncover scanover dlm=",";
input @'"driver_id":{' driver_id $255.
@'"publication_site_id":{' publication_site_id $255.
@'"confirmed_seat_count":{' confirmed_seat_count $255.
@'"published_date":{' published_date $255.
@'"trip_date":{' trip_date $255.
@'"first_publication_date":{' first_publication_date $255.
@'"profile_picture_added_date":{' profile_picture_added_date $255.;
driver_id=scan(driver_id, 1, '",');
published_date=scan(published_date, 1, '",');
trip_date =scan(trip_date, 1, '",');
publication_site_id=scan(publication_site_id, 1, '",');
confirmed_seat_count =scan(confirmed_seat_count, 1, '",');
first_publication_date =scan(first_publication_date, 1, '",');
profile_picture_added_date =scan(profile_picture_added_date, 1, '",');
run;
以下是我的Json文件概述
{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675},
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}
我非常感谢你的帮助。
此致 SM
答案 0 :(得分:1)
这应该会帮助你:
options noquotelenmax;
filename data temp lrecl=32000;
data _null_;
file data;
put '{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675},
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}';
run;
data parsed;
infile data lrecl=32000 truncover scanover dlm='}' ;
input @'"driver_id": {' driver_id: $255.
@'"publication_site_id":{' publication_site_id: $255.
@'"confirmed_seat_count":{' confirmed_seat_count: $255.
@'"published_date":{' published_date: $255.
@'"trip_date":{' trip_date: $255.
@'"first_publication_date":{' first_publication_date: $255.
@'"profile_picture_added_date":{' profile_picture_added_date: $255.@@;
run;
我更新了分隔符(允许您保留内部对象,它们本身是分隔的),添加了冒号输入修饰符(允许您通过说&#34读取非标准值;开始读取下一个非空白列&# 34;)并以@@尾随列指针结束,以保存输出后续观察的记录(有可能你的json将作为一个长流到达)。哦,我还为@'"driver_id": {'
添加了一个空格 - 这是一个错字吗?
我省略了扫描部分,认为你控制了这一点!
答案 1 :(得分:1)
不是我最好的工作,但它会给你所追求的!需要重新组织列和一些dq,但您的JSON将看起来像SAS数据集。我建议你看一下python pandas
包。使用pandas
,您可以使用一行代码对其进行排序。
data parsed;
infile "/your_path/test.json" lrecl=32500 truncover scanover dlm="," firstobs=2;
input @'"driver_id"' _driver_id $255.
@'"publication_site_id"' _publication_site_id $255.
@'"confirmed_seat_count"' _confirmed_seat_count $255.
@'"published_date"' _published_date $255.
@'"trip_date"' _trip_date $255.
@'"first_publication_date"' _first_publication_date $255.
@'"profile_picture_added_date"' _profile_picture_added_date $255.;
array v{*} _:;
do i=1 to dim(v);
v[i]=prxchange('s/"\d+"//',-1,v[i]);
v[i]=compress(v[i],'{}"');
v[i]=substr(v[i],3);
end;
run;
proc transpose data=parsed out=t_parsed ;
var _:;
run;
data want(drop=i);
set t_parsed;
array split(6) $50.;
do i=1 to dim(split);
split[i] = scan(col1,i,',');
end;
run;
proc sort data=want; by _name_;run;
proc transpose data=want out=want_t(drop=_name_);
var split:;
id _name_;
run;