使用SAS解析Json文件

时间:2016-07-27 09:58:51

标签: json parsing sas

这是我第一次尝试使用SAS解析JSON文件。我在网上找到了一些脚本,并在SAS中尝试了下面这段代码,但无法让它在我的JSON数据文件上正常运行。

/* Asker's attempt: build data set PARSED by locating each JSON key in the input
   file and reading the text that follows it, then trimming that text with SCAN. */
data parsed; 
/* DATA here is presumably a fileref assigned elsewhere (not shown) - confirm.
   SCANOVER makes the @'text' pointers below keep searching for their marker text. */
infile data lrecl=32000 truncover scanover dlm=",";
/* For every key: position just past the quoted marker text, then read the next
   255 columns into a character variable ($255. formatted input).
   NOTE(review): the first marker has no space between ':' and '{', while the
   sample JSON shows "driver_id": { with a space - verify the markers match the file. */
input @'"driver_id":{' driver_id $255.
  @'"publication_site_id":{' publication_site_id $255.
  @'"confirmed_seat_count":{' confirmed_seat_count $255.
  @'"published_date":{' published_date $255.
  @'"trip_date":{' trip_date $255.
  @'"first_publication_date":{' first_publication_date $255.
  @'"profile_picture_added_date":{' profile_picture_added_date $255.;

/* Keep only the first token of each value, using double quote and comma as delimiters. */
driver_id=scan(driver_id, 1, '",');
published_date=scan(published_date, 1, '",');   
trip_date   =scan(trip_date, 1, '",');
publication_site_id=scan(publication_site_id, 1, '",');
confirmed_seat_count    =scan(confirmed_seat_count, 1, '",');
first_publication_date  =scan(first_publication_date, 1, '",');
profile_picture_added_date  =scan(profile_picture_added_date, 1, '",');
run;

以下是我的Json文件概述

{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}

我非常感谢你的帮助。

此致 SM

2 个答案:

答案 0 :(得分:1)

这应该会帮助你:

/* Turn off the log warning about over-long quoted strings; the literal written
   below is far longer than a typical string constant. */
options noquotelenmax;
/* Assign fileref DATA to a temporary file, with LRECL=32000. */
filename data temp lrecl=32000;

/* Write the sample JSON text to fileref DATA. DATA _NULL_ runs the step without
   creating an output data set. */
data _null_;
file data;
put '{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}';
run;

/* Read the JSON text behind fileref DATA into data set PARSED, producing one
   character variable per top-level key. */
data parsed;
    /* '}' is the delimiter, so each modified-list read below stops at the brace
       that closes the inner object. */
    infile data dlm='}' lrecl=32000 scanover truncover;

    /* Each line: jump to just past the key marker, then read with the colon
       modifier (start at the next non-blank column, stop at the delimiter).
       The closing @@ holds the current record for later iterations. */
    input
        @'"driver_id": {'                 driver_id                  : $255.
        @'"publication_site_id":{'        publication_site_id        : $255.
        @'"confirmed_seat_count":{'       confirmed_seat_count       : $255.
        @'"published_date":{'             published_date             : $255.
        @'"trip_date":{'                  trip_date                  : $255.
        @'"first_publication_date":{'     first_publication_date     : $255.
        @'"profile_picture_added_date":{' profile_picture_added_date : $255.
        @@
    ;
run;

我更新了分隔符(这样可以保留内部对象,而这些对象本身也是带分隔符的),添加了冒号输入修饰符(它的含义是"从下一个非空白列开始读取",从而可以读取非标准值),并以 @@ 尾随列指针结尾,用来保持当前记录以便输出后续观测(你的JSON很可能会以一条长数据流的形式到达)。哦,我还在 @'"driver_id": {' 中添加了一个空格——原来的写法是笔误吗?

我省略了 scan 那部分代码,相信这部分你自己能够搞定!

答案 1 :(得分:1)

这不是我最好的作品,但它能得到你想要的结果!还需要重新整理一下列并做一些dq处理,不过你的JSON看起来就会像一个SAS数据集了。我建议你了解一下Python的pandas包。使用pandas,只需一行代码就能解决这个问题。

/* Read the multi-line JSON file into one observation holding one raw text
   variable per key, then strip the JSON punctuation so that each variable is
   left with a plain comma-separated list of values. */
data parsed; 
/* FIRSTOBS=2 starts reading at the second record (in the sample file the first
   record holds only the opening brace). SCANOVER lets each @'text' pointer
   search forward through the records for its key. */
infile "/your_path/test.json" lrecl=32500 truncover scanover dlm="," firstobs=2;
/* Position just past each quoted key and read up to 255 columns of what follows. */
input   @'"driver_id"' _driver_id $255.
        @'"publication_site_id"' _publication_site_id $255.
        @'"confirmed_seat_count"' _confirmed_seat_count $255.
        @'"published_date"' _published_date $255.
        @'"trip_date"' _trip_date $255.
        @'"first_publication_date"' _first_publication_date $255.
        @'"profile_picture_added_date"' _profile_picture_added_date $255.;
/* Clean every underscore-prefixed variable in place. */
array v{*} _:;
do i=1 to dim(v);
    /* Remove each inner index key together with its colon ("0":, "1":, ...);
       removing only the quoted digits would leave a stray ':' before every value. */
    v[i]=prxchange('s/"\d+"://',-1,v[i]);
    /* Drop braces and double quotes. */
    v[i]=compress(v[i],'{}"');
    /* The text still begins with the colon that followed the outer key, with or
       without a blank after it. Skip that one colon and left-align, instead of
       assuming a fixed two-character prefix. */
    v[i]=left(substr(v[i],2));
end;
run;

/* Step 1: turn the underscore-prefixed variables of PARSED into rows; the
   text is referenced below as COL1 and the source variable name as _NAME_. */
proc transpose data=parsed
               out=t_parsed;
    var _:;
run;

/* Step 2: cut each comma-separated COL1 string into six pieces (SPLIT1-SPLIT6).
   The loop counter is dropped from the output. */
data want(drop=k);
    set t_parsed;
    array split(6) $50.;
    do k = 1 to dim(split);
        split[k] = scan(col1, k, ',');
    end;
run;

/* Step 3: order the rows by source variable name. */
proc sort data=want;
    by _name_;
run;

/* Step 4: transpose back so every source variable becomes a column (named via
   ID _NAME_) and every SPLITn piece becomes a row. */
proc transpose data=want
               out=want_t(drop=_name_);
    id  _name_;
    var split:;
run;