我有一个包含若干因子的 JSON 数据集,其中一些因子下面还嵌套了 1–2 层子级因子。当我尝试把解析得到的列表转换成 data.frame 格式时,这种嵌套会导致问题,因此我的问题是:
有没有办法在我的脚本中加入一段代码,识别每个因子的层级和子层级并对它们重命名,使每一个都成为可以单独识别的独立因子?
示例:
# Load the JSON parser that provides fromJSON().
library(rjson)

# Parse metadata.json into a nested R list.
jSonData2 <- fromJSON(file = "metadata.json")

# Flatten each top-level element and reshape it into a 67-column data.frame.
# NOTE(review): unlist() names nested values by their dot-separated path
# (e.g. cases.diagnoses.classification_of_tumor), but matrix() discards those
# names, so the resulting columns only get default names.
# NOTE(review): ncol = 67 is hard-coded; presumably every record flattens to
# exactly 67 values -- confirm, since unlist() drops NULL entries.
df <- lapply(jSonData2, function(play) {
  data.frame(matrix(unlist(play), ncol = 67, byrow = TRUE))
})
JSON 文件采用以下格式:
[{
"md5sum": "23f66008b892aee401c8b63299db50c5",
"data_type": "Clinical Supplement",
"file_name": "nationwidechildrens.org_clinical.TCGA-AO-A0J7.xml",
"file_size": 100296,
"data_format": "BCR XML",
"submitter_id": null,
"access": "open",
"state": "live",
"file_id": "e1cbf391-eeba-4780-a344-9bf408f3628e",
"data_category": "Clinical",
"associated_entities": [
{
"entity_id": "9536bec0-dae0-4d63-a1b3-956741835243",
"case_id": "9536bec0-dae0-4d63-a1b3-956741835243",
"entity_submitter_id": "TCGA-AO-A0J7",
"entity_type": "case"
}
],
"cases": [
{
"diagnoses": [
{
"classification_of_tumor": "not reported",
"last_known_disease_status": "not reported",
"updated_datetime": "2017-03-04T16:44:35.784223-06:00",
"primary_diagnosis": "c50.9",
"submitter_id": "TCGA-AO-A0J7_diagnosis",
"tumor_stage": "stage iib",
"age_at_diagnosis": 26281.0,
"vital_status": "alive",
"morphology": "8523/3",
"days_to_death": null,
"days_to_last_known_disease_status": null,
"created_datetime": null,
"state": "live",
"days_to_recurrence": null,
"diagnosis_id": "27b1503c-ceca-56ce-a4b1-e38f5c28c31c",
"tumor_grade": "not reported",
"treatments": [
{
"updated_datetime": "2017-03-04T16:43:59.646072-06:00",
"created_datetime": null,
"therapeutic_agents": null,
"submitter_id": "TCGA-AO-A0J7_treatment",
"treatment_id": "c84e41de-78dc-5713-b67c-8d6b74dd086c",
"state": "live",
"days_to_treatment": null,
"treatment_intent_type": null,
"treatment_or_therapy": null
}
],
"tissue_or_organ_of_origin": "c50.9",
"days_to_birth": -26281.0,
"progression_or_recurrence": "not reported",
"prior_malignancy": "not reported",
"site_of_resection_or_biopsy": "c50.9",
"days_to_last_follow_up": 618.0
}
],
"disease_type": "Breast Invasive Carcinoma",
"updated_datetime": "2017-03-04T16:39:19.244769-06:00",
"created_datetime": null,
"demographic": {
"updated_datetime": "2017-03-04T16:37:28.862150-06:00",
"created_datetime": null,
"gender": "female",
"year_of_birth": 1938,
"submitter_id": "TCGA-AO-A0J7_demographic",
"state": "live",
"race": "white",
"demographic_id": "7e2f36a1-f8bd-5b08-b76a-1d389499998f",
"ethnicity": "not hispanic or latino",
"year_of_death": null
},
"submitter_id": "TCGA-AO-A0J7",
"project": {
"project_id": "TCGA-BRCA"
},
"state": "live",
"case_id": "9536bec0-dae0-4d63-a1b3-956741835243",
"primary_site": "Breast",
"exposures": [
{
"cigarettes_per_day": null,
"weight": null,
"updated_datetime": "2017-03-04T16:37:25.850486-06:00",
"alcohol_history": null,
"alcohol_intensity": null,
"bmi": null,
"years_smoked": null,
"submitter_id": "TCGA-AO-A0J7_exposure",
"created_datetime": null,
"state": "live",
"exposure_id": "0520198e-d60b-5491-a77e-75a7f2a62827",
"height": null
}
]
}
],
"archive": {
"archive_id": "21b29624-43f6-4100-8d84-38a04ed1277a",
"data_type": "TCGA DCC Archive",
"updated_datetime": "2017-03-04T16:43:58.641267-06:00",
"created_datetime": "2016-04-01T20:46:54.412047-05:00",
"file_name": "nationwidechildrens.org_BRCA.bio.Level_1.56.90.0.tar.gz",
"md5sum": "a0d358c22121eebd9b2b8c40a7121673",
"data_format": "TARGZ",
"submitter_id": "nationwidechildrens.org_BRCA.bio.Level_1.56",
"state": "live",
"data_category": "Archive",
"file_size": 1119400,
"file_state": "submitted",
"revision": 90
}
}
]
对于这种情况,我希望每个因子都被重命名为类似 cases_diagnoses_classification_of_tumor 的形式,依此类推。
我当然可以逐个或逐节地手动完成重命名,但我很好奇是否有一种方法可以让 R 自动识别出每一处这样的子级别,并将其重命名为 firstlevel_secondlevel_thirdlevel…… 这样的形式。
感谢您的帮助!我希望这对其他人也有帮助。