我正在尝试在Big Query中加载嵌套的json数据。
以下是我正在使用的数据和架构..
scehma - >
[{"名称":"种类""类型":"串"},{"名称&#34 ;:"全名""类型":"串"},{"名称":"年龄",& #34;类型":"整数"},{"名称":" citiesLived""类型":"记录""字段":[{"名称":"地方""类型":"串&#34 ;},{"名称":" numberOfYears""类型":"整数"}]}]
数据 - >它存在于gc_data / load_data.json
中{" kind":" person"," fullName":" John Doe"," age": 22," citiesLived":[{" place":" Seattle"," numberOfYears":5},{" place&# 34;:"斯德哥尔摩"," numberOfYears":6}]} {"善良":"人"," fullName":" Jane Austen","年龄":24,& #34; citiesLived":[{" place":" Los Angeles"," numberOfYears":2},{" place" :" Tokyo"," numberOfYears":2}]}
try {
bigquery.datasets().insert(PROJECT_ID, dataset).execute();
} catch (IOException e) {
System.out.println(e);
}
// Set where you are importing from (i.e. the Google Cloud Storage paths).
List<String> sources = new ArrayList<String>();
sources.add("gs://gc_data/json_test_new_flat.json");
loadConfig.setSourceUris(sources);
loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
//loadConfig.setFieldDelimiter("\n");
// Describe the resulting table you are importing to:
TableReference tableRef = new TableReference();
tableRef.setDatasetId("myDataset");
tableRef.setTableId("myTableJSONNew");
tableRef.setProjectId(projectId);
loadConfig.setDestinationTable(tableRef);
List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
TableFieldSchema fieldKind = new TableFieldSchema();
fieldKind.setName("kind");
fieldKind.setType("STRING");
TableFieldSchema fieldFullName = new TableFieldSchema();
fieldFullName.setName("fullName");
fieldFullName.setType("STRING");
TableFieldSchema fieldAge = new TableFieldSchema();
fieldAge.setName("age");
fieldAge.setType("INTEGER");
TableFieldSchema fieldJSON = new TableFieldSchema();
fieldJSON.setName("citiesLived");
fieldJSON.setType("RECORD");
// this is for record
List<TableFieldSchema> listOfJSonSchema = new ArrayList<TableFieldSchema>();
TableFieldSchema fieldPlace = new TableFieldSchema();
fieldPlace.setName("place");
fieldPlace.setType("STRING");
TableFieldSchema fieldnumberOfYears = new TableFieldSchema();
fieldnumberOfYears.setName("numberOfYears");
fieldnumberOfYears.setType("INTEGER");
listOfJSonSchema.add(fieldPlace);
listOfJSonSchema.add(fieldnumberOfYears);
//
fieldJSON.setFields(listOfJSonSchema);
fields.add(fieldKind);
fields.add(fieldFullName);
fields.add(fieldAge);
fields.add(fieldJSON);
TableSchema schema = new TableSchema();
schema.setFields(fields); // This is to set delimiter
loadConfig.setSchema(schema);
Insert insert = bigquery.jobs().insert(projectId, job);
insert.setProjectId(projectId);
JobReference jobRef = insert.execute().getJobReference();
System.out.println(jobRef.toPrettyString());
答案 0 :(得分:1)
您已经定义了citiesLived记录,但看起来您没有指出记录是可重复的,这是您的示例数据所指示的。尝试在citiesLived字段上使用setMode(“REPEATED”)。