我尝试使用Google Dataflow模板(Cloud Storage文本到BigQuery)来加载数据,但作业在"插入BigQuery"这一步失败了。我在日志文件中看到以下错误。
"location" : "query",
"message" : "Invalid date: '9/11/2017' Field: Date; Value: 9/11/2017",
"reason" : "invalidQuery"
任何人都可以帮忙解释如何在输入CSV文件中正确格式化日期,或者如何将其转换为预期的格式?
以下是我的输入CSV文件的内容。该文件只有一行数据,没有标题行,如下所示。
123456,Jack,Jones,F,39,183,130,8,2501,990,9/11/2017
这是我的transform.js:
/**
 * Dataflow UDF: turns one comma-separated input line into the JSON string
 * for a single BigQuery row.
 *
 * Columns are read by position (0..10) and emitted under the field names used
 * in schema.json. Every value is kept as the string taken from the CSV, except
 * Date, which is rewritten from M/D/YYYY to YYYY-MM-DD because the schema
 * declares that column as DATE and the load job rejects values such as
 * '9/11/2017'.
 *
 * @param {string} line One raw CSV line (no header, no quoted fields).
 * @return {string} JSON-encoded row object.
 */
function transform(line) {
  var values = line.split(',');
  var obj = new Object();
  obj.Member_ID = values[0];
  obj.First_Name = values[1];
  obj.Last_Name = values[2];
  obj.Gender = values[3];
  obj.Age = values[4];
  obj.Height = values[5]; // TODO: convert from inches to cm
  obj.Weight = values[6]; // TODO: convert from pounds to kilos
  obj.Hours_Sleep = values[7];
  obj.Calories_Consumed = values[8];
  obj.Exercise_Calories_Burned = values[9];
  obj.Date = toBigQueryDate(values[10]);
  return JSON.stringify(obj);
}

/**
 * Rewrites a date written as M/D/YYYY into YYYY-MM-DD.
 *
 * Assumes month-first ordering, i.e. '9/11/2017' is 11 September 2017 --
 * TODO confirm against the system that produces the CSV.
 *
 * Anything that does not look like M/D/YYYY (already-ISO dates, two-digit
 * years, a missing column) is returned unchanged, so that it is not silently
 * rewritten into a wrong date.
 *
 * @param {*} raw Date column exactly as read from the CSV line.
 * @return {*} 'YYYY-MM-DD' when raw matched M/D/YYYY, otherwise raw itself.
 */
function toBigQueryDate(raw) {
  if (typeof raw !== 'string') {
    return raw; // short line: keep undefined so JSON.stringify omits the key
  }
  // \s* tolerates padding and the trailing '\r' left behind by CRLF files.
  var parts = /^\s*(\d{1,2})\/(\d{1,2})\/(\d{4})\s*$/.exec(raw);
  if (!parts) {
    return raw;
  }
  var month = parts[1].length < 2 ? '0' + parts[1] : parts[1];
  var day = parts[2].length < 2 ? '0' + parts[2] : parts[2];
  return parts[3] + '-' + month + '-' + day;
}
这是我的schema.json:
{
"BigQuery Schema": [
{
"name": "Member_ID",
"type": "INTEGER"
},
{
"name": "First_Name",
"type": "STRING"
},
{
"name": "Last_Name",
"type": "STRING"
},
{
"name": "Gender",
"type": "STRING"
},
{
"name": "Age",
"type": "INTEGER"
},
{
"name": "Height",
"type": "INTEGER"
},
{
"name": "Weight",
"type": "INTEGER"
},
{
"name": "Hours_Sleep",
"type": "INTEGER"
},
{
"name": "Calories_Consumed",
"type": "INTEGER"
},
{
"name": "Exercise_Calories_Burned",
"type": "INTEGER"
},
{
"name": "Date",
"type": "DATE"
}
]
}
完整错误堆栈:
(fc35da1cedcd900a): java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
"configuration" : {
"load" : {
"createDisposition" : "CREATE_IF_NEEDED",
"destinationTable" : {
"datasetId" : "nationalhealthclubfitnessdata",
"projectId" : "nationalhealthclub-196411",
"tableId" : "history"
},
"schema" : {
"fields" : [ {
"name" : "Member_ID",
"type" : "INTEGER"
}, {
"name" : "First_Name",
"type" : "STRING"
}, {
"name" : "Last_Name",
"type" : "STRING"
}, {
"name" : "Gender",
"type" : "STRING"
}, {
"name" : "Age",
"type" : "INTEGER"
}, {
"name" : "Height",
"type" : "INTEGER"
}, {
"name" : "Weight",
"type" : "INTEGER"
}, {
"name" : "Hours_Sleep",
"type" : "INTEGER"
}, {
"name" : "Calories_Consumed",
"type" : "INTEGER"
}, {
"name" : "Exercise_Calories_Burned",
"type" : "INTEGER"
}, {
"name" : "Date",
"type" : "DATE"
} ]
},
"sourceFormat" : "NEWLINE_DELIMITED_JSON",
"sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
"writeDisposition" : "WRITE_TRUNCATE"
}
},
"etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
"id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"jobReference" : {
"jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"projectId" : "nationalhealthclub-196411",
"location" : "US"
},
"kind" : "bigquery#job",
"selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
"statistics" : {
"creationTime" : "1520090987201",
"endTime" : "1520090987651",
"startTime" : "1520090987447"
},
"status" : {
"errorResult" : {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
},
"errors" : [ {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
}, {
"location" : "query",
"message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
"reason" : "invalidQuery"
} ],
"state" : "DONE"
},
"user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:182)
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner$1.outputWindowedValue(GroupAlsoByWindowFnRunner.java:104)
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:54)
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:37)
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:117)
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:74)
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:113)
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:187)
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:148)
at com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:68)
at com.google.cloud.dataflow.worker.DataflowWorker.executeWork(DataflowWorker.java:330)
at com.google.cloud.dataflow.worker.DataflowWorker.doWork(DataflowWorker.java:302)
at com.google.cloud.dataflow.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:251)
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:135)
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:115)
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:102)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
"configuration" : {
"load" : {
"createDisposition" : "CREATE_IF_NEEDED",
"destinationTable" : {
"datasetId" : "nationalhealthclubfitnessdata",
"projectId" : "nationalhealthclub-196411",
"tableId" : "history"
},
"schema" : {
"fields" : [ {
"name" : "Member_ID",
"type" : "INTEGER"
}, {
"name" : "First_Name",
"type" : "STRING"
}, {
"name" : "Last_Name",
"type" : "STRING"
}, {
"name" : "Gender",
"type" : "STRING"
}, {
"name" : "Age",
"type" : "INTEGER"
}, {
"name" : "Height",
"type" : "INTEGER"
}, {
"name" : "Weight",
"type" : "INTEGER"
}, {
"name" : "Hours_Sleep",
"type" : "INTEGER"
}, {
"name" : "Calories_Consumed",
"type" : "INTEGER"
}, {
"name" : "Exercise_Calories_Burned",
"type" : "INTEGER"
}, {
"name" : "Date",
"type" : "DATE"
} ]
},
"sourceFormat" : "NEWLINE_DELIMITED_JSON",
"sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
"writeDisposition" : "WRITE_TRUNCATE"
}
},
"etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
"id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"jobReference" : {
"jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"projectId" : "nationalhealthclub-196411",
"location" : "US"
},
"kind" : "bigquery#job",
"selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
"statistics" : {
"creationTime" : "1520090987201",
"endTime" : "1520090987651",
"startTime" : "1520090987447"
},
"status" : {
"errorResult" : {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
},
"errors" : [ {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
}, {
"location" : "query",
"message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
"reason" : "invalidQuery"
} ],
"state" : "DONE"
},
"user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:36)
at org.apache.beam.sdk.io.gcp.bigquery.WriteTables$WriteTablesDoFn$DoFnInvoker.invokeProcessElement(Unknown Source)
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177)
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:138)
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324)
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:272)
at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:211)
at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:66)
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:436)
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:424)
at org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$ReshuffleWithOnlyTrigger$1.processElement(ReshuffleOverrideFactory.java:84)
at org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$ReshuffleWithOnlyTrigger$1$DoFnInvoker.invokeProcessElement(Unknown Source)
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177)
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141)
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324)
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:180)
... 21 more
Caused by: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
"configuration" : {
"load" : {
"createDisposition" : "CREATE_IF_NEEDED",
"destinationTable" : {
"datasetId" : "nationalhealthclubfitnessdata",
"projectId" : "nationalhealthclub-196411",
"tableId" : "history"
},
"schema" : {
"fields" : [ {
"name" : "Member_ID",
"type" : "INTEGER"
}, {
"name" : "First_Name",
"type" : "STRING"
}, {
"name" : "Last_Name",
"type" : "STRING"
}, {
"name" : "Gender",
"type" : "STRING"
}, {
"name" : "Age",
"type" : "INTEGER"
}, {
"name" : "Height",
"type" : "INTEGER"
}, {
"name" : "Weight",
"type" : "INTEGER"
}, {
"name" : "Hours_Sleep",
"type" : "INTEGER"
}, {
"name" : "Calories_Consumed",
"type" : "INTEGER"
}, {
"name" : "Exercise_Calories_Burned",
"type" : "INTEGER"
}, {
"name" : "Date",
"type" : "DATE"
} ]
},
"sourceFormat" : "NEWLINE_DELIMITED_JSON",
"sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
"writeDisposition" : "WRITE_TRUNCATE"
}
},
"etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
"id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"jobReference" : {
"jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
"projectId" : "nationalhealthclub-196411",
"location" : "US"
},
"kind" : "bigquery#job",
"selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
"statistics" : {
"creationTime" : "1520090987201",
"endTime" : "1520090987651",
"startTime" : "1520090987447"
},
"status" : {
"errorResult" : {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
},
"errors" : [ {
"location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
"message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
"reason" : "invalid"
}, {
"location" : "query",
"message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
"reason" : "invalidQuery"
} ],
"state" : "DONE"
},
"user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
at org.apache.beam.sdk.io.gcp.bigquery.WriteTables.load(WriteTables.java:269)
at org.apache.beam.sdk.io.gcp.bigquery.WriteTables.access$600(WriteTables.java:77)
at org.apache.beam.sdk.io.gcp.bigquery.WriteTables$WriteTablesDoFn.processElement(WriteTables.java:141)
答案 0 :(得分:3)
BigQuery中的日期类型(标准SQL)要求使用以下格式:YYYY-MM-DD。因此需要在transform.js中先把CSV里的日期(例如9/11/2017)转换为该格式(2017-09-11),再赋值给obj.Date。
参考:https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date-type