Google数据流模板UDF转换 - 如何格式化日期

时间:2018-03-03 16:01:19

标签: google-bigquery google-cloud-dataflow

我尝试使用Google Dataflow模板(Cloud Storage Text to BigQuery)来加载数据,但作业在"插入BigQuery"这一步失败了。我在日志文件中看到以下错误。

"location" : "query",
"message" : "Invalid date: '9/11/2017' Field: Date; Value: 9/11/2017",
"reason" : "invalidQuery"

有人能帮忙解释一下,应该如何在输入的CSV文件中正确格式化日期,或者如何在转换函数中将其转换为BigQuery期望的格式吗?

以下是我的CSV输入文件的内容。该文件只有一行数据,没有标题行:

123456,Jack,Jones,F,39,183,130,8,2501,990,9/11/2017

这是我的transform.js:

/**
 * Dataflow UDF: converts one CSV line into the JSON string expected by the
 * "Cloud Storage Text to BigQuery" template.
 *
 * Columns are read positionally (no header row) and mapped to the field
 * names declared in schema.json. All values are emitted as strings, except
 * that the Date column is rewritten to the YYYY-MM-DD form BigQuery requires
 * for DATE columns.
 *
 * @param {string} line A single comma-separated input record.
 * @return {string} The record serialized as a JSON object.
 */
function transform(line) {
    var values = line.split(',');

    var obj = new Object();
    obj.Member_ID = values[0];
    obj.First_Name = values[1];
    obj.Last_Name = values[2];
    obj.Gender = values[3];
    obj.Age = values[4];
    obj.Height = values[5]; // todo - convert from inches to cm
    obj.Weight = values[6]; // todo - convert from pounds to kilos
    obj.Hours_Sleep = values[7];
    obj.Calories_Consumed = values[8];
    obj.Exercise_Calories_Burned = values[9];
    // BigQuery rejects "9/11/2017" for a DATE column; it must be "2017-09-11".
    obj.Date = toBigQueryDate(values[10]);

    return JSON.stringify(obj);
}

/**
 * Rewrites a slash-separated date as YYYY-MM-DD.
 *
 * NOTE(review): the input is assumed to be month/day/year (US order) -
 * confirm against the system that produces the CSV. Two-digit years are
 * assumed to belong to the 2000s.
 *
 * Anything that does not look like such a date (including a missing column
 * or a value that is already YYYY-MM-DD) is returned unchanged, so BigQuery
 * still reports genuinely bad data instead of it being silently altered.
 *
 * @param {*} raw The raw Date column value.
 * @return {*} The normalized date string, or `raw` unchanged.
 */
function toBigQueryDate(raw) {
    if (typeof raw !== 'string') {
        return raw;
    }

    // Surrounding whitespace (e.g. a trailing "\r" from CRLF files) is tolerated.
    var parts = /^\s*(\d{1,2})\/(\d{1,2})\/(\d{4}|\d{2})\s*$/.exec(raw);
    if (parts === null) {
        return raw;
    }

    var month = parseInt(parts[1], 10);
    var day = parseInt(parts[2], 10);
    if (month < 1 || month > 12 || day < 1 || day > 31) {
        return raw;
    }

    var year = parts[3].length === 2 ? '20' + parts[3] : parts[3];

    // ES5-compatible zero padding (String.prototype.padStart may be unavailable).
    return year + '-' + ('0' + month).slice(-2) + '-' + ('0' + day).slice(-2);
}

这是我的schema.json:

{
    "BigQuery Schema": [
      {
        "name": "Member_ID",
        "type": "INTEGER"
      },
      {
        "name": "First_Name",
        "type": "STRING"
      },
      {
        "name": "Last_Name",
        "type": "STRING"
      },
      {
        "name": "Gender",
        "type": "STRING"
      },
      {
        "name": "Age",
        "type": "INTEGER"
      },
      {
        "name": "Height",
        "type": "INTEGER"
      },
      {
        "name": "Weight",
        "type": "INTEGER"
      },
      {
        "name": "Hours_Sleep",
        "type": "INTEGER"
      },
      {
        "name": "Calories_Consumed",
        "type": "INTEGER"
      },
      {
        "name": "Exercise_Calories_Burned",
        "type": "INTEGER"
      },
      {
        "name": "Date",
        "type": "DATE"
      }
    ]
  }

完整错误堆栈:

(fc35da1cedcd900a): java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
  "configuration" : {
    "load" : {
      "createDisposition" : "CREATE_IF_NEEDED",
      "destinationTable" : {
        "datasetId" : "nationalhealthclubfitnessdata",
        "projectId" : "nationalhealthclub-196411",
        "tableId" : "history"
      },
      "schema" : {
        "fields" : [ {
          "name" : "Member_ID",
          "type" : "INTEGER"
        }, {
          "name" : "First_Name",
          "type" : "STRING"
        }, {
          "name" : "Last_Name",
          "type" : "STRING"
        }, {
          "name" : "Gender",
          "type" : "STRING"
        }, {
          "name" : "Age",
          "type" : "INTEGER"
        }, {
          "name" : "Height",
          "type" : "INTEGER"
        }, {
          "name" : "Weight",
          "type" : "INTEGER"
        }, {
          "name" : "Hours_Sleep",
          "type" : "INTEGER"
        }, {
          "name" : "Calories_Consumed",
          "type" : "INTEGER"
        }, {
          "name" : "Exercise_Calories_Burned",
          "type" : "INTEGER"
        }, {
          "name" : "Date",
          "type" : "DATE"
        } ]
      },
      "sourceFormat" : "NEWLINE_DELIMITED_JSON",
      "sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
      "writeDisposition" : "WRITE_TRUNCATE"
    }
  },
  "etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
  "id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
  "jobReference" : {
    "jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
    "projectId" : "nationalhealthclub-196411",
    "location" : "US"
  },
  "kind" : "bigquery#job",
  "selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
  "statistics" : {
    "creationTime" : "1520090987201",
    "endTime" : "1520090987651",
    "startTime" : "1520090987447"
  },
  "status" : {
    "errorResult" : {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    },
    "errors" : [ {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    }, {
      "location" : "query",
      "message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
      "reason" : "invalidQuery"
    } ],
    "state" : "DONE"
  },
  "user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:182)
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner$1.outputWindowedValue(GroupAlsoByWindowFnRunner.java:104)
    at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:54)
    at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:37)
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:117)
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:74)
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:113)
    at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
    at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
    at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:187)
    at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:148)
    at com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:68)
    at com.google.cloud.dataflow.worker.DataflowWorker.executeWork(DataflowWorker.java:330)
    at com.google.cloud.dataflow.worker.DataflowWorker.doWork(DataflowWorker.java:302)
    at com.google.cloud.dataflow.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:251)
    at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:135)
    at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:115)
    at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:102)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
  "configuration" : {
    "load" : {
      "createDisposition" : "CREATE_IF_NEEDED",
      "destinationTable" : {
        "datasetId" : "nationalhealthclubfitnessdata",
        "projectId" : "nationalhealthclub-196411",
        "tableId" : "history"
      },
      "schema" : {
        "fields" : [ {
          "name" : "Member_ID",
          "type" : "INTEGER"
        }, {
          "name" : "First_Name",
          "type" : "STRING"
        }, {
          "name" : "Last_Name",
          "type" : "STRING"
        }, {
          "name" : "Gender",
          "type" : "STRING"
        }, {
          "name" : "Age",
          "type" : "INTEGER"
        }, {
          "name" : "Height",
          "type" : "INTEGER"
        }, {
          "name" : "Weight",
          "type" : "INTEGER"
        }, {
          "name" : "Hours_Sleep",
          "type" : "INTEGER"
        }, {
          "name" : "Calories_Consumed",
          "type" : "INTEGER"
        }, {
          "name" : "Exercise_Calories_Burned",
          "type" : "INTEGER"
        }, {
          "name" : "Date",
          "type" : "DATE"
        } ]
      },
      "sourceFormat" : "NEWLINE_DELIMITED_JSON",
      "sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
      "writeDisposition" : "WRITE_TRUNCATE"
    }
  },
  "etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
  "id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
  "jobReference" : {
    "jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
    "projectId" : "nationalhealthclub-196411",
    "location" : "US"
  },
  "kind" : "bigquery#job",
  "selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
  "statistics" : {
    "creationTime" : "1520090987201",
    "endTime" : "1520090987651",
    "startTime" : "1520090987447"
  },
  "status" : {
    "errorResult" : {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    },
    "errors" : [ {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    }, {
      "location" : "query",
      "message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
      "reason" : "invalidQuery"
    } ],
    "state" : "DONE"
  },
  "user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
    at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:36)
    at org.apache.beam.sdk.io.gcp.bigquery.WriteTables$WriteTablesDoFn$DoFnInvoker.invokeProcessElement(Unknown Source)
    at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177)
    at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:138)
    at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324)
    at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
    at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
    at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:272)
    at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:211)
    at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:66)
    at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:436)
    at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:424)
    at org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$ReshuffleWithOnlyTrigger$1.processElement(ReshuffleOverrideFactory.java:84)
    at org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$ReshuffleWithOnlyTrigger$1$DoFnInvoker.invokeProcessElement(Unknown Source)
    at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177)
    at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141)
    at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324)
    at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48)
    at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52)
    at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:180)
    ... 21 more
Caused by: java.lang.RuntimeException: Failed to create load job with id prefix c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000, reached max retries: 3, last failed load job: {
  "configuration" : {
    "load" : {
      "createDisposition" : "CREATE_IF_NEEDED",
      "destinationTable" : {
        "datasetId" : "nationalhealthclubfitnessdata",
        "projectId" : "nationalhealthclub-196411",
        "tableId" : "history"
      },
      "schema" : {
        "fields" : [ {
          "name" : "Member_ID",
          "type" : "INTEGER"
        }, {
          "name" : "First_Name",
          "type" : "STRING"
        }, {
          "name" : "Last_Name",
          "type" : "STRING"
        }, {
          "name" : "Gender",
          "type" : "STRING"
        }, {
          "name" : "Age",
          "type" : "INTEGER"
        }, {
          "name" : "Height",
          "type" : "INTEGER"
        }, {
          "name" : "Weight",
          "type" : "INTEGER"
        }, {
          "name" : "Hours_Sleep",
          "type" : "INTEGER"
        }, {
          "name" : "Calories_Consumed",
          "type" : "INTEGER"
        }, {
          "name" : "Exercise_Calories_Burned",
          "type" : "INTEGER"
        }, {
          "name" : "Date",
          "type" : "DATE"
        } ]
      },
      "sourceFormat" : "NEWLINE_DELIMITED_JSON",
      "sourceUris" : [ "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f" ],
      "writeDisposition" : "WRITE_TRUNCATE"
    }
  },
  "etag" : "\"OhENgf8ForUUnKbYWWdbr5aJHYs/zPfRJx4AGF6QkTv27FplQTraleU\"",
  "id" : "nationalhealthclub-196411:US.c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
  "jobReference" : {
    "jobId" : "c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2",
    "projectId" : "nationalhealthclub-196411",
    "location" : "US"
  },
  "kind" : "bigquery#job",
  "selfLink" : "https://www.googleapis.com/bigquery/v2/projects/nationalhealthclub-196411/jobs/c9d1ea08ae4d4a70b352c7be0f0e6a33_cf04bbeb51a7e102d8e5e34aaedbed62_00001_00000-2?location=US",
  "statistics" : {
    "creationTime" : "1520090987201",
    "endTime" : "1520090987651",
    "startTime" : "1520090987447"
  },
  "status" : {
    "errorResult" : {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    },
    "errors" : [ {
      "location" : "gs://nationalhealthclub/ingest/tmp/BigQueryWriteTemp/c9d1ea08ae4d4a70b352c7be0f0e6a33/31351f6c-b900-4ee0-9401-81ba3db3313f",
      "message" : "Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.",
      "reason" : "invalid"
    }, {
      "location" : "query",
      "message" : "Invalid date: '9/11/17' Field: Date; Value: 9/11/17",
      "reason" : "invalidQuery"
    } ],
    "state" : "DONE"
  },
  "user_email" : "867773240827-compute@developer.gserviceaccount.com"
}.
    at org.apache.beam.sdk.io.gcp.bigquery.WriteTables.load(WriteTables.java:269)
    at org.apache.beam.sdk.io.gcp.bigquery.WriteTables.access$600(WriteTables.java:77)
    at org.apache.beam.sdk.io.gcp.bigquery.WriteTables$WriteTablesDoFn.processElement(WriteTables.java:141)

1 个答案:

答案 0 :(得分:3)

BigQuery中的DATE类型(标准SQL)要求日期采用以下格式:YYYY-MM-DD。因此需要在加载之前(例如在UDF转换函数中)把输入值转换为该格式;假设输入为"月/日/年",则 9/11/2017 应写成 2017-09-11。

参考:https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date-type