I want to write the data from a CSV file into a BigQuery table.
I built the following data pipeline for this: CSV file in Cloud Storage => GCS Text to Pub/Sub Dataflow template => Cloud Function => BigQuery table.
The Cloud Function is configured to trigger whenever a new message arrives on the Pub/Sub topic. My function executions keep failing because the data extracted from the Pub/Sub message is not valid JSON at the point where it is inserted into BigQuery. I have tried changing the format of the data in the CSV file, but it keeps failing, and I don't know whether this is even possible. The same pipeline with an NDJSON source file works like a charm and writes to BQ.
1) Here is my table (11 fields):
Member_ID INTEGER
First_Name STRING
Last_Name STRING
Gender STRING
Age INTEGER
Height INTEGER
Weight INTEGER
Hours_Sleep INTEGER
Calories_Consumed INTEGER
Exercise_Calories_Burned INTEGER
Date DATE
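For comparison, a line from the NDJSON file that works looks roughly like this (values taken from the CSV line in section 3 below, types assumed from the schema), whereas each line of the CSV file carries only the bare comma-separated values:

{"Member_ID":123456,"First_Name":"Jack","Last_Name":"Jones","Gender":"F","Age":39,"Height":183,"Weight":130,"Hours_Sleep":8,"Calories_Consumed":2501,"Exercise_Calories_Burned":990,"Date":"2017-11-09"}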
2) Here is my function:
/**
 * Background Cloud Function to be triggered by PubSub.
 *
 * @param {object} event The Cloud Functions event.
 * @param {function} callback The callback function.
 */
exports.subscribe = function (event, callback) {
  const BigQuery = require('@google-cloud/bigquery');
  const projectId = "iot-fitness-198120"; //Enter your project ID here
  const datasetId = "CDCData"; //Enter your BigQuery dataset name here
  const tableId = "fitness"; //Enter your BigQuery table name here

  const PubSubMessage = event.data;
  console.log(`Le Message PubSub en base64 est: ${PubSubMessage}`);

  // Incoming data is in JSON format
  const incomingData = PubSubMessage.data ? Buffer.from(PubSubMessage.data, 'base64').toString() : "{'Member_ID':'na','First_Name':'na','Last_Name':'na','Gender':'na','Age':'na','Height':'na','Weight':'na','Hours_Sleep':'na','Calories_Consumed':'na','Exercise_Calories_Burned':'na','Date':'na'}";
  console.log(`Le Message PubSub en String est: ${incomingData}`);

  const jsonData = JSON.parse(incomingData);
  console.log(`Le Message PubSub parse en JSON est: ${jsonData}`);

  var rows = [jsonData];
  console.log(`Incoming data: ${rows}`);

  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });
  console.log(`BigQuery Client instantiated`);

  // Inserts data into a table
  bigquery
    .dataset(datasetId)
    .table(tableId)
    .insert(rows)
    .then((insertErrors) => {
      console.log('Inserted:');
      rows.forEach((row) => console.log(row));
      if (insertErrors && insertErrors.length > 0) {
        console.log('Insert errors:');
        insertErrors.forEach((err) => console.error(err));
      }
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
  // [END bigquery_insert_stream]

  callback();
};
3) Finally, here is the main error message when trying to import the following simple CSV line: 123456,Jack,Jones,F,39,183,130,8,2501,990,2017-11-09
2018-03-19 22:44:40.585 GST
function-fitnessPubSubToBQ
58216840933966
SyntaxError: Unexpected token , in JSON at position 6 at Object.parse (native) at exports.subscribe (/user_code/index.js:17:25) at /var/tmp/worker/worker.js:695:16 at /var/tmp/worker/worker.js:660:9 at _combinedTickCallback (internal/process/next_tick.js:73:7) at process._tickDomainCallback (internal/process/next_tick.js:128:9)
{
insertId: "000000-2437f842-1bab-47c3-9583-b91ffd0cc601"
labels: {…}
logName: "projects/iot-fitness-198120/logs/cloudfunctions.googleapis.com%2Fcloud-functions"
receiveTimestamp: "2018-03-19T18:44:45.895841517Z"
resource: {…}
severity: "ERROR"
textPayload: "SyntaxError: Unexpected token , in JSON at position 6
at Object.parse (native)
at exports.subscribe (/user_code/index.js:17:25)
at /var/tmp/worker/worker.js:695:16
at /var/tmp/worker/worker.js:660:9
at _combinedTickCallback (internal/process/next_tick.js:73:7)
at process._tickDomainCallback (internal/process/next_tick.js:128:9)"
timestamp: "2018-03-19T18:44:40.585Z"
}
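The error is easy to reproduce outside Cloud Functions with just the decode-and-parse step the function performs, fed the CSV line above (a minimal sketch; the base64 round trip stands in for what Pub/Sub delivers):

// Same decode-then-parse the function does, applied to the CSV line
const payload = Buffer.from('123456,Jack,Jones,F,39,183,130,8,2501,990,2017-11-09').toString('base64');
const incomingData = Buffer.from(payload, 'base64').toString();
JSON.parse(incomingData); // throws SyntaxError: Unexpected token , in JSON at position 6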
It is complaining about the data format when it executes JSON.parse(incomingData): the message is the raw CSV line rather than JSON, so the parse fails at the first comma. How can I get the data into the right format before it is parsed as JSON?
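I assume I need something along these lines in place of the JSON.parse call, splitting the line and mapping the values onto the column names, but I am not sure this is the right approach (a rough sketch; the field order and the naive comma split are my assumptions):

// Hypothetical replacement for the JSON.parse step: map CSV values onto the table's columns
const fields = ['Member_ID', 'First_Name', 'Last_Name', 'Gender', 'Age', 'Height',
                'Weight', 'Hours_Sleep', 'Calories_Consumed', 'Exercise_Calories_Burned', 'Date'];
const values = incomingData.split(',');
const jsonData = {};
fields.forEach((name, i) => { jsonData[name] = values[i]; });
// Note: values stay strings here; the INTEGER and DATE columns may need explicit conversion
var rows = [jsonData];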
Thanks.