我试图执行一个小管道:
我已经通过 Dataprep 创建了一个 Dataflow 作业,因为它提供了很好的 UI,可以在写入 BigQuery 表之前完成我的所有转换(写入 BigQuery 这一步工作正常)。我还写了一个 Cloud Function,在文件上传到 GCS 桶时触发。但是,这个 Cloud Function 并不会触发我在 Dataprep 中创建的那个 Dataflow 作业。
请查看下面我的 Cloud Function 示例代码;如果能得到关于 Dataflow 作业为何未被触发的任何指点,我将不胜感激。
/**
* Triggered from a message on a Cloud Storage bucket.
*
* @param {!Object} event The Cloud Functions event.
* @param {!Function} The callback function.
*/
exports.processFile = (event, callback) => {
console.log('Processing file: ' + event.data.name);
callback();
const google = require('googleapis');
exports.CF_GCStoDataFlow_v2 = function(event, callback) {
const file = event.data;
if (file.resourceState === 'exists' && file.name) {
google.auth.getApplicationDefault(function (err, authClient, projectId) {
if (err) {
throw err;
}
if (authClient.createScopedRequired && authClient.createScopedRequired()) {
authClient = authClient.createScoped([
'https://www.googleapis.com/auth/cloud-platform',
'https://www.googleapis.com/auth/userinfo.email'
]);
}
const dataflow = google.dataflow({ version: 'v1b3', auth: authClient });
dataflow.projects.templates.create({
projectId: projectId,
resource: {
parameters: {
inputFile: `gs://${file.bucket}/${file.name}`,
outputFile: `gs://${file.bucket}/${file.name}`
},
jobName: 'cloud-dataprep-csvtobq-v2-281345',
gcsPath: 'gs://mygcstest-pipeline-staging/temp/'
}
}, function(err, response) {
if (err) {
console.error("problem running dataflow template, error was: ", err);
}
console.log("Dataflow template response: ", response);
callback();
});
});
}
};
};

答案 0 :(得分:2)
这个代码片段可能有所帮助。它使用了另一个 Dataflow API 方法(launch),对我是有效的。请注意,你需要指定模板的 URL,并检查元数据文件(可以在与通过 Dataprep 界面执行的模板相同的目录下找到)是否包含了正确的参数。
// Launch the Dataprep-generated template via templates.launch.
// Dataprep templates expect their input/output locations as JSON-encoded
// maps ({"location1": "gs://..."}), not as plain gs:// strings.
dataflow.projects.templates.launch({
  projectId: projectId,
  location: location,
  gcsPath: jobTemplateUrl,
  resource: {
    parameters: {
      inputLocations: `{"location1" :"gs://${file.bucket}/${file.name}"}`,
      // Fixed: the original had a stray `"}` appended here, which produced
      // invalid JSON and would make the launch request fail.
      outputLocations: `{"location1" : "gs://${destination.bucket}/${destination.name}"}`,
    },
    environment: {
      tempLocation: `gs://${destination.bucket}/${destination.tempFolder}`,
      zone: "us-central1-f"
    },
    jobName: 'my-job-name',
  }
  // Fixed: the original snippet never closed the launch(...) call.
}, function (err, response) {
  if (err) {
    console.error("problem launching dataflow template, error was: ", err);
  }
  console.log("Dataflow template response: ", response);
  callback();
});
答案 1 :(得分:1)
你有没有提交过 Dataproc 作业?它开始运行了吗?以下文档可以提供一些入门思路:
https://cloud.google.com/dataproc/docs/concepts/jobs/life-of-a-job
答案 2 :(得分:1)
看起来你把 CF_GCStoDataFlow_v2 放在了 processFile 里面,所以代码中 Dataflow 的那部分没有被执行。
您的功能应如下所示:
/**
* Triggered from a message on a Cloud Storage bucket.
*
* @param {!Object} event The Cloud Functions event.
* @param {!Function} The callback function.
*/
exports.CF_GCStoDataFlow_v2 = (event, callback) => {
const google = require('googleapis');
if (file.resourceState === 'exists' && file.name) {
google.auth.getApplicationDefault(function (err, authClient, projectId) {
if (err) {
throw err;
}
if (authClient.createScopedRequired && authClient.createScopedRequired()) {
authClient = authClient.createScoped([
'https://www.googleapis.com/auth/cloud-platform',
'https://www.googleapis.com/auth/userinfo.email'
]);
}
const dataflow = google.dataflow({ version: 'v1b3', auth: authClient });
dataflow.projects.templates.create({
projectId: projectId,
resource: {
parameters: {
inputFile: `gs://${file.bucket}/${file.name}`,
outputFile: `gs://${file.bucket}/${file.name}`
},
jobName: '<JOB_NAME>',
gcsPath: '<BUCKET_NAME>'
}
}, function(err, response) {
if (err) {
console.error("problem running dataflow template, error was: ", err);
}
console.log("Dataflow template response: ", response);
callback();
});
});
}
callback();
};
确保将“要执行的功能”下的值更改为CF_GCStoDataFlow_v2