我有一个Google Cloud功能,可以从云存储中读取多行文件,并将其作为带分隔符的单行文件写回云存储。
下面的代码对小文件可以正常工作,但如果文件很大(约 150 MB),函数就会中止,并返回状态:'连接错误'(connection error)。
我使用了官方文档(Documentation)中提到的 createReadStream 和 createWriteStream。
注意:我有用于测试的硬编码输入文件名
index.js
const fs = require('fs')
const readline = require('readline');
const byline = require('byline');
const storage = require('@google-cloud/storage')();
const stream = require('stream');
exports.processFiles = function(event, callback) {
const bucket = storage.bucket('src_bucket');
const targetbucket = storage.bucket('cf100');
const remoteFile = bucket.file('/files/mylog.log');
const outremoteFile = targetbucket.file('processed_log.log');
var line_no = 1;
var segmentsLineCheck = ["s1", "s2", "s3"];
var segments = [];
var gcsStream = remoteFile.createReadStream();
let remoteWriteStream = outremoteFile.createWriteStream({ resumable: false,
metadata : {
contentType : 'text/plain'
}
});
var lineStream = byline.createStream(gcsStream);
lineStream.on('data', function(line) {
transform(line.toString());
});
lineStream.on('error', (err) => {
console.log('lineStream Err'+err);
});
lineStream.on('finish', () => {
remoteWriteStream.end();
console.log('lineStream finished' );
});
var isSegmentStarted = false;
var segmentString = '';
var segmentFound = '';
function transform(line) {
//if line contains any of the segmentsLineCheck then add the line to segments array
//else append the line followed by $$
if (new RegExp(segmentsLineCheck.join("|")).test(line)) {
// At least one match in segmentsLineCheck
for(var i=0; i<segmentsLineCheck.length; i++) {
if(line.indexOf(segmentsLineCheck[i])!==-1) {
segmentFound = segmentsLineCheck[i];
segmentsLineCheck.splice(i, 1);
break;
}
}
if(!isSegmentStarted) {
console.log('segment forund - ' , segmentFound);
console.log('segments remaining - ' , segmentsLineCheck);
segmentString = ' $$ ' + line;
isSegmentStarted = true;
}
} else if(line.indexOf('[20')!==-1 && isSegmentStarted) {
remoteWriteStream.write(line+ ' $$ ');
isSegmentStarted = false;
if(segmentString!=='') {
segments[segments.length] = segmentString;
segmentString = '';
}
} else if(isSegmentStarted) {
segmentString += line;
} else if(!isSegmentStarted){
remoteWriteStream.write(line+ ' $$ ');
}
line_no++;
}
callback();
};
答案 0 :(得分:0)
这篇帖子中的代码对小文件和大文件都适用。我遇到的问题是由输入文件本身不正确造成的。我保留了这篇帖子、没有改动,希望它能帮助到其他人。