Cloud Function 处理大文件时以状态"连接错误"结束

时间:2018-04-08 16:27:20

标签: node.js google-cloud-storage google-cloud-functions

我有一个Google Cloud功能,可以从云存储中读取多行文件,并将其作为带分隔符的单行文件写回云存储。

下面的代码适用于小文件,但如果文件很大(150 MB),函数就会终止,并显示状态:"连接错误"。

我使用了文档中提到的 createReadStream 和 createWriteStream。

注意:我有用于测试的硬编码输入文件名

index.js

const fs = require('fs')
const readline = require('readline');
const byline = require('byline');
const storage = require('@google-cloud/storage')();
const stream = require('stream');

exports.processFiles = function(event, callback) {
const bucket = storage.bucket('src_bucket');
const targetbucket = storage.bucket('cf100');
const remoteFile = bucket.file('/files/mylog.log');


const outremoteFile = targetbucket.file('processed_log.log');

var line_no = 1;
var segmentsLineCheck = ["s1", "s2", "s3"];
var segments = [];

var  gcsStream = remoteFile.createReadStream();



let remoteWriteStream = outremoteFile.createWriteStream({ resumable: false,
     metadata : { 
        contentType : 'text/plain'  
     }
  }); 


var lineStream = byline.createStream(gcsStream);
lineStream.on('data', function(line) {
 transform(line.toString()); 
});

lineStream.on('error', (err) => {
            console.log('lineStream Err'+err);
        });

lineStream.on('finish', () => {
    remoteWriteStream.end();
            console.log('lineStream finished' );
        });


var isSegmentStarted = false;
var segmentString = '';
var segmentFound = '';

function transform(line) {

    //if line contains any of the segmentsLineCheck then add the line to segments array
    //else append the line followed by $$

    if (new RegExp(segmentsLineCheck.join("|")).test(line)) {
        // At least one match in segmentsLineCheck

        for(var i=0; i<segmentsLineCheck.length; i++) {
            if(line.indexOf(segmentsLineCheck[i])!==-1) {
                segmentFound = segmentsLineCheck[i];
                segmentsLineCheck.splice(i, 1);
                break;
            }
        }

        if(!isSegmentStarted) {
            console.log('segment forund - ' , segmentFound);
            console.log('segments remaining - ' , segmentsLineCheck);

            segmentString = ' $$ ' + line;
            isSegmentStarted = true;
        }
    } else if(line.indexOf('[20')!==-1 && isSegmentStarted) {
        remoteWriteStream.write(line+ ' $$ '); 
        isSegmentStarted = false;
        if(segmentString!=='') { 
            segments[segments.length] = segmentString;
            segmentString = ''; 
        }   
    } else if(isSegmentStarted) {
        segmentString += line;
    } else if(!isSegmentStarted){
        remoteWriteStream.write(line+ ' $$ ');
    }

    line_no++;

}
callback();
};

1 个答案:

答案 0 :(得分:0)

这篇帖子中的代码对小文件和大文件都适用。我遇到的问题是由输入文件不正确造成的。我保留了这篇帖子、未作修改,希望它能帮助其他人。