javascript承诺,这需要吗?

时间:2016-11-17 21:27:52

标签: javascript node.js promise pdfjs

我对如何使用promises感到有些困惑。我已经阅读了一些关于承诺的内容,主要是因为我似乎必须使用它们。我正在研究一个小应用程序,它应该使用pdfjs搜索一些pdf,并使用promises。我在nodejs中找到了一些东西,通过查看网络上的各种示例,但我遇到了一个问题。

让我们先看一下代码:

require('pdfjs-dist');
var fs = require('fs');

//var searchTerm = "course";
var searchTerm = "designee";
//var searchTerm = "document";
var wordCounter = 0;
var searchResultJSON = [];
//var data = new Uint8Array(fs.readFileSync('testPdf.pdf'));
//var data = new Uint8Array(fs.readFileSync('advanced-javascript.pdf'));
var data = new Uint8Array(fs.readFileSync('iss4.pdf'));
PDFJS.getDocument(data).then(function (pdfDocument) {
  console.log('Number of pages: ' + pdfDocument.numPages );
  //var div = document.getElementById('viewer');
  for(var i = 1; i<=pdfDocument.numPages; i++ ){//loops thru pages
      console.log("i is " + (i));
      pdfDocument.getPage((i)).then(function(page){//get page(i), 
         // console.log("page is " + (i));
          //console.log("inside getPage()");
          page.getTextContent().then( function(textContent){//get content of pdf
            //console.log("inside getTextContent()");  
            //if( null != textContent.items ){
                var page_text = "";
                var last_block = null;
                var lineWithResult = "";

                for( var k = 0; k < textContent.items.length; k++ ){
                    var block = textContent.items[k];
                    //console.log("word " + textContent.items.length + " k is " + k );
                    /* if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                        if( block.x < last_block.x )
                            page_text += "\r\n"; 
                        else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                            page_text += ' ';
                    } */

                    page_text += block.str;

                    last_block = block;
                    lineWithResult = searchPdf(block.str);
                    if(lineWithResult != null){
                        console.log(lineWithResult + " wordCounter is " + wordCounter);

                    }

                }//end of for(var k...)
                    //console.log(" page_text " + page_text);
                    //console.log(searchResultJSON);

            //}
          });//end of textContent.items

      });//end of getPage

  }//end of loop      
});
function searchPdf(toSearch){//searching pdf for searchTerm
    var result = toSearch.toLowerCase().indexOf(searchTerm);
    if(result >=0){//if match is found
        wordCounter++;
        //console.log("toSearch " + toSearch + " result is " + result + " wordCounter " + wordCounter);
        constructResult(toSearch, result);//build the result object
        return toSearch;
    }
    else{//if match not found
        return null;
    }

}
function constructResult(toSearch, result){//construct array of objects containing: search term, search result and index of search term
    searchResultJSON.push({
        "TextLine":toSearch,
        "SearchTerm":searchTerm,
        "Result": result,               
    });     
} 

此代码的目的是:

  • 循环浏览pdf的页面

  • 循环播放内容

  • 逐行变量获取pdf文本

  • 使用关键字

  • 搜索pdf内容
  • 如果关键字找到匹配项,请打印匹配

  • 在javascript对象中获取匹配

所以,一切正常但是你会注意到从第二个for循环内部(我得到pdf的文本)我调用一个函数searchPdf(),它基本上执行搜索并从在该函数中,我调用另一个函数constructResult(...),该函数应该用结果创建javascript对象。

我在打印这个对象时遇到了一些问题,因为如果我在for循环的范围外打印它,它是空的,因为在循环实际复制和分析之前执行了print调用(在我的情况下是console.log) (阅读过程并找到匹配)文本。所以,承诺看到了解决问题的方法。事实是,我不知道如何以这样的方式编写代码,以便我可以链接承诺并在所有内容执行后打印我的对象。有什么想法吗?

编辑: 所以要澄清一下,我需要的是: 1)循环遍历pdf(我将不得不修改代码以在不久的某个时候循环遍历pdf集合) 2)获取每行文字 3)检查是否匹配 4)如果是这样,复制javascript对象中的文本行 5)打印javascript对象

1 个答案:

答案 0 :(得分:1)

尝试这样的事情:

function search(doc, s) {
    var allPages = [],
        i;

    for (var i = 1; i <= doc.numPages; i++) {
        allPages.push(doc.getPage(i));
    }

    // Promise.all returns a promise that resolves once 
    // each promise inside allPages has been resolved
    return Promise.all(allPages)
    // pages now contains an array of pages, loop over them
    // using map, return the promise to get the content for each page
    // return it through Promise.all so we can run the next phase
    // once the text is resolved for each page
    .then(pages => Promise.all(pages.map(p => p.getTextContent())))
    // We now have an array of contents for each page, filter based
    // on the passed string
    .then(content => content.filter(c => c.indexOf(s) > -1));
}

// This is a mock of the pdf API used in your question
var pdfDocument = {
    numPages: 3,
    getPage: function(i) {
        return Promise.resolve({
            getTextContent: function() {
                return Promise.resolve('Page ' + i);
            }
        });
    }
}