使用 Node.js 和正则表达式,从提取出的 50,000 个单词中找出特定的单词和句子

时间:2019-10-29 13:30:58

标签: javascript jquery node.js

我接到一个项目:允许用户上传 docx 文件,并从该文件中提取姓名、电子邮件、电话号码、地址、职业、个人简介等信息,然后把这些信息填入 <span class="name"></span> 之类的元素中,并且只能使用 Node.js 和 JavaScript。

我尝试过使用正则表达式,但它在提取姓名、号码等内容时,会把其他看起来像姓名或数字的单词也一并提取出来。

我如何使用正则表达式提取我需要的特定单词或句子,或者还有另一种方法?

/**
 * POST /upload
 *
 * Takes the file stored by the `upload.single('cv')` middleware, extracts its
 * raw text with `mammoth.extractRawText`, pulls a few fields out of that text
 * with regular expressions and renders the `cv` view with them.
 *
 * Values handed to the view (each is the result of `String.prototype.match`,
 * i.e. an array on success or `null` when nothing matched):
 *   - name:       up to three leading alphanumeric words of the text
 *   - email:      the first e-mail-shaped token
 *   - phone:      every phone-number-shaped run of digits
 *   - occupation: every occurrence of a keyword from a fixed list
 *
 * All four patterns are heuristics; they do not understand the document and
 * will produce false positives/negatives on unusual layouts.
 *
 * Errors raised during extraction or rendering are forwarded to `next` so the
 * application's error handler can answer the request.
 */
app.post('/upload', upload.single('cv'), (req, res, next) => {
  // Without an uploaded file there is no path to read; answer explicitly
  // instead of failing on `req.file.path`.
  if (!req.file) {
    res.status(400).send('No file uploaded');
    return;
  }

  const path = req.file.path;
  console.log(path);

  mammoth.extractRawText({ path: path })
    .then((result) => {
      // Normalise the raw text into a single line:
      //  1. trim surrounding whitespace,
      //  2. drop the first line (the pattern has no `m` flag, so `^` only
      //     matches at offset 0) — NOTE(review): kept from the original;
      //     confirm that discarding the first line is intended,
      //  3. collapse every whitespace run, newlines included, to one space.
      const text = result.value
        .trim()
        .replace(/^.+\n|^.+\r/g, '')
        .replace(/\s+/g, ' ')
        .trim();
      console.log(text);

      // Longest alternative first: with the single-word alternative listed
      // first, the two- and three-word alternatives could never be reached.
      const name = text.match(
        /^([a-zA-Z0-9]+\s{1}[a-zA-Z0-9]{3,}\s{1}[a-zA-Z0-9]{1,}|[a-zA-Z0-9]+\s{1}[a-zA-Z0-9]{1,}|[a-zA-Z0-9]+)/img
      );

      // Match only the address token itself. The previous pattern started
      // with `.{1,}`, which swallowed all text before the `@`, and ended with
      // `[^.]{1,}`, which cut the domain off at its first dot.
      const email = text.match(/[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+/);

      // Keyword list; the leading space keeps a keyword from matching in the
      // middle of a longer word. "Strategist" was misspelled and duplicate
      // entries were removed.
      const occupation = text.match(
        / (Health and|Safety|Specialist|Pilot|Engineer|Nurse|Doctor|Pharmacist|Professional|Microbiologist|Business|Development|Public|Strategist|Communication|Expert|Technical|Operations|Management|Administrative)/gi
      );

      // Not anchored with `^`: the text is a single line by now, so an
      // anchored pattern could only match at the very start. Requires at
      // least seven digits overall (1-4 prefix digits plus six more, each
      // optionally preceded by one separator) so isolated numbers are skipped.
      const phone = text.match(/\+?\(?[0-9]{1,4}\)?(?:[-\s./]?[0-9]){6,}/g);

      console.log(occupation);
      console.log(name);
      console.log(phone);
      console.log(email);
      res.render('cv', { email: email, name: name, phone: phone, occupation: occupation });
    })
    // Hand failures to the application's error handling instead of leaving
    // the request unanswered.
    .catch(next);
});

0 个答案:

没有答案