我有一个项目:允许用户上传 docx 文件,并从中提取以下信息:姓名、电子邮件、电话号码、地址、职业、个人简介等,然后仅使用 Node.js 和 JavaScript 将这些详细信息填入 <span class=name></span> 之类的元素中。
我尝试过使用正则表达式,但它在提取姓名、号码等内容时,会把其他看起来像姓名或数字的单词也一并提取出来。
我该如何使用正则表达式只提取我需要的特定单词或句子?或者是否还有其他方法?
// Job-title keywords looked up in the CV text. Duplicates from the original
// list were removed and the misspelled "Srategist" entry was corrected so it
// can actually match.
const OCCUPATION_KEYWORDS = [
  'Health and', 'Safety', 'Specialist', 'Pilot', 'Engineer', 'Nurse', 'Doctor',
  'Pharmacist', 'Professional', 'Microbiologist', 'Business', 'Development',
  'Public', 'Strategist', 'Communication', 'Expert', 'Technical', 'Operations',
  'Management', 'Administrative',
];

// Whole-word, case-insensitive matcher for the keywords above. The word
// boundaries stop it from matching inside longer words (e.g. "Publication").
const OCCUPATION_PATTERN = new RegExp(`\\b(?:${OCCUPATION_KEYWORDS.join('|')})\\b`, 'gi');

/**
 * Collapse the raw document text into one line of single-space-separated words.
 *
 * @param {string} rawText - Text as returned in `result.value`.
 * @returns {string} The normalized text.
 */
function normalizeCvText(rawText) {
  return rawText
    .trim()
    // Kept from the original: without the `m` flag this drops only the first line.
    // NOTE(review): confirm the first line is really meant to be discarded.
    .replace(/^.+\n|^.+\r/g, '')
    // \s already covers \n and \r, so one pass replaces the two original passes.
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Pull the candidate fields out of normalized CV text with heuristic regexes.
 *
 * Every field is the result of `String.prototype.match`, i.e. an array on
 * success or `null` when nothing matched, which is the same shape the view
 * received before.
 *
 * @param {string} text - Single-line text produced by `normalizeCvText`.
 * @returns {{email: ?Array, name: ?Array, phone: ?Array, occupation: ?Array}}
 */
function extractCvFields(text) {
  // Longest alternative first: with the one-word alternative listed first the
  // two- and three-word alternatives could never be reached.
  const name = text.match(
    /^(?:[a-zA-Z0-9]+\s[a-zA-Z0-9]{3,}\s[a-zA-Z0-9]+|[a-zA-Z0-9]+\s[a-zA-Z0-9]+|[a-zA-Z0-9]+)/g
  );

  // Restrict the local part and domain to address characters; the previous
  // `.{1,}@` was greedy and captured everything from the start of the text.
  const email = text.match(/[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+/);

  // No `^` anchor: the text is a single line, so an anchored pattern could only
  // match at position 0. Requiring at least seven digits filters out years and
  // other short numbers; this is still a heuristic.
  const phone = text.match(/\+?\(?\d{1,4}\)?(?:[-\s./]?\d){6,14}/g);

  const occupation = text.match(OCCUPATION_PATTERN);

  return { email: email, name: name, phone: phone, occupation: occupation };
}

// POST /upload: read the uploaded .docx, extract contact fields from its raw
// text and render them with the 'cv' view.
app.post('/upload', upload.single('cv'), (req, res, next) => {
  // req.file is read below; without this guard a request with no file would
  // fail with a TypeError instead of a client error.
  if (!req.file) {
    res.status(400).send('No file uploaded');
    return;
  }

  const path = req.file.path;
  console.log(path);

  mammoth.extractRawText({ path: path })
    .then((result) => {
      const text = normalizeCvText(result.value);
      console.log(text);

      const fields = extractCvFields(text);
      console.log(fields.occupation);
      console.log(fields.name);
      console.log(fields.phone);
      console.log(fields.email);

      res.render('cv', fields);
    })
    // Hand extraction/rendering failures to the Express error handler so the
    // request gets a response instead of the error being rethrown by .done().
    .catch(next);
});