我正在尝试创建一个将 PDF 文件转换为 JSON 的 API。解析 PDF 文件后得到的是逗号分隔的值。我使用的是 Node.js 的 pdf2json 包。解析 PDF 文件后得到的数据:
"val a: 1, val b: 2, val c, vald: 3, 4, 5, 6, val e: 7,"
期望的输出:
{
"val a" : 1,
"val b" : 2,
"val c" : "",
"vald" : "3,4,5,6",
"val e": 7
}
我正在使用的代码:
// Small Express service: GET / serves the upload page, POST / accepts a PDF
// upload (multipart field "filename"), extracts its raw text with pdf2json and
// writes the result to data2toJson.json.
const express = require('express');
const upload = require('express-fileupload');
const PDFParser = require("pdf2json");
const fs = require('fs');

const app = express();
const http = require('http').Server(app).listen(8080);
console.log("Server started on port 8080");

//express file uploader
app.use(upload());

app.get("/", (req, res) => {
    res.sendFile(__dirname + "/index.html");
});

/**
 * POST / - parse the uploaded PDF and persist its raw text.
 *
 * Responds with:
 *   400 when the request carries no file in the "filename" field,
 *   500 when parsing or writing fails,
 *   200 "done" only after the output file has actually been written.
 */
app.post("/", (req, res) => {
    const file = req.files && req.files.filename;
    if (!file) {
        // Without this branch the request would never receive a response.
        res.status(400).send("no file uploaded");
        return;
    }

    // One parser per request: a shared instance would collect a new pair of
    // listeners on every upload and mix up the results of concurrent requests.
    // The second argument (1) asks pdf2json to keep the raw text so that
    // getRawTextContent() can be used below.
    const pdfParser = new PDFParser(this, 1);

    pdfParser.on("pdfParser_dataError", errData => {
        console.error(errData.parserError);
        res.status(500).send("failed to parse pdf");
    });

    pdfParser.on("pdfParser_dataReady", () => {
        const output = JSON.stringify(pdfParser.getRawTextContent()).split(/\\r\\n/);
        // fs.writeFile needs string/Buffer data and a callback. Joining with ","
        // yields the same text the implicit Array-to-string coercion produced.
        fs.writeFile('data2toJson.json', output.join(','), 'utf8', err => {
            if (err) {
                console.error(err);
                res.status(500).send("failed to write output");
                return;
            }
            console.log("hellow" + Date());
            res.send("done");
        });
    });

    // Start parsing only after both handlers are attached so no event is missed.
    pdfParser.parseBuffer(file.data);
});

<!-- Upload form for the PDF converter. The field name "filename" is the key the
     POST / handler reads from req.files, so it must not be renamed.
     "required" stops the browser from submitting the form without a file. -->
<form method="post" enctype="multipart/form-data" action="/">
<label for="pdf-file">PDF file</label>
<input type="file" id="pdf-file" name="filename" accept=".pdf,application/pdf" required>
<input type="submit" value="upload">
</form>

答案 0(得分:0)
我根据您的问题尝试得到了预期的输出,请试试下面的解决方案。
希望这能对您有所帮助 :)
注意:您应该尝试用更优化的方案来实现同样的功能。
// Keys look like "val<name>:" (key with a value) or "val<name>," (key with no value).
const KEY_PATTERN = /(val([a-z0-9\s]+):)|(val([a-z0-9\s]+),)/g;

// A value that consists of a single plain decimal number.
const NUMERIC_PATTERN = /^-?\d+(?:\.\d+)?$/;

/**
 * Clean up the raw text found between two keys.
 *
 * Surrounding whitespace and one trailing comma are removed and the spacing
 * around inner commas is dropped ("3, 4, 5, 6," -> "3,4,5,6"). A value that is
 * a single decimal number is returned as a Number, anything else as a string.
 *
 * @param {string} raw - text between the end of a key and the next key
 * @returns {number|string} the normalized value ("" when nothing is left)
 */
function normalizeValue(raw) {
  const cleaned = raw
    .trim()
    .replace(/,$/, "")
    .replace(/\s*,\s*/g, ",")
    .trim();
  // Number("") would be 0, so only convert text that really is a number.
  return NUMERIC_PATTERN.test(cleaned) ? Number(cleaned) : cleaned;
}

/**
 * Turn a flat "key: value, key: value, ..." string into an object.
 *
 * Each value runs from the end of its key up to the start of the next key
 * (or the end of the text), so a value may itself contain commas.
 *
 * @param {string} text - the comma separated text to parse
 * @returns {Object<string, number|string>} one entry per key found; an empty
 *   object when the text contains no key
 */
function parseKeyValues(text) {
  // Fresh regex per call: a shared /g regex keeps its lastIndex between exec() calls.
  const pattern = new RegExp(KEY_PATTERN.source, "g");
  const found = [];
  let match = pattern.exec(text);
  while (match !== null) {
    found.push({
      name: match[0].slice(0, -1).trim(), // drop the trailing ":" or ","
      valueStart: match.index + match[0].length,
      keyStart: match.index,
    });
    match = pattern.exec(text);
  }

  const result = {};
  found.forEach((entry, i) => {
    const valueEnd = i + 1 < found.length ? found[i + 1].keyStart : text.length;
    result[entry.name] = normalizeValue(text.slice(entry.valueStart, valueEnd));
  });
  return result;
}

const input = "val a: 1, val b: 2, val c, vald: 3, 4, 5, 6, val e: 7,";
const output = parseKeyValues(input);
console.log(output);