/**
 * Wires up the resume file picker once the page has loaded.
 *
 * When the user picks a file in #myResume, the file is dispatched on its
 * MIME type:
 *   - text/*             -> contents are shown in #displayResume
 *   - image/*            -> image is passed to OCRAD and the result alerted
 *   - application/pdf    -> handed to Tesseract (see review note below)
 *   - application/msword -> preview area is cleared (no extraction yet)
 *   - anything else      -> an error message is shown in #displayResume
 */
window.onload = function () {
  var myResume = document.getElementById('myResume');
  var displayResume = document.getElementById('displayResume');
  var textType = /text.*/;
  var imageType = /image.*/;

  /**
   * Reads `file` with a fresh FileReader and passes the result to `onRead`.
   *
   * @param {File} file - the file chosen by the user
   * @param {string} method - FileReader method name, e.g. 'readAsText'
   * @param {function(string)} onRead - called with reader.result on success
   */
  function readFile(file, method, onRead) {
    var reader = new FileReader();
    reader.onload = function () {
      onRead(reader.result);
    };
    // Previously a failed read was silently ignored.
    reader.onerror = function () {
      console.error('Could not read the selected file', reader.error);
    };
    reader[method](file);
  }

  myResume.addEventListener('change', function () {
    var resume = myResume.files[0];

    // The change event also fires when the selection is cleared or the
    // dialog is cancelled; there is no file to inspect in that case.
    if (!resume) {
      return;
    }

    if (textType.test(resume.type)) {
      readFile(resume, 'readAsText', function (text) {
        displayResume.innerText = text;
      });
    } else if (imageType.test(resume.type)) {
      readFile(resume, 'readAsDataURL', function (dataUrl) {
        displayResume.innerHTML = "";
        var img = new Image();
        // The image is not available the moment `src` is assigned, so run
        // OCR only after the load event. The handler is attached before
        // `src` is set so the event cannot be missed.
        img.onload = function () {
          var string = OCRAD(img);
          alert(string);
        };
        img.src = dataUrl;
      });
    } else if (resume.type === 'application/pdf') {
      // NOTE(review): a PDF data URL is not an image format, so this
      // probably never produces text. Rendering the pages to a canvas with
      // a PDF library first is likely required - confirm before relying on it.
      readFile(resume, 'readAsDataURL', function (dataUrl) {
        displayResume.innerHTML = "";
        var img = new Image();
        img.src = dataUrl;
        Tesseract.recognize(img)
          .progress(function (p) { console.log('progress', p); })
          .then(function (result) { console.log('result', result); });
      });
    } else if (resume.type === 'application/msword') {
      // Placeholder: Word documents are read but no text is extracted yet.
      readFile(resume, 'readAsDataURL', function () {
        displayResume.innerHTML = "";
      });
    } else {
      displayResume.innerText = "Media type couldn't recognized.";
    }
  });
};
/**
 * Warns the user when Submit is clicked without any file selected in
 * either the resume (#myResume) or the cover letter (#myCover) input.
 */
$("#submitBTN").click(function () {
  // Look the inputs up explicitly instead of relying on the browser
  // exposing elements as id-named globals.
  var resumeInput = document.getElementById('myResume');
  var coverInput = document.getElementById('myCover');
  var hasResume = resumeInput !== null && resumeInput.files.length > 0;
  var hasCover = coverInput !== null && coverInput.files.length > 0;

  if (!hasResume && !hasCover) {
    alert("No files uploaded.");
  }
});
/* Main content wrapper: auto left/right margins plus a large top padding
   that pushes the content down the page (presumably to sit below the
   fixed-top navbar - confirm against the markup). */
.centralize {
margin-left: auto;
margin-right: auto;
padding-top: 250px;
}
/* Output area for the selected resume: full width, spaced from the
   controls above it, and scrolls horizontally when content is too wide. */
#displayResume {
margin-top: 2em;
width: 100%;
overflow-x: auto;
}
/* Page title in the navbar: white, centred, spanning the full width.
   The !important flags force float and colour over any competing rules
   (presumably the framework's navbar-brand styles - verify). */
.header-title {
float: none !important;
color: #FFF !important;
text-align: center;
width: 100%;
}
/* Navbar header container: full width, centred text, with vertical padding. */
.navbar-header {
width: 100%;
text-align: center;
padding-top: 25px;
padding-bottom: 25px;
}
<!doctype html>
<html lang="en" ng-app>
<head>
  <meta charset="UTF-8">
  <!-- X-UA-Compatible takes a document-mode value; "text/html" is not one. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="description" content="">
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
  <meta name="robots" content="all,follow">
  <title>Arete Human Resources</title>
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
  <link href="css/ionic.min.css" rel="stylesheet">
  <link href="css/style.css" rel="stylesheet">
  <script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.2.23/angular.min.js"></script>
</head>
<body>
  <!-- Fixed top bar with the page title. -->
  <nav class="navbar navbar-inverse navbar-fixed-top">
    <div class="container">
      <div class="navbar-header">
        <a class="navbar-brand header-title">Arete Human Resources</a>
      </div>
    </div>
  </nav>

  <div class="container centralize">
    <!-- Row 1: resume upload, with a name field echoed via Angular binding. -->
    <div class="row">
      <div class="col-xs-5 col-sm-5 col-md-5 col-lg-5">
        <input type="file" id="myResume" accept=".txt,.doc,.docx,.pdf,.jpg">
      </div>
      <div class="col-xs-4 col-sm-4 col-md-4 col-lg-4">
        <input type="text" ng-model="fileName" placeholder="Your name here">
      </div>
      <div class="col-xs-3 col-sm-3 col-md-3 col-lg-3">
        <h4>{{fileName}}</h4>
      </div>
    </div>

    <!-- Row 2: cover letter upload, with a description field echoed via Angular binding. -->
    <div class="row">
      <div class="col-xs-5 col-sm-5 col-md-5 col-lg-5">
        <input type="file" id="myCover" accept=".txt,.doc,.docx,.pdf,.jpg">
      </div>
      <div class="col-xs-4 col-sm-4 col-md-4 col-lg-4">
        <input type="text" ng-model="fileDesc" placeholder="Explanations">
      </div>
      <div class="col-xs-3 col-sm-3 col-md-3 col-lg-3">
        <h5>{{fileDesc}}</h5>
      </div>
    </div>

    <hr>
    <!-- The click handler is attached from js/main.js, so no inline onclick is needed. -->
    <button type="submit" class="button button-outline button-positive" id="submitBTN" style="float:right">Submit</button>
    <!-- Extracted resume text is rendered here by js/main.js. -->
    <div id="displayResume"></div>
  </div>

  <!-- jQuery is loaded over HTTPS and pinned: the "jquery-latest" alias is
       frozen at 1.11.1, and plain http is blocked as mixed content on an
       HTTPS page. -->
  <script src="https://code.jquery.com/jquery-1.11.1.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
  <script src="js/ionic.min.js" type="text/javascript"></script>
  <script src="js/ionic.bundle.min.js" type="text/javascript"></script>
  <script src="js/ionic-angular.min.js" type="text/javascript"></script>
  <script src="js/ocrad.js" type="text/javascript"></script>
  <!--<script src="js/require.js" type="text/javascript"></script>-->
  <script src='https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js'></script>
  <script src="js/main.js" type="text/javascript"></script>
</body>
</html>
我想获取用户上传的文件,将其内容转换为文本,然后再把用户提交的文件发送出去。我预计主要的输入类型是纯文本、PDF、docx,甚至图片。
对于图片(image),我找到了 ocrad,它运行得非常好;但对于 pdf/docx,尽管我尝试了 pdfreader、pdf-to-text、composer、pdf.js、docxtemplater、study.js 以及其他许多库,却始终无法正常工作。
我像往常一样在 node 的终端中安装了这些库,也进行了调试,但仍然找不到办法。这是 CodePen,下面是代码片段;如果有人能推荐一个确实可用的现有库,我会很高兴。
答案 0 :(得分:1)
很抱歉文档中没有明确说明这一点:pdfreader 运行在 Node.js 上,而不是在 Web 浏览器中运行,因此才会出现 require 的问题。
以下是如何将您的PDF简历转换为文本,来自Node.js:
var pdfreader = require('pdfreader');
// Text fragments of the current page, grouped by their y-position.
let rows = {};

/**
 * Logs the collected rows from top to bottom: keys of `rows` are ordered
 * numerically by y-position and each row's fragments are joined into one line.
 */
function printRows() {
  const yPositions = Object.keys(rows); // keys are stringified floats
  yPositions.sort(function (a, b) {
    return parseFloat(a) - parseFloat(b);
  });
  for (const y of yPositions) {
    const fragments = rows[y] || [];
    console.log(fragments.join(''));
  }
}
// Streams the items of the PDF through the callback and prints the text
// page by page, one console line per row of text.
new pdfreader.PdfReader().parseFileItems('CV_ErhanYasar.pdf', function (err, item) {
  if (err) {
    // Previously ignored; report the failure instead of continuing.
    console.error('Failed to parse PDF:', err);
    return;
  }
  if (!item) {
    // End of file: flush the rows of the last page. There is no item to
    // read a page number from, so nothing else is logged here.
    printRows();
    rows = {}; // reset so a later run starts clean
    return;
  }
  if (item.page) {
    // New page: flush the rows collected so far, then announce the page.
    printRows();
    console.log('PAGE:', item.page);
    rows = {}; // clear rows for next page
  } else if (item.text) {
    // Accumulate text items into the rows object, grouped by y-position.
    (rows[item.y] = rows[item.y] || []).push(item.text);
  }
});
=> 然后,这就是你将得到的输出: