在PHP中从图像中读取文本

时间:2013-11-01 07:03:45

标签: php

我使用OCR来读取图像中的文本,但它报错:找不到PHPUnit_Framework_TestCase类。我为此尝试了很多办法,也换用过其他不同的代码实现,但都没有任何输出。有没有人有可以在PHP网站中运行的示例?如果有,请帮帮我。我一直在为这个问题寻找解决方案。我用的是下面这段代码:

  require_once dirname(__FILE__).'/../tesseract_ocr/tesseract_ocr.php';

  /**
   * PHPUnit test case for TesseractOCR::recognize().
   *
   * The fixture images are looked up in the "images" directory that sits next
   * to this file. The tests shell out through TesseractOCR, so they can only
   * pass on a machine where the external OCR tooling is installed.
   */
  class TesseractOCRTest extends PHPUnit_Framework_TestCase {

    /** @var string Directory that holds the fixture images. */
    public $imagesPath;

    /**
     * Prepares the environment for every test: makes sure /usr/local/bin is
     * on PATH and resolves the fixture directory.
     */
    public function setUp() {
      $path  = getenv('PATH');
      $extra = '/usr/local/bin';
      // setUp runs before every test; only append the directory once so PATH
      // does not grow with each test method.
      if (!in_array($extra, explode(':', (string) $path), true)) {
        putenv("PATH=$path:$extra");
      }
      $this->imagesPath = dirname(__FILE__).'/images';
    }

    /**
     * Each fixture image must be recognized as exactly the expected text.
     */
    public function testTextRecognition() {
      $images = array(
        'denver-marathon_bibs.jpg' => '253',
        'image1.jpg' => 'Hello, Tesseract!',
        'image2.gif' => 'Works with a GIF image',
        'image3.png' => 'A PNG? Recognizes too!'
      );
      foreach ($images as $path => $text) {
        // assertEquals takes (expected, actual); the image path is wrapped in
        // double quotes because recognize() puts it on a shell command line.
        $this->assertEquals(
          $text,
          TesseractOCR::recognize("\"{$this->imagesPath}/$path\""),
          "Unexpected text recognized for $path"
        );
      }
    }

    /**
     * Passing a character whitelist must restrict what gets recognized.
     */
    public function testInducingRecognition() {
      $this->assertEquals(
        'GIT',
        TesseractOCR::recognize("\"{$this->imagesPath}/617.jpg\"", range('A','Z'))
      );
      $this->assertEquals(
        '6072',
        TesseractOCR::recognize("\"{$this->imagesPath}/gotz.jpg\"", range(0,9))
      );
    }
  }

tesseract_ocr.php文件的内容如下:

<?php
// Turn on output buffering as soon as this file is included: anything echoed
// after this point is held in PHP's output buffer instead of being sent
// immediately. No matching ob_end_*/ob_get_* call is visible in this file.
ob_start();


 /**
  * Small wrapper that runs the `convert` and `tesseract` command line tools
  * to extract text from an image.
  *
  * All methods are static and are meant to be called as TesseractOCR::method().
  * Intermediate files are created in the system temp directory and removed
  * again once recognition has finished.
  */
 class TesseractOCR {

   /**
    * Recognizes the text contained in an image.
    *
    * Any additional arguments after the image are treated as whitelist
    * characters (strings or arrays of characters) that restrict recognition.
    *
    * @param string $originalImage Image path exactly as it should appear on
    *                              the shell command line; it is NOT escaped
    *                              here, so the caller must quote it.
    * @return string Recognized text with surrounding whitespace trimmed, or
    *                an empty string when no output could be read.
    */
   public static function recognize($originalImage) {
     $arguments      = func_get_args();
     $tifImage       = self::convertImageToTif($originalImage);
     $configFile     = self::generateConfigFile($arguments);
     $outputFile     = self::executeTesseract($tifImage, $configFile);
     $recognizedText = self::readOutputFile($outputFile);
     self::removeTempFiles($tifImage, $outputFile, $configFile);
     return $recognizedText;
   }

   /**
    * Converts the image to a grayscale TIF in the temp directory via `convert`.
    *
    * @param string $originalImage Shell-ready (already quoted) image path.
    * @return string Raw path of the TIF file that `convert` was asked to write.
    */
   public static function convertImageToTif($originalImage) {
     $tifImage = self::tempPath('tesseract-ocr-tif-', '.tif');
     // NOTE(review): $originalImage is interpolated verbatim because existing
     // callers pass it pre-quoted; escaping it here would double-quote it.
     // Never pass untrusted input as the image argument.
     exec("convert -colorspace gray +matte $originalImage ".escapeshellarg($tifImage));
     return $tifImage;
   }

   /**
    * Writes the tesseract config file; it is empty when no whitelist is given.
    *
    * @param array $arguments Arguments of recognize(): image first, then any
    *                         whitelist entries.
    * @return string Raw path of the config file.
    */
   public static function generateConfigFile($arguments) {
     $configFile = self::tempPath('tesseract-ocr-config-', '.conf');
     $whitelist  = self::generateWhitelist($arguments);
     // Compare against '' rather than using empty(): a whitelist of exactly
     // "0" is valid but would be considered empty by empty().
     $contents = ($whitelist === '') ? '' : "tessedit_char_whitelist $whitelist";
     // file_put_contents creates the file even for empty contents, so there
     // is no need to shell out to `touch` first.
     file_put_contents($configFile, $contents);
     return $configFile;
   }

   /**
    * Joins every whitelist argument into a single string of characters.
    *
    * @param array $arguments Arguments of recognize(); the first element (the
    *                         image path) is skipped.
    * @return string Concatenated whitelist, or '' when none was given.
    */
   public static function generateWhitelist($arguments) {
     array_shift($arguments); // first element is the image path
     $whitelist = '';
     foreach ($arguments as $chars) {
       // Each entry may be a plain string or an array of characters.
       $whitelist .= implode('', (array) $chars);
     }
     return $whitelist;
   }

   /**
    * Runs tesseract on the TIF image using the given config file.
    *
    * @param string $tifImage   Raw path of the image to recognize.
    * @param string $configFile Raw path of the tesseract config file.
    * @return string Raw path of the text file tesseract is expected to write.
    */
   public static function executeTesseract($tifImage, $configFile) {
     $outputFile = self::tempPath('tesseract-ocr-output-');
     // These are raw filesystem paths (they are later handed to unlink), so
     // escape them for the shell in case the temp directory contains spaces.
     exec(
       'tesseract '.escapeshellarg($tifImage).' '.escapeshellarg($outputFile)
       .' nobatch '.escapeshellarg($configFile).' 2> /dev/null'
     );
     return $outputFile.'.txt'; // tesseract appends txt extension to output file
   }

   /**
    * Reads the recognized text produced by tesseract.
    *
    * @param string $outputFile Raw path of the tesseract output file.
    * @return string Trimmed file contents, or '' when the file is missing or
    *                unreadable (e.g. tesseract failed to run).
    */
   public static function readOutputFile($outputFile) {
     if (!is_file($outputFile)) {
       return '';
     }
     $contents = file_get_contents($outputFile);
     return ($contents === false) ? '' : trim($contents);
   }

   /**
    * Deletes every file path passed as an argument; paths that do not exist
    * are skipped silently.
    */
   public static function removeTempFiles() {
     foreach (func_get_args() as $file) {
       if (is_file($file)) {
         unlink($file);
       }
     }
   }

   /**
    * Builds a randomized file path inside the system temp directory.
    *
    * @param string $prefix File name prefix.
    * @param string $suffix File name suffix (extension), may be empty.
    * @return string Path of the form <tmp>/<prefix><random><suffix>.
    */
   private static function tempPath($prefix, $suffix = '') {
     // sys_get_temp_dir() normally has no trailing slash, so the separator
     // must be added explicitly; rtrim guards against configurations that
     // do include one.
     $dir = rtrim(sys_get_temp_dir(), '/\\');
     return $dir.DIRECTORY_SEPARATOR.$prefix.rand().$suffix;
   }
 }
?>

0 个答案:

没有答案