OpenCV Java文本分段

时间:2017-05-13 17:42:46

标签: java c++ opencv tesseract


尽管字符清晰可见，但这个仍然失败了：this one fails even though the characters are clearly visible

即使 6 清晰可见，Z 和 6 也都无法被检测到：Z and 6 are not detected, even though the 6 is clearly visible


// --- Stage 1: locate the plate region ---------------------------------------
// Keep untouched copies of the input before `image` is overwritten in place.
originalFrame = image.clone();
roiColor = image.clone();
Imgproc.cvtColor(image, image, Imgproc.COLOR_BGR2GRAY, 0);
originalFrameGrayScale = image.clone();

// Top-hat -> second-order x-derivative Sobel -> blur -> close -> threshold.
// Each call writes its result back into `image`.
Mat morph = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(9, 9));
Imgproc.morphologyEx(image, image, Imgproc.MORPH_TOPHAT, morph);
Imgproc.Sobel(image, image, -1, 2, 0);
Imgproc.GaussianBlur(image, image, new Size(5, 5), 3, 3);
Imgproc.morphologyEx(image, image, Imgproc.MORPH_CLOSE, morph);
// NOTE(review): with THRESH_OTSU the threshold is chosen automatically, so the
// explicit 200 is presumably ignored -- confirm against the OpenCV docs.
Imgproc.threshold(image, image, 200, 255, Imgproc.THRESH_OTSU);

// NOTE(review): get(0) throws when detectionContour returns an empty vector;
// the enclosing method should decide how to handle "no plate found".
Vector<Rect> rectangles = detectionContour(image);
Rect plateRect = rectangles.get(0);

// --- Stage 2: crop the plate and binarize it --------------------------------
// The original cloned the whole grayscale frame here and immediately replaced it
// with the submat below; that discarded full-frame copy has been removed.
// `roi` is a view into originalFrameGrayScale, so the in-place filters below
// also alter that region of originalFrameGrayScale.
Mat roi = originalFrameGrayScale.submat(plateRect);
roiBlack = roi.clone();
roiColor = roiColor.submat(plateRect);
Imgproc.rectangle(originalFrame, plateRect.br(), plateRect.tl(), new Scalar(0, 0, 255), 2);

Imgproc.medianBlur(roi, roi, 3);
// NOTE(review): maxValue is 225, not 255 -- possibly a typo; left unchanged.
Imgproc.adaptiveThreshold(roi, roi, 225, Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C, Imgproc.THRESH_BINARY, 15, 3);
// Snapshot of the binarized plate; this (not the edge image) is what gets OCR'd.
roiBinarize = roi.clone();

// NOTE(review): both kernels are 1x1 and both operations are MORPH_OPEN, so these
// two calls should leave the image unchanged -- presumably tuning placeholders.
Mat erode = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1));
Mat dilate = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1));
Imgproc.morphologyEx(roi, roi, Imgproc.MORPH_OPEN, dilate);
Imgproc.morphologyEx(roi, roi, Imgproc.MORPH_OPEN, erode);

// --- Stage 3: find character boxes on the edge map and OCR them -------------
Imgproc.Canny(roi, roi, 150, 150 * 3, 3, true);
Vector<Rect> letters = detectionPlateCharacterContour(roi);
doTesseractOCR(letters, roiBinarize);

    private static void doTesseractOCR(Vector<Rect> letters, Mat plate){
    Tesseract instance = new Tesseract(); //
    String resultPlate = "";
    for(int i= 0; i < letters.size(); i++){

     BufferedImage letter = OpenCvUtils.Mat2bufferedImage(plate.submat(letters.get(i)));
        try {
        String result = instance.doOCR(letter);
        resultPlate += result + " position "+i;

        } catch (TesseractException e) {
        System.out.println("Tesseract output: "+resultPlate);

    private static Vector<Rect> detectionPlateCharacterContour(Mat roi) {
    Mat contHierarchy = new Mat();
    Mat imageMat = roi.clone();
    Rect rect = null;
    List<MatOfPoint> contours = new ArrayList<>();
    Imgproc.findContours(imageMat, contours, contHierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_NONE);
    Vector<Rect> rect_array = new Vector<>();

    for (int i = 0; i < contours.size(); i++) {
        rect = Imgproc.boundingRect(contours.get(i));
        double ratio = 0;

               if(rect.height > rect.width){
            ratio = rect.height/rect.width;

                ratio = rect.width/rect.height;

         Logger.printMessage("Ratio of letter: "+ratio);
      double contourarea = Imgproc.contourArea(contours.get(i));
         if (contourarea >= 100 && contourarea <= 1000 && ( ratio >= 1 && ratio <= 2)) {
         Imgproc.rectangle(roiColor,,, new Scalar(255,0,0));

    return rect_array;

