Esempio n. 1
0
 /**
  * Test of TessBaseAPIGetHOCRText method, of class TessAPI1.
  *
  * <p>Reads {@code eurotext.tif} from the test resources directory, passes the converted image
  * buffer to the Tesseract handle, requests the hOCR text for page 0 and checks that it contains
  * an {@code ocr_page} div.
  *
  * @throws Exception while getting ocr text from image.
  */
 @Test
 public void testTessBaseAPIGetHOCRText() throws Exception {
   logger.info("TessBaseAPIGetHOCRText");
   File tiff = new File(this.testResourcesDataPath, "eurotext.tif");
   // Read from the File rather than a hand-opened FileInputStream: ImageIO.read(InputStream)
   // never closes the caller's stream, so the previous code leaked a file handle.
   BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
   if (image == null) {
     // ImageIO.read returns null (instead of throwing) when no registered reader can decode
     // the file; fail with a clear message rather than an NPE further down.
     throw new IllegalStateException(
         "No ImageIO reader could decode " + tiff + " (is the TIFF plugin on the classpath?)");
   }
   ByteBuffer buf = ImageIOHelper.convertImageData(image);
   int bpp = image.getColorModel().getPixelSize();
   int bytespp = bpp / 8; // whole bytes per pixel
   int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0); // bytes per line, rounded up
   TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
   TessAPI1.TessBaseAPIInit3(handle, datapath, language);
   TessAPI1.TessBaseAPISetImage(
       handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
   int pageNumber = 0;
   Pointer utf8Text = TessAPI1.TessBaseAPIGetHOCRText(handle, pageNumber);
   if (utf8Text == null) {
     throw new IllegalStateException("TessBaseAPIGetHOCRText returned no text");
   }
   String result;
   try {
     result = utf8Text.getString(0);
   } finally {
     // Always hand the text buffer back to TessDeleteText, even if decoding the string fails.
     TessAPI1.TessDeleteText(utf8Text);
   }
   assertTrue(result.contains("<div class='ocr_page'"));
 }