示例#1
0
 /**
  * Test of Orientation and script detection (OSD).
  *
  * <p>Sets the page segmentation mode to {@code PSM_AUTO_OSD}, reads it back, hands the pixels
  * of {@code eurotext.tif} to the engine and, when recognition reports success, logs the
  * orientation, writing direction, textline order and deskew angle obtained from the page
  * iterator. The only assertion is that the mode read back equals the mode that was set.
  *
  * @throws Exception while processing the image.
  */
 @Test
 public void testOSD() throws Exception {
   logger.info("OSD");
   int expResult = TessPageSegMode.PSM_AUTO_OSD;

   // Single-element out-parameters filled in by TessPageIteratorOrientation.
   IntBuffer orientation = IntBuffer.allocate(1);
   IntBuffer direction = IntBuffer.allocate(1);
   IntBuffer order = IntBuffer.allocate(1);
   FloatBuffer deskew_angle = FloatBuffer.allocate(1);

   File tiff = new File(this.testResourcesDataPath, "eurotext.tif");
   // Read straight from the File: ImageIO.read(InputStream) does not close the stream it is
   // given, so wrapping the file in a FileInputStream here would leak the descriptor.
   BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
   if (image == null) {
     // ImageIO.read returns null when no registered reader can decode the input.
     throw new IllegalStateException("No ImageIO reader could decode " + tiff);
   }

   ByteBuffer buf = ImageIOHelper.convertImageData(image);
   int bpp = image.getColorModel().getPixelSize();
   int bytespp = bpp / 8;
   // Bytes per scan line, rounded up so that sub-byte pixel sizes still cover a whole row.
   int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);

   TessAPI1.TessBaseAPIInit3(handle, datapath, language);
   TessAPI1.TessBaseAPISetPageSegMode(handle, expResult);
   int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle);
   logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
   TessAPI1.TessBaseAPISetImage(
       handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);

   int success = TessAPI1.TessBaseAPIRecognize(handle, null);
   if (success == 0) {
     TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
     try {
       TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
       logger.info(
           String.format(
               "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
               Utils.getConstantName(orientation.get(), TessOrientation.class),
               Utils.getConstantName(direction.get(), TessWritingDirection.class),
               Utils.getConstantName(order.get(), TessTextlineOrder.class),
               deskew_angle.get()));
     } finally {
       // The page iterator is a native object; release it even if logging throws.
       TessAPI1.TessPageIteratorDelete(pi);
     }
   }
   assertEquals(expResult, actualResult);
 }
示例#2
0
  /**
   * Test of extending Tesseract.
   *
   * <p>Runs word-level extraction on {@code eurotext.tif} through a {@code TessExtension}
   * instance, logs every returned word, and checks that the leading words match the expected
   * text token for token.
   *
   * @throws java.lang.Exception if extracting the text elements fails
   */
  @Test
  public void testExtendingTesseract() throws Exception {
    logger.info("Extends Tesseract");
    File imageFile = new File(this.testResourcesDataPath, "eurotext.tif");

    String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
    // Split on single whitespace characters so each expected token lines up with one word.
    String[] expResults = expResult.split("\\s");

    TessExtension instance1 = new TessExtension();
    instance1.setDatapath(new File(datapath).getPath());
    int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
    logger.info(
        "PageIteratorLevel: "
            + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
    List<Word> result = instance1.getTextElements(imageFile, pageIteratorLevel);

    // print the complete result
    for (Word word : result) {
      logger.info(word.toString());
    }

    // Clamp the slice to the number of words actually returned: a short OCR result then shows
    // up as an assertion failure on the arrays instead of an IndexOutOfBoundsException.
    int compared = Math.min(expResults.length, result.size());
    List<String> text = new ArrayList<String>(compared);
    for (Word word : result.subList(0, compared)) {
      text.add(word.getText());
    }

    assertArrayEquals(expResults, text.toArray());
  }