Ejemplo n.º 1
0
 /**
  * Test of doOCR method, of class Tesseract, using the list-of-images overload.
  *
  * <p>The pages of {@code eurotext.pdf} are loaded as a list of {@code IIOImage}s, OCR is run with
  * {@code null} passed as the rectangle argument, and the beginning of the recognized text is
  * compared with the expected sentence. Text recognized after the expected sentence is ignored.
  *
  * @throws Exception while processing image.
  */
 @Test
 public void testDoOCR_List_Rectangle() throws Exception {
   final String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
   logger.info("doOCR on a PDF document");
   // Created outside the try block so the error handler can always report the file path.
   final File imageFile = new File(this.testResourcesDataPath, "eurotext.pdf");
   try {
     List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
     String result = instance.doOCR(imageList, null);
     logger.info(result);
     // Compare only the leading part of the OCR output. If the output is shorter than the
     // expected text, compare it as a whole so the assertion reports a readable diff instead of
     // a StringIndexOutOfBoundsException.
     String actualPrefix =
         result.length() > expResult.length() ? result.substring(0, expResult.length()) : result;
     assertEquals(expResult, actualPrefix);
   } catch (IOException | TesseractException e) {
     logger.error(
         "Exception-Message: '{}'. Imagefile: '{}'",
         e.getMessage(),
         imageFile.getAbsoluteFile(),
         e);
     fail("OCR of '" + imageFile.getAbsolutePath() + "' failed: " + e);
   }
 }
  /**
   * Checks that the {@code tessdata} resources can be extracted into a folder and used as the data
   * path for an OCR run on the {@code /test-data/eurotext.pdf} classpath resource.
   *
   * <p>Only the existence of the extracted folder is asserted. A failure of the OCR run itself is
   * logged but does not fail the test.
   */
  @Test
  public void testFolderExtraction() {

    File tessDataFolder = null;

    try {

      // Locates the sample document on the classpath.
      String filename = String.format("%s/%s", "/test-data", "eurotext.pdf");
      URL defaultImage = getClass().getResource(filename);
      if (defaultImage == null) {
        // getResource returns null for a missing resource; report it instead of letting the
        // toURI() call below fail with an unexplained NullPointerException.
        logger.error("Test resource not found on the classpath: " + filename);
      }
      assertTrue(defaultImage != null);
      File imageFile = new File(defaultImage.toURI());

      // Extracts the tessdata folder into a temporary folder.
      logger.info("Loading the tessdata folder into a temporary folder.");
      tessDataFolder = LoadLibs.extractTessResources("tessdata");

      // Gets a tesseract instance and, if the extraction produced a folder, points it there.
      ITesseract instance = new Tesseract();

      if (tessDataFolder != null) {
        String dataPath = tessDataFolder.getAbsolutePath();
        logger.info(dataPath);
        instance.setDatapath(dataPath);
        instance.setLanguage("por");
      }

      // Performs OCR on the document.
      String result = instance.doOCR(imageFile);
      logger.info(result);

    } catch (TesseractException | URISyntaxException e) {
      // Logged once, with the stack trace; the final assertion decides the test outcome.
      logger.error(e.getMessage(), e);
    }

    // checks if tessdata folder exists
    assertTrue(tessDataFolder != null && tessDataFolder.exists());
  }