示例#1
0
 /**
  * Test of doOCR method, of class Tesseract, with a configs list applied.
  *
  * <p>Sets the {@code digits} config, runs OCR on {@code eurotext.png} and checks that the whole
  * result matches a pattern allowing only digits, minus signs, dots, spaces and newlines.
  *
  * @throws java.lang.Exception if the OCR call fails
  */
 @Test
 public void testDoOCR_File_With_Configs() throws Exception {
   logger.info("doOCR with configs");
   File imageFile = new File(this.testResourcesDataPath, "eurotext.png");
   String expResult = "[-0123456789.\n ]+";
   List<String> configs = Arrays.asList("digits");
   instance.setConfigs(configs);
   try {
     String result = instance.doOCR(imageFile);
     logger.info(result);
     assertTrue(result.matches(expResult));
   } finally {
     // Clear the configs even when the OCR call or the assertion fails, so the "digits"
     // restriction cannot carry over into subsequent runs that reuse this Tesseract instance.
     instance.setConfigs(null);
   }
 }
示例#2
0
 /**
  * Test of doOCR method, of class Tesseract, on the images of a PDF document.
  *
  * <p>Loads {@code eurotext.pdf} as a list of images, runs OCR with a {@code null} second
  * argument and checks that the recognized text starts with the expected two lines.
  *
  * @throws Exception while processing image.
  */
 @Test
 public void testDoOCR_List_Rectangle() throws Exception {
   File imageFile = null;
   String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
   String result = "<empty>";
   try {
     logger.info("doOCR on a PDF document");
     imageFile = new File(this.testResourcesDataPath, "eurotext.pdf");
     List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
     result = instance.doOCR(imageList, null);
     logger.info(result);
     assertEquals(expResult, result.substring(0, expResult.length()));
   } catch (IOException e) {
     logOcrFailureAndFail(e, imageFile);
   } catch (TesseractException e) {
     logOcrFailureAndFail(e, imageFile);
   } catch (StringIndexOutOfBoundsException e) {
     // substring throws this when the OCR output is shorter than the expected text
     logOcrFailureAndFail(e, imageFile);
   }
 }

 /**
  * Logs the given exception together with the image file being processed, then fails the test
  * with a message naming the exception so the cause is visible in the test report.
  *
  * @param e the exception raised while loading or recognizing the image
  * @param imageFile the file being processed; may be {@code null} if it was never assigned
  */
 private void logOcrFailureAndFail(Exception e, File imageFile) {
   Object location = (imageFile == null) ? "<unknown>" : imageFile.getAbsoluteFile();
   logger.error("Exception-Message: '{}'. Imagefile: '{}'", e.getMessage(), location, e);
   fail(e.getClass().getSimpleName() + " while processing '" + location + "': " + e.getMessage());
 }
示例#3
0
 /**
  * Runs doOCR on the {@code eurotext.png} file and checks that the recognized text begins with
  * the expected two lines.
  *
  * @throws Exception while processing image.
  */
 @Test
 public void testDoOCR_File() throws Exception {
   logger.info("doOCR on a PNG image");
   final String expectedPrefix =
       "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
   final File png = new File(this.testResourcesDataPath, "eurotext.png");

   final String recognized = instance.doOCR(png);
   logger.info(recognized);

   // Only the leading part of the OCR output is compared.
   final String actualPrefix = recognized.substring(0, expectedPrefix.length());
   assertEquals(expectedPrefix, actualPrefix);
 }
示例#4
0
 /**
  * Runs doOCR on the {@code eurotext.bmp} file restricted to a bounding rectangle and checks that
  * the recognized text begins with the expected two lines.
  *
  * @throws Exception while processing image.
  */
 @Test
 public void testDoOCR_File_Rectangle() throws Exception {
   logger.info("doOCR on a BMP image with bounding rectangle");
   final String expectedPrefix =
       "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";

   // Region of interest: must be equal to or smaller than the image itself.
   final Rectangle regionOfInterest = new Rectangle(0, 0, 1024, 800);
   final File bmp = new File(this.testResourcesDataPath, "eurotext.bmp");

   final String recognized = instance.doOCR(bmp, regionOfInterest);
   logger.info(recognized);

   final String actualPrefix = recognized.substring(0, expectedPrefix.length());
   assertEquals(expectedPrefix, actualPrefix);
 }
  /**
   * Extracts the {@code tessdata} resources via {@code LoadLibs.extractTessResources}, uses the
   * resulting folder as the data path for an OCR run on {@code eurotext.pdf}, and asserts that the
   * extracted folder exists.
   *
   * <p>The OCR step is best-effort: a {@code TesseractException} or {@code URISyntaxException} is
   * logged and does not by itself fail the test. A missing image resource does fail the test.
   */
  @Test
  public void testFolderExtraction() {

    File tessDataFolder = null;

    try {

      // Loads the image from resources.
      String filename = "/test-data/eurotext.pdf";
      URL defaultImage = getClass().getResource(filename);
      if (defaultImage == null) {
        // Report the missing resource by name instead of failing later with a bare NPE.
        logger.error("Test resource not found on classpath: " + filename);
      }
      assertTrue(defaultImage != null);
      File imageFile = new File(defaultImage.toURI());

      // Extracts the tessdata folder into a temp folder.
      logger.info("Loading the tessdata folder into a temporary folder.");
      tessDataFolder = LoadLibs.extractTessResources("tessdata");

      // Gets tesseract instance and sets data path.
      ITesseract instance = new Tesseract();

      if (tessDataFolder != null) {
        logger.info(tessDataFolder.getAbsolutePath());
        instance.setDatapath(tessDataFolder.getAbsolutePath());
        instance.setLanguage("por");
      }

      // Performs OCR on the image.
      String result = instance.doOCR(imageFile);
      logger.info(result);

    } catch (TesseractException e) {
      logger.error(e.getMessage(), e);
    } catch (URISyntaxException e) {
      logger.error(e.getMessage(), e);
    }

    // checks if tessdata folder exists
    assertTrue(tessDataFolder != null && tessDataFolder.exists());
  }
示例#6
0
 /**
  * Test of doOCR method, of class Tesseract, on a PNG image that is accompanied by a UNLV zone
  * file ({@code .uzn}).
  *
  * <p>Compares the trimmed OCR output against the full expected text, in which the e-mail lines
  * come before the "quick brown fox" lines.
  *
  * @throws java.lang.Exception if the OCR call fails
  */
 @Test
 public void testDoOCR_UNLV_Zone_File() throws Exception {
   logger.info("doOCR on a PNG image with UNLV zone file .uzn");
   // UNLV zone format: left top width height label
   File imageFile = new File(this.testResourcesDataPath, "eurotext_unlv.png");
   // NOTE(review): the e-mail address below replaces a "[email protected]" placeholder left by
   // web-page e-mail obfuscation; confirm it against the text printed in the test image.
   String expResult =
       "& duck/goose, as 12.5% of E-mail\n\n"
           + "from aspammer@website.com is spam.\n\n"
           + "The (quick) [brown] {fox} jumps!\n"
           + "Over the $43,456.78 <lazy> #90 dog";
   String result = instance.doOCR(imageFile);
   logger.info(result);
   assertEquals(expResult, result.trim());
 }
示例#7
0
 /**
  * Calls createDocuments with two input files, two output base names and three rendered formats
  * (hOCR, PDF, text), then checks that the PDF for the first output base exists.
  *
  * @throws java.lang.Exception if document creation fails
  */
 @Test
 public void testCreateDocuments() throws Exception {
   logger.info("createDocuments for multiple images");

   String pdfInput = new File(this.testResourcesDataPath, "eurotext.pdf").getPath();
   String pngInput = new File(this.testResourcesDataPath, "eurotext.png").getPath();
   String[] inputs = {pdfInput, pngInput};

   String[] outputBases = {
     "target/test-classes/test-results/docrenderer-1",
     "target/test-classes/test-results/docrenderer-2"
   };

   List<RenderedFormat> renderedFormats = new ArrayList<RenderedFormat>();
   renderedFormats.add(RenderedFormat.HOCR);
   renderedFormats.add(RenderedFormat.PDF);
   renderedFormats.add(RenderedFormat.TEXT);

   instance.createDocuments(inputs, outputBases, renderedFormats);

   // Only the PDF of the first output base is verified.
   File firstPdf = new File(outputBases[0] + ".pdf");
   assertTrue(firstPdf.exists());
 }
示例#8
0
  /**
   * Test of deskew algorithm: reads a skewed PNG, rotates it back by the detected skew angle when
   * that angle exceeds the minimum threshold in either direction, then runs doOCR on the image
   * and checks that the recognized text begins with the expected two lines.
   *
   * @throws Exception while processing image.
   */
  @Test
  public void testDoOCR_SkewedImage() throws Exception {
    logger.info("doOCR on a skewed PNG image");
    final String expectedPrefix =
        "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";

    File skewedPng = new File(this.testResourcesDataPath, "eurotext_deskew.png");
    BufferedImage image = ImageIO.read(skewedPng);

    // Determine the skew angle and rotate by its negation only when it is large enough.
    double skewAngle = new ImageDeskew(image).getSkewAngle();
    if (Math.abs(skewAngle) > MINIMUM_DESKEW_THRESHOLD) {
      image = ImageHelper.rotateImage(image, -skewAngle);
    }

    String recognized = instance.doOCR(image);
    logger.info(recognized);

    String actualPrefix = recognized.substring(0, expectedPrefix.length());
    assertEquals(expectedPrefix, actualPrefix);
  }
示例#9
0
 /**
  * Creates a new Tesseract instance and sets its data path to {@code datapath} after passing it
  * through {@link File#getPath()}.
  */
 @Before
 public void setUp() {
   instance = new Tesseract();
   File dataDirectory = new File(datapath);
   String dataDirectoryPath = dataDirectory.getPath();
   instance.setDatapath(dataDirectoryPath);
 }