/**
 * Test of doOCR method, of class Tesseract, on the images read from a PDF document.
 *
 * <p>The image list is passed to {@code doOCR} together with {@code null} as the second
 * argument, and only the leading part of the recognized text (as long as the expected text)
 * is compared.
 *
 * @throws Exception while processing image.
 */
@Test
public void testDoOCR_List_Rectangle() throws Exception {
    logger.info("doOCR on a PDF document");
    File imageFile = new File(this.testResourcesDataPath, "eurotext.pdf");
    String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
    try {
        List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
        String result = instance.doOCR(imageList, null);
        logger.info(result);
        // Clamp the prefix length: a result shorter than the expected text must show up as an
        // assertEquals mismatch (with both strings), not as a StringIndexOutOfBoundsException.
        String actualPrefix = result.substring(0, Math.min(result.length(), expResult.length()));
        assertEquals(expResult, actualPrefix);
    } catch (IOException e) {
        logger.error(
                "Exception-Message: '{}'. Imagefile: '{}'",
                e.getMessage(),
                imageFile.getAbsoluteFile(),
                e);
        // Give the failure a message so the test report names the cause.
        fail("Reading images from '" + imageFile.getAbsoluteFile() + "' failed: " + e.getMessage());
    } catch (TesseractException e) {
        logger.error(
                "Exception-Message: '{}'. Imagefile: '{}'",
                e.getMessage(),
                imageFile.getAbsoluteFile(),
                e);
        fail("OCR of '" + imageFile.getAbsoluteFile() + "' failed: " + e.getMessage());
    }
}
/**
 * Test of doOCR method, of class Tesseract, on a PNG image file.
 *
 * <p>Only the leading part of the recognized text (as long as the expected text) is compared.
 *
 * @throws Exception while processing image.
 */
@Test
public void testDoOCR_File() throws Exception {
    logger.info("doOCR on a PNG image");
    File imageFile = new File(this.testResourcesDataPath, "eurotext.png");
    String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";

    String result = instance.doOCR(imageFile);
    logger.info(result);

    // Clamp the prefix length: a result shorter than the expected text must show up as an
    // assertEquals mismatch (with both strings), not as a StringIndexOutOfBoundsException.
    String actualPrefix = result.substring(0, Math.min(result.length(), expResult.length()));
    assertEquals(expResult, actualPrefix);
}
/** * Test of doOCR method, of class Tesseract. * * @throws Exception while processing image. */ @Test public void testDoOCR_File_Rectangle() throws Exception { logger.info("doOCR on a BMP image with bounding rectangle"); File imageFile = new File(this.testResourcesDataPath, "eurotext.bmp"); Rectangle rect = new Rectangle( 0, 0, 1024, 800); // define an equal or smaller region of interest on the image String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog"; String result = instance.doOCR(imageFile, rect); logger.info(result); assertEquals(expResult, result.substring(0, expResult.length())); }
/** * Test of doOCR method, of class Tesseract. * * @throws java.lang.Exception */ @Test public void testDoOCR_File_With_Configs() throws Exception { logger.info("doOCR with configs"); File imageFile = new File(this.testResourcesDataPath, "eurotext.png"); String expResult = "[-0123456789.\n ]+"; List<String> configs = Arrays.asList("digits"); instance.setConfigs(configs); String result = instance.doOCR(imageFile); logger.info(result); assertTrue(result.matches(expResult)); instance.setConfigs( null); // since Tesseract instance is a singleton, clear configs so the effects do not carry // on into subsequent runs. }
/** * Test of doOCR method, of class Tesseract. * * @throws java.lang.Exception */ @Test public void testDoOCR_UNLV_Zone_File() throws Exception { logger.info("doOCR on a PNG image with UNLV zone file .uzn"); // UNLV zone format: left top width height label String filename = String.format("%s/%s", this.testResourcesDataPath, "eurotext_unlv.png"); File imageFile = new File(filename); String expResult = "& duck/goose, as 12.5% of E-mail\n\n" + "from [email protected] is spam.\n\n" + "The (quick) [brown] {fox} jumps!\n" + "Over the $43,456.78 <lazy> #90 dog"; String result = instance.doOCR(imageFile); logger.info(result); assertEquals(expResult, result.trim()); }
/** * Test of deskew algorithm. * * @throws Exception while processing image. */ @Test public void testDoOCR_SkewedImage() throws Exception { logger.info("doOCR on a skewed PNG image"); File imageFile = new File(this.testResourcesDataPath, "eurotext_deskew.png"); BufferedImage bi = ImageIO.read(imageFile); ImageDeskew id = new ImageDeskew(bi); double imageSkewAngle = id.getSkewAngle(); // determine skew angle if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) { bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image } String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog"; String result = instance.doOCR(bi); logger.info(result); assertEquals(expResult, result.substring(0, expResult.length())); }
@Test public void testFolderExtraction() { File tessDataFolder = null; try { /** Loads the image from resources. */ String filename = String.format("%s/%s", "/test-data", "eurotext.pdf"); URL defaultImage = getClass().getResource(filename); File imageFile = new File(defaultImage.toURI()); /** Extracts <code>tessdata</code> folder into a temp folder. */ logger.info("Loading the tessdata folder into a temporary folder."); tessDataFolder = LoadLibs.extractTessResources("tessdata"); /** Gets tesseract instance and sets data path. */ ITesseract instance = new Tesseract(); if (tessDataFolder != null) { logger.info(tessDataFolder.getAbsolutePath()); instance.setDatapath(tessDataFolder.getAbsolutePath()); instance.setLanguage("por"); } /** Performs OCR on the image. */ String result = instance.doOCR(imageFile); logger.info(result); } catch (TesseractException e) { logger.error(e.getMessage()); logger.error(e.getMessage(), e); } catch (URISyntaxException e) { logger.error(e.getMessage(), e); } // checks if tessdata folder exists assertTrue(tessDataFolder != null && tessDataFolder.exists()); }