// Scan the photo for text using the tess-two API public static String scanPhoto(Bitmap bitmap) { TessBaseAPI baseApi = new TessBaseAPI(); baseApi.setDebug(true); baseApi.init(DATA_PATH, lang); // set the black list baseApi.setVariable("tessedit_char_blacklist", "':;,.?/\\}][{!@#$%^&*()-_=+~"); baseApi.setVariable("save_blob_choices", "T"); baseApi.setImage(bitmap); String recognizedText = baseApi.getUTF8Text(); // Iterate over the results and print confidence values for debugging purposes final ResultIterator iterator = baseApi.getResultIterator(); String lastUTF8Text; float lastConfidence; iterator.begin(); do { lastUTF8Text = iterator.getUTF8Text(PageIteratorLevel.RIL_WORD); lastConfidence = iterator.confidence(PageIteratorLevel.RIL_WORD); if (lastConfidence > 50) { Log.d(TAG, String.format("%s => %.2f", lastUTF8Text, lastConfidence)); } } while (iterator.next(PageIteratorLevel.RIL_WORD)); baseApi.end(); Log.d(TAG, recognizedText); return recognizedText; }
@SmallTest public void testChoiceIterator() { final String inputText = "hello"; final Bitmap bmp = TessBaseAPITest.getTextImage(inputText, 640, 480); // Attempt to initialize the API. final TessBaseAPI baseApi = new TessBaseAPI(); baseApi.init(TessBaseAPITest.TESSBASE_PATH, TessBaseAPITest.DEFAULT_LANGUAGE); baseApi.setPageSegMode(TessBaseAPI.PageSegMode.PSM_SINGLE_LINE); baseApi.setVariable(TessBaseAPI.VAR_SAVE_BLOB_CHOICES, TessBaseAPI.VAR_TRUE); // Ensure that text is recognized. baseApi.setImage(bmp); String recognizedText = baseApi.getUTF8Text(); assertTrue("No recognized text found.", recognizedText != null && !recognizedText.equals("")); // Iterate through the results. ResultIterator iterator = baseApi.getResultIterator(); List<Pair<String, Double>> choicesAndConfidences; iterator.begin(); do { choicesAndConfidences = iterator.getChoicesAndConfidence(PageIteratorLevel.RIL_SYMBOL); assertNotNull("Invalid result.", choicesAndConfidences); for (Pair<String, Double> choiceAndConfidence : choicesAndConfidences) { String choice = choiceAndConfidence.first; Double conf = choiceAndConfidence.second; assertTrue("No choice value found.", choice != null && !choice.equals("")); assertTrue("Found an incorrect confidence value.", conf >= 0 && conf <= 100); } } while (iterator.next(PageIteratorLevel.RIL_SYMBOL)); iterator.delete(); assertNotNull("No ChoiceIterator values found.", choicesAndConfidences); // Attempt to shut down the API. baseApi.end(); bmp.recycle(); }
private void testGetUTF8Text(String language, int ocrEngineMode) { final String inputText = "hello"; final Bitmap bmp = getTextImage(inputText, 640, 480); // Attempt to initialize the API. final TessBaseAPI baseApi = new TessBaseAPI(); baseApi.init(TESSBASE_PATH, language, ocrEngineMode); baseApi.setPageSegMode(TessBaseAPI.PageSegMode.PSM_SINGLE_LINE); baseApi.setImage(bmp); // Ensure that the result is correct. final String outputText = baseApi.getUTF8Text(); assertEquals("\"" + outputText + "\" != \"" + inputText + "\"", inputText, outputText); // Ensure getRegions() works. final Pixa regions = baseApi.getRegions(); assertEquals("Found incorrect number of regions.", regions.size(), 1); regions.recycle(); // Ensure getTextlines() works. final Pixa textlines = baseApi.getTextlines(); assertEquals("Found incorrect number of textlines.", textlines.size(), 1); textlines.recycle(); // Ensure getStrips() works. final Pixa strips = baseApi.getStrips(); assertEquals("Found incorrect number of strips.", strips.size(), 1); strips.recycle(); // Ensure getWords() works. final Pixa words = baseApi.getWords(); assertEquals("Found incorrect number of words.", words.size(), 1); words.recycle(); // Ensure getConnectedComponents() works. final Pixa connectedComponents = baseApi.getConnectedComponents(); assertTrue("Connected components not found.", connectedComponents.size() > 0); connectedComponents.recycle(); // Iterate through the results. final ResultIterator iterator = baseApi.getResultIterator(); String lastUTF8Text; float lastConfidence; int[] lastBoundingBox; Rect lastBoundingRect; int count = 0; iterator.begin(); do { lastUTF8Text = iterator.getUTF8Text(PageIteratorLevel.RIL_WORD); lastConfidence = iterator.confidence(PageIteratorLevel.RIL_WORD); lastBoundingBox = iterator.getBoundingBox(PageIteratorLevel.RIL_WORD); lastBoundingRect = iterator.getBoundingRect(PageIteratorLevel.RIL_WORD); count++; } while (iterator.next(PageIteratorLevel.RIL_WORD)); iterator.delete(); assertEquals("Found incorrect number of results.", count, 1); assertEquals("Found an incorrect result.", lastUTF8Text, outputText); assertTrue("Result was not high-confidence.", lastConfidence > 80); assertTrue("Result bounding box not found.", lastBoundingBox[2] > 0 && lastBoundingBox[3] > 0); boolean validBoundingRect = lastBoundingRect.left < lastBoundingRect.right && lastBoundingRect.top < lastBoundingRect.bottom; assertTrue("Result bounding box Rect is incorrect.", validBoundingRect); // Attempt to shut down the API. baseApi.end(); bmp.recycle(); }