/** * Performs OCR operation. * * @param imageList a list of <code>IIOImage</code> objects * @param rect the bounding rectangle defines the region of the image to be recognized. A * rectangle of zero dimension or <code>null</code> indicates the whole image. * @return the recognized text * @throws TesseractException */ public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException { init(); setTessVariables(); try { StringBuilder sb = new StringBuilder(); for (IIOImage oimage : imageList) { pageNum++; try { setImage(oimage.getRenderedImage(), rect); sb.append(getOCRText()); } catch (IOException ioe) { // skip the problematic image logger.log(Level.SEVERE, ioe.getMessage(), ioe); } } if (hocr) { sb.insert(0, htmlBeginTag).append(htmlEndTag); } return sb.toString(); } finally { dispose(); } }
private byte[] getBytes(short[] array) { try { ByteArrayOutputStream bytestream = new ByteArrayOutputStream(); DataOutputStream datastream = new DataOutputStream(bytestream); for (short n : array) { datastream.writeShort(n); } datastream.flush(); return bytestream.toByteArray(); } catch (IOException ioe) { Logging.logger().finest(ioe.getMessage()); } return null; }