/**
 * Feeds the content stream of a single page through a
 * {@code PdfContentStreamProcessor} that is bound to the given listener.
 *
 * @param <E> the concrete listener type; returning it makes it easy to chain calls
 * @param pageNumber the number of the page whose content is processed
 * @param renderListener the listener that will receive the render callbacks
 * @return the same {@code renderListener} instance that was passed in
 * @throws IOException if operations on the reader fail
 */
public <E extends RenderListener> E processContent(int pageNumber, E renderListener)
        throws IOException {
    // The resources dictionary is read from the page dictionary itself.
    PdfDictionary resources = reader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES);

    PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(renderListener);
    byte[] pageContent = ContentByteUtils.getContentBytesForPage(reader, pageNumber);
    contentProcessor.processContent(pageContent, resources);

    return renderListener;
}
/**
 * Runs the content stream of page 1 of an in-memory PDF through a
 * {@code TextRenderListener} and returns what the listener reports.
 *
 * <p>Only page 1 is processed; any further pages in the document are ignored.
 * The {@code PdfReader} created here is closed before the method returns,
 * whether processing succeeds or fails.
 *
 * @param pdfAsByteArray the PDF document as a byte array, passed to the
 *        {@code PdfReader} constructor
 * @return the value of {@code listener.getSb()} after the page has been
 *         processed (presumably the text collected by the listener)
 * @throws IOException if the reader cannot be created or the page content
 *         cannot be read
 */
private String extractTextFromPdf(byte[] pdfAsByteArray) throws IOException {
    final int firstPage = 1;

    PdfReader reader = new PdfReader(pdfAsByteArray);
    try {
        TextRenderListener listener = new TextRenderListener();
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);

        PdfDictionary pageDic = reader.getPageN(firstPage);
        PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);

        byte[] contentBytes = ContentByteUtils.getContentBytesForPage(reader, firstPage);
        processor.processContent(contentBytes, resourcesDic);

        return listener.getSb();
    } finally {
        // Release the reader even when processing throws.
        reader.close();
    }
}