/** * @param source source input stream * @return InputStream the resulting input stream */ public InputStream getDestinationStream(InputStream source) throws Exception { try { boolean useTemporaryFile = ConfigurationManager.getBooleanProperty("pdffilter.largepdfs", false); // get input stream from bitstream // pass to filter, get string back PDFTextStripper pts = new PDFTextStripper(); PDDocument pdfDoc = null; Writer writer = null; File tempTextFile = null; ByteArrayOutputStream byteStream = null; if (useTemporaryFile) { tempTextFile = File.createTempFile("dspacepdfextract" + source.hashCode(), ".txt"); tempTextFile.deleteOnExit(); writer = new OutputStreamWriter(new FileOutputStream(tempTextFile)); } else { byteStream = new ByteArrayOutputStream(); writer = new OutputStreamWriter(byteStream); } try { pdfDoc = PDDocument.load(source); pts.writeText(pdfDoc, writer); } finally { try { if (pdfDoc != null) pdfDoc.close(); } catch (Exception e) { log.error("Error closing PDF file: " + e.getMessage(), e); } try { writer.close(); } catch (Exception e) { log.error("Error closing temporary extract file: " + e.getMessage(), e); } } if (useTemporaryFile) { return new FileInputStream(tempTextFile); } else { byte[] bytes = byteStream.toByteArray(); return new ByteArrayInputStream(bytes); } } catch (OutOfMemoryError oome) { log.error("Error parsing PDF document " + oome.getMessage(), oome); if (!ConfigurationManager.getBooleanProperty("pdffilter.skiponmemoryexception", false)) { throw oome; } } return null; }
/**
 * Extracts the plain text of a PDF file.
 * <p>
 * Progress and error messages are printed to standard output. The parser,
 * the parsed documents, the stripper and the extracted text are stored in
 * variables declared outside this method ({@code parser}, {@code csDoc},
 * {@code pdDoc}, {@code pdfStrp}, {@code NewText}).
 *
 * @param fll the PDF file to read
 * @return the extracted text, or {@code null} if {@code fll} is not a regular
 *         file, the parser could not be created, or parsing failed
 */
String pdf2Text(File fll) {
    String fileName = fll.getName();
    System.out.println("Parsing PDF file " + fileName + "...");

    if (!fll.isFile()) {
        System.out.println("The File : " + fileName + " does not exist!");
        return null;
    }

    FileInputStream input = null;
    try {
        input = new FileInputStream(fll);
        parser = new PDFParser(input);
    } catch (Exception e) {
        System.out.println("Could not open PDF Parser.");
        // The parser never took ownership of the stream, so release it here.
        if (input != null) {
            try {
                input.close();
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        return null;
    }

    try {
        parser.parse();
        csDoc = parser.getDocument();
        pdfStrp = new PDFTextStripper();
        pdDoc = new PDDocument(csDoc);
        NewText = pdfStrp.getText(pdDoc);
    } catch (Exception e) {
        System.out.println("Error while parsing PDF file!");
        e.printStackTrace();

        // Close each document independently so a failure on the first
        // does not leave the second one open.
        try {
            if (csDoc != null) {
                csDoc.close();
            }
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        try {
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        return null;
    }

    // NOTE(review): on success csDoc/pdDoc are left open; they are stored in
    // variables visible outside this method, so confirm who closes them.
    System.out.println("Done.");
    return NewText;
}
package lius.index.pdf;