public static void writeText(String pdfPath, String outputPath) throws java.io.IOException { Document pdf = PDF.open(pdfPath); StringBuilder text = new StringBuilder(1024); // pdf.pipe(new OutputTarget(text)); pdf.pipe(new VisualOutputTarget(text)); pdf.close(); File file = new File(outputPath); FileOutputStream outputStream = new FileOutputStream(file); outputStream.write(text.toString().getBytes()); outputStream.flush(); outputStream.close(); }
/**
 * Adds one PDF file to the index.
 *
 * <p>The file is opened as a PDF, converted to an index document whose body text
 * field is named {@code "content"}, extended with stored {@code "name"} (file
 * name) and {@code "path"} (absolute path) fields, and handed to
 * {@code indexWriter}. The PDF is closed when the method exits, whether or not
 * indexing succeeded.
 *
 * @param file        the PDF file to index
 * @param indexWriter the writer that receives the resulting document
 * @throws IOException            if reading the PDF or writing to the index fails
 * @throws InvalidFormatException declared in the signature; nothing in the visible
 *                                body obviously raises it (presumably required by
 *                                a shared indexer contract; verify)
 * @throws OpenXML4JException     see {@code InvalidFormatException}
 * @throws XmlException           see {@code InvalidFormatException}
 */
public void index(Path file, IndexWriter indexWriter) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
    logger.info("Indexing PDF document " + file);

    // Resolved once and reused for opening the PDF and for the stored fields.
    final java.io.File source = file.toFile();

    try (com.snowtide.pdf.Document parsedPdf = PDF.open(source)) {
        LucenePDFConfiguration luceneConfig = new LucenePDFConfiguration();
        luceneConfig.setBodyTextFieldName("content");

        Document luceneDoc = LucenePDFDocumentFactory.buildPDFDocument(parsedPdf, luceneConfig);

        StoredField nameField = new StoredField("name", source.getName());
        luceneDoc.add(nameField);

        StoredField pathField = new StoredField("path", source.getAbsolutePath());
        luceneDoc.add(pathField);

        indexWriter.addDocument(luceneDoc);
    }
}