Пример #1
0
 /**
  * Extracts the text of a PDF file and writes it to another file.
  *
  * <p>The PDF is opened with {@code PDF.open}, piped through a {@code VisualOutputTarget} into an
  * in-memory buffer, and the buffered text is then written to {@code outputPath}. Both the PDF
  * document and the output stream are closed even when piping or writing throws.
  *
  * @param pdfPath path of the PDF file to read
  * @param outputPath path of the file to write; {@code FileOutputStream} creates it or truncates
  *     an existing file
  * @throws java.io.IOException if the PDF cannot be read or closed, or the output file cannot be
  *     opened or written
  */
 public static void writeText(String pdfPath, String outputPath) throws java.io.IOException {
   StringBuilder text = new StringBuilder(1024);

   // try-with-resources guarantees the document is released even if pipe() fails.
   // The original author also left a plain `new OutputTarget(text)` here as an alternative target.
   try (Document pdf = PDF.open(pdfPath)) {
     pdf.pipe(new VisualOutputTarget(text));
   }

   // The stream is closed on every path; close() makes a separate flush() unnecessary.
   // NOTE(review): getBytes() encodes with the JVM's default charset, so the output encoding
   // depends on the platform — consider passing an explicit charset if callers expect UTF-8.
   try (FileOutputStream outputStream = new FileOutputStream(new File(outputPath))) {
     outputStream.write(text.toString().getBytes());
   }
 }
Пример #2
0
  /**
   * Adds the given PDF file to a Lucene index.
   *
   * <p>The file is opened with {@code PDF.open} and converted by
   * {@code LucenePDFDocumentFactory.buildPDFDocument}, configured so that the body text field is
   * named {@code "content"}. Two stored fields are added before the document is handed to the
   * writer: {@code "name"} (the file name) and {@code "path"} (the absolute path). The PDF is
   * closed when the method exits, whether normally or through an exception.
   *
   * @param file location of the PDF to index
   * @param indexWriter writer that receives the resulting document
   * @throws IOException if the PDF cannot be read or the document cannot be added to the index
   * @throws InvalidFormatException declared by the method signature
   * @throws OpenXML4JException declared by the method signature
   * @throws XmlException declared by the method signature
   */
  public void index(Path file, IndexWriter indexWriter)
      throws IOException, InvalidFormatException, OpenXML4JException, XmlException {

    logger.info("Indexing PDF document " + file);

    // Resolve the Path to a File once; it is needed for opening and for both stored fields.
    final java.io.File source = file.toFile();

    try (com.snowtide.pdf.Document pdfSource = PDF.open(source)) {

      final LucenePDFConfiguration config = new LucenePDFConfiguration();
      config.setBodyTextFieldName("content");

      final Document luceneDoc = LucenePDFDocumentFactory.buildPDFDocument(pdfSource, config);

      // Record where the text came from alongside the extracted content.
      luceneDoc.add(new StoredField("name", source.getName()));
      luceneDoc.add(new StoredField("path", source.getAbsolutePath()));

      indexWriter.addDocument(luceneDoc);
    }
  }