示例#1
1
  /**
   * Extracts the text of a PDF and returns it as a new input stream.
   *
   * <p>When the configuration property {@code pdffilter.largepdfs} is true the text is spooled to
   * a temporary file and the returned stream reads from that file; otherwise the text is held in
   * memory. In both cases the text is encoded with the platform default charset, because the
   * writer is created without an explicit charset.
   *
   * <p>An {@link OutOfMemoryError} raised during extraction is logged. It is rethrown unless the
   * configuration property {@code pdffilter.skiponmemoryexception} is true, in which case this
   * method returns {@code null}.
   *
   * @param source source input stream containing the PDF
   * @return InputStream the resulting input stream with the extracted text, or {@code null} when
   *     an out-of-memory condition was skipped as described above
   * @throws Exception if the PDF cannot be loaded, the text cannot be written, or the temporary
   *     file cannot be created or opened
   */
  public InputStream getDestinationStream(InputStream source) throws Exception {
    File tempTextFile = null;
    // Set only once a result stream is about to be handed to the caller; on every other exit
    // path the temporary file (if one was created) is removed in the outer finally block.
    boolean extracted = false;

    try {
      boolean useTemporaryFile =
          ConfigurationManager.getBooleanProperty("pdffilter.largepdfs", false);

      // get input stream from bitstream
      // pass to filter, get string back
      PDFTextStripper pts = new PDFTextStripper();
      PDDocument pdfDoc = null;
      Writer writer;
      ByteArrayOutputStream byteStream = null;

      if (useTemporaryFile) {
        tempTextFile = File.createTempFile("dspacepdfextract" + source.hashCode(), ".txt");
        // Fallback cleanup for the success case, where the caller still reads from the file.
        tempTextFile.deleteOnExit();
        writer = new OutputStreamWriter(new FileOutputStream(tempTextFile));
      } else {
        byteStream = new ByteArrayOutputStream();
        writer = new OutputStreamWriter(byteStream);
      }

      try {
        pdfDoc = PDDocument.load(source);
        pts.writeText(pdfDoc, writer);
      } finally {
        // Close both resources independently so a failure on one does not skip the other.
        try {
          if (pdfDoc != null) {
            pdfDoc.close();
          }
        } catch (Exception e) {
          log.error("Error closing PDF file: " + e.getMessage(), e);
        }

        try {
          writer.close();
        } catch (Exception e) {
          log.error("Error closing temporary extract file: " + e.getMessage(), e);
        }
      }

      InputStream result;
      if (useTemporaryFile) {
        result = new FileInputStream(tempTextFile);
      } else {
        result = new ByteArrayInputStream(byteStream.toByteArray());
      }
      extracted = true;
      return result;
    } catch (OutOfMemoryError oome) {
      log.error("Error parsing PDF document " + oome.getMessage(), oome);
      if (!ConfigurationManager.getBooleanProperty("pdffilter.skiponmemoryexception", false)) {
        throw oome;
      }
    } finally {
      // Do not leave a partial extract file behind until JVM exit when extraction failed.
      if (!extracted && tempTextFile != null && !tempTextFile.delete()) {
        log.error("Could not delete temporary extract file: " + tempTextFile);
      }
    }

    return null;
  }
示例#2
0
  /**
   * Extracts the text content of a PDF file.
   *
   * <p>Progress and error messages are printed to standard output. The parser, documents, text
   * stripper and extracted text are stored in this object's fields as a side effect. The parsed
   * documents are closed before this method returns, on both the success and the failure path.
   *
   * @param fll the PDF file to parse
   * @return the extracted text, or {@code null} if {@code fll} is not a regular file, the parser
   *     cannot be opened, or parsing fails
   */
  String pdf2Text(File fll) {

    String fileName = fll.getName();

    System.out.println("Parsing PDF file " + fileName + "...");

    if (!fll.isFile()) {
      System.out.println("The File : " + fileName + " does not exist!");
      return null;
    }

    FileInputStream pdfStream = null;
    try {
      pdfStream = new FileInputStream(fll);
      parser = new PDFParser(pdfStream);
    } catch (Exception e) {
      System.out.println("Could not open PDF Parser.");
      // The stream may already be open when the parser constructor fails; do not leak it.
      if (pdfStream != null) {
        try {
          pdfStream.close();
        } catch (Exception closeError) {
          closeError.printStackTrace();
        }
      }
      return null;
    }

    try {
      parser.parse();
      csDoc = parser.getDocument();

      pdfStrp = new PDFTextStripper();
      pdDoc = new PDDocument(csDoc);
      NewText = pdfStrp.getText(pdDoc);

    } catch (Exception e) {
      System.out.println("Error while parsing PDF file!");
      e.printStackTrace();
      return null;
    } finally {
      // Release the documents on every path, not only on failure. Each close is guarded
      // separately so that one failing close cannot skip the others.
      // NOTE(review): pdDoc wraps csDoc, so it is closed first; the second close is expected
      // to be harmless (the original error path also closed both) — confirm for the PDFBox
      // version in use.
      try {
        if (pdDoc != null) {
          pdDoc.close();
        }
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      try {
        if (csDoc != null) {
          csDoc.close();
        }
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      try {
        pdfStream.close();
      } catch (Exception e1) {
        e1.printStackTrace();
      }
    }
    System.out.println("Done.");
    return NewText;
  }
示例#3
0
package lius.index.pdf;