Java COSDocument примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.pdfbox.cos

Класс/Тип: COSDocument

Примеров на hotexamples.com: 3

Java COSDocument - 3 примера найдено. Это лучшие примеры Java кода для org.pdfbox.cos.COSDocument, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

close(3)

getEncryptionDictionary(1)

Пример #1

Показать файл

Файл: Tool.java Проект: nalsabhan/Thesis_code

  /**
   * Extracts the text content of a PDF file.
   *
   * <p>Progress and failures are reported on standard output. The parser, both document objects,
   * the text stripper and the extracted text are stored in the {@code parser}, {@code csDoc},
   * {@code pdDoc}, {@code pdfStrp} and {@code NewText} members declared outside this method.
   *
   * <p>The documents and the input stream are closed before this method returns, on success as
   * well as on failure; only the returned text remains usable afterwards.
   *
   * @param fll the PDF file to read
   * @return the extracted text, or {@code null} if {@code fll} is not a regular file, the parser
   *     could not be created, or parsing/extraction failed
   */
  String pdf2Text(File fll) {

    String fileName = fll.getName();

    System.out.println("Parsing PDF file " + fileName + "...");

    if (!fll.isFile()) {
      System.out.println("The File : " + fileName + " does not exist!");
      return null;
    }

    FileInputStream in = null;
    try {
      in = new FileInputStream(fll);
      parser = new PDFParser(in);
    } catch (Exception e) {
      System.out.println("Could not open PDF Parser.");
      // Do not leak the stream if it was opened but the parser could not be built.
      if (in != null) {
        try {
          in.close();
        } catch (Exception closeError) {
          closeError.printStackTrace();
        }
      }
      return null;
    }

    // Forget documents left over from an earlier call so the cleanup below only
    // touches what this call created.
    csDoc = null;
    pdDoc = null;

    try {
      parser.parse();
      csDoc = parser.getDocument();

      pdfStrp = new PDFTextStripper();
      pdDoc = new PDDocument(csDoc);
      NewText = pdfStrp.getText(pdDoc);
    } catch (Exception e) {
      System.out.println("Error while parsing PDF file!");
      e.printStackTrace();
      return null;
    } finally {
      // Close each resource in its own try so that one failing close does not
      // prevent the others from being released.
      try {
        if (csDoc != null) {
          csDoc.close();
        }
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      try {
        if (pdDoc != null) {
          pdDoc.close();
        }
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      try {
        in.close();
      } catch (Exception e1) {
        e1.printStackTrace();
      }
    }
    System.out.println("Done.");
    return NewText;
  }

Пример #2

Показать файл

Файл: PDFPackager.java Проект: brasiliana/CORISCO2

  /**
   * Parses a PDF from the given stream and copies entries of its Info dictionary onto the item as
   * Dublin Core values, then updates the item.
   *
   * <p>Mapping of PDF Info entries to DC (element.qualifier):
   *
   * <pre>
   *   Title        -> title (no qualifier)
   *   Author       -> contributor.author
   *   Creator      -> description.provenance (application that created the original)
   *   Producer     -> description.provenance (converter to PDF)
   *   Subject      -> description.abstract
   *   Keywords     -> subject.other
   *   CreationDate -> date.created (ModDate is used when CreationDate is absent)
   * </pre>
   *
   * <p>This lives here rather than in a crosswalk plugin because the source data is not XML and so
   * does not fit the plugin interface, and it is not useful anywhere else.
   *
   * @param context DSpace context (not used directly by this method).
   * @param item item that receives the metadata values.
   * @param metadata stream positioned at the start of the PDF.
   * @throws MetadataValidationException if the PDF has an encryption dictionary or no Title.
   */
  private void crosswalkPDF(Context context, Item item, InputStream metadata)
      throws CrosswalkException, IOException, SQLException, AuthorizeException {
    COSDocument cosDoc = null;

    try {
      PDFParser pdfParser = new PDFParser(metadata);
      pdfParser.parse();
      cosDoc = pdfParser.getDocument();

      // Encrypted documents are rejected up front: PDFBox breaks on them.
      if (cosDoc.getEncryptionDictionary() != null) {
        throw new MetadataValidationException(
            "This packager cannot accept an encrypted PDF document.");
      }

      PDDocumentInformation info = new PDDocument(cosDoc).getDocumentInformation();

      // A title is mandatory for the item.
      String title = info.getTitle();
      if (title == null) {
        throw new MetadataValidationException(
            "This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary.");
      }
      log.debug("PDF Info dict title=\"" + title + "\"");
      item.addDC("title", null, "en", title);

      String author = info.getAuthor();
      if (author != null) {
        item.addDC("contributor", "author", null, author);
        log.debug("PDF Info dict author=\"" + author + "\"");
      }

      String creator = info.getCreator();
      if (creator != null) {
        item.addDC(
            "description",
            "provenance",
            "en",
            "Application that created the original document: " + creator);
      }

      String producer = info.getProducer();
      if (producer != null) {
        item.addDC(
            "description",
            "provenance",
            "en",
            "Original document converted to PDF by: " + producer);
      }

      String subject = info.getSubject();
      if (subject != null) {
        item.addDC("description", "abstract", null, subject);
      }

      String keywords = info.getKeywords();
      if (keywords != null) {
        item.addDC("subject", "other", null, keywords);
      }

      // DC has no slot for "last modified", so the modification date only
      // serves as a fallback for "date.created".
      Calendar created = info.getCreationDate();
      if (created == null) {
        created = info.getModificationDate();
      }
      if (created != null) {
        String createdText = new DCDate(created.getTime()).toString();
        item.addDC("date", "created", null, createdText);
      }

      item.update();
    } finally {
      if (cosDoc != null) {
        cosDoc.close();
      }
    }
  }

Пример #3

Показать файл

Файл: PDFPackager.java Проект: brasiliana/CORISCO2

  /**
   * Create new Item out of the ingested package, in the indicated collection. It creates a
   * workspace item, which the application can then install if it chooses to bypass Workflow.
   *
   * <p>This is a VERY crude import of a single Adobe PDF (Portable Document Format) file, using the
   * document's embedded metadata for package metadata. If the PDF file hasn't got the minimal
   * metadata available, it is rejected.
   *
   * <p>The package stream is stored as a bitstream first and then read back for parsing, because
   * the parser has to read it as well and the incoming stream cannot be rewound. {@code pkg} is
   * closed by this method once it has been stored. If anything fails, the bitstream and the
   * workspace item created so far are removed again; the context is committed in either case.
   *
   * @param context DSpace context.
   * @param collection collection under which to create new item.
   * @param pkg input stream containing package to ingest.
   * @param params package parameters (none recognized)
   * @param license may be null, which takes default license.
   * @return workspace item created by ingest.
   * @throws PackageValidationException if package is unacceptable or there is a fatal error
   *     turning it into an Item.
   * @throws CrosswalkException if the PDF metadata cannot be mapped onto the item.
   * @throws AuthorizeException propagated from the underlying DSpace calls.
   * @throws SQLException propagated from the underlying DSpace calls.
   * @throws IOException if reading the package or the stored bitstream fails.
   */
  public WorkspaceItem ingest(
      Context context,
      Collection collection,
      InputStream pkg,
      PackageParameters params,
      String license)
      throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException,
          IOException {
    InputStream bis = null;
    boolean success = false;
    Bundle original = null;
    Bitstream bs = null;
    WorkspaceItem wi = null;

    try {
      // Save the PDF in a bitstream first, since the parser
      // has to read it as well, and we cannot "rewind" it after that.
      wi = WorkspaceItem.create(context, collection, false);
      Item myitem = wi.getItem();
      original = myitem.createBundle("ORIGINAL");
      bs = original.createBitstream(pkg);
      pkg.close();
      bs.setName("package.pdf");
      setFormatToMIMEType(context, bs, "application/pdf");
      bs.update();
      log.debug("Created bitstream ID=" + String.valueOf(bs.getID()) + ", parsing...");

      // Keep a handle on the retrieved stream so the finally block can close it;
      // the parsed PDF document itself is closed inside crosswalkPDF.
      bis = bs.retrieve();
      crosswalkPDF(context, myitem, bis);

      wi.update();
      context.commit();
      success = true;
      log.info(
          LogManager.getHeader(
              context,
              "ingest",
              "Created new Item, db ID="
                  + String.valueOf(myitem.getID())
                  + ", WorkspaceItem ID="
                  + String.valueOf(wi.getID())));
      return wi;
    } finally {
      // Closing is best effort: a failure here must not hide the real outcome
      // of the ingest, but it should not disappear silently either.
      if (bis != null) {
        try {
          bis.close();
        } catch (IOException ie) {
          log.warn("Failed to close bitstream input stream after PDF ingest", ie);
        }
      }

      // get rid of bitstream and item if ingest fails
      if (!success) {
        if (original != null && bs != null) {
          original.removeBitstream(bs);
        }
        if (wi != null) {
          wi.deleteAll();
        }
      }
      context.commit();
    }
  }