コード例 #1
0
  /**
   * Unpacks a file attachment.
   *
   * @param reader The object that reads the PDF document
   * @param filespec The dictonary containing the file specifications
   * @throws IOException
   */
  protected static Object[] unpackFile(PdfReader reader, PdfDictionary filespec)
      throws IOException {
    Object arr[] = new Object[2]; // use to store name and file bytes
    if (filespec == null) {
      return null;
    }

    PdfName type = (PdfName) PdfReader.getPdfObject(filespec.get(PdfName.TYPE));
    if (!PdfName.F.equals(type) && !PdfName.FILESPEC.equals(type)) {
      return null;
    }

    PdfDictionary ef = (PdfDictionary) PdfReader.getPdfObject(filespec.get(PdfName.EF));
    if (ef == null) {
      return null;
    }

    PdfString fn = (PdfString) PdfReader.getPdfObject(filespec.get(PdfName.F));
    if (fn == null) {
      return null;
    }

    File fLast = new File(fn.toUnicodeString());
    PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F));
    if (prs == null) {
      return null;
    }

    byte attachmentByte[] = PdfReader.getStreamBytes(prs);
    arr[0] = fLast.getName();
    arr[1] = attachmentByte;

    return arr;
  }
コード例 #2
0
  /** extracts attachments from PDF File */
  @SuppressWarnings("unchecked")
  protected Map extractAttachments(PdfReader reader) throws IOException {
    Map fileMap = new HashMap();
    PdfDictionary catalog = reader.getCatalog();
    PdfDictionary names = (PdfDictionary) PdfReader.getPdfObject(catalog.get(PdfName.NAMES));
    if (names != null) {
      PdfDictionary embFiles =
          (PdfDictionary) PdfReader.getPdfObject(names.get(new PdfName("EmbeddedFiles")));
      if (embFiles != null) {
        HashMap embMap = PdfNameTree.readTree(embFiles);
        for (Iterator i = embMap.values().iterator(); i.hasNext(); ) {
          PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
          Object fileInfo[] = unpackFile(reader, filespec);
          if (fileMap.containsKey(fileInfo[0])) {
            throw new RuntimeException(DUPLICATE_FILE_NAMES);
          }
          fileMap.put(fileInfo[0], fileInfo[1]);
        }
      }
    }
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
      PdfArray annots = (PdfArray) PdfReader.getPdfObject(reader.getPageN(k).get(PdfName.ANNOTS));
      if (annots == null) {
        continue;
      }
      for (Iterator i = annots.getArrayList().listIterator(); i.hasNext(); ) {
        PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
        PdfName subType = (PdfName) PdfReader.getPdfObject(annot.get(PdfName.SUBTYPE));
        if (!PdfName.FILEATTACHMENT.equals(subType)) {
          continue;
        }
        PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject(annot.get(PdfName.FS));
        Object fileInfo[] = unpackFile(reader, filespec);
        if (fileMap.containsKey(fileInfo[0])) {
          throw new RuntimeException(DUPLICATE_FILE_NAMES);
        }

        fileMap.put(fileInfo[0], fileInfo[1]);
      }
    }

    return fileMap;
  }
コード例 #3
0
  /** extracts XML from PDF */
  protected byte[] getXMLFromPDF(PdfReader reader) throws Exception {
    XfaForm xfaForm = reader.getAcroFields().getXfa();
    Node domDocument = xfaForm.getDomDocument();
    if (domDocument == null) return null;
    Element documentElement = ((Document) domDocument).getDocumentElement();

    Element datasetsElement =
        (Element) documentElement.getElementsByTagNameNS(XFA_NS, "datasets").item(0);
    Element dataElement = (Element) datasetsElement.getElementsByTagNameNS(XFA_NS, "data").item(0);

    Element xmlElement = (Element) dataElement.getChildNodes().item(0);

    Node budgetElement = getBudgetElement(xmlElement);

    byte[] serializedXML = XfaForm.serializeDoc(budgetElement);

    return serializedXML;
  }