/** * Unpacks a file attachment. * * @param reader The object that reads the PDF document * @param filespec The dictonary containing the file specifications * @throws IOException */ protected static Object[] unpackFile(PdfReader reader, PdfDictionary filespec) throws IOException { Object arr[] = new Object[2]; // use to store name and file bytes if (filespec == null) { return null; } PdfName type = (PdfName) PdfReader.getPdfObject(filespec.get(PdfName.TYPE)); if (!PdfName.F.equals(type) && !PdfName.FILESPEC.equals(type)) { return null; } PdfDictionary ef = (PdfDictionary) PdfReader.getPdfObject(filespec.get(PdfName.EF)); if (ef == null) { return null; } PdfString fn = (PdfString) PdfReader.getPdfObject(filespec.get(PdfName.F)); if (fn == null) { return null; } File fLast = new File(fn.toUnicodeString()); PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F)); if (prs == null) { return null; } byte attachmentByte[] = PdfReader.getStreamBytes(prs); arr[0] = fLast.getName(); arr[1] = attachmentByte; return arr; }
/**
 * Extracts the files attached to a PDF document.
 * <p>
 * Attachments are collected from two places: the document-level {@code EmbeddedFiles} name tree
 * and the {@code FileAttachment} annotations on every page. A file specification that
 * {@link #unpackFile(PdfReader, PdfDictionary)} cannot unpack (it returns {@code null}, e.g. for
 * an annotation without an {@code FS} entry) is skipped rather than dereferenced.
 *
 * @param reader the reader for the PDF document
 * @return a map from attachment file name to attachment content, as produced by
 *         {@code unpackFile}; empty when the document has no unpackable attachments
 * @throws IOException propagated from {@code unpackFile}
 * @throws RuntimeException if two attachments resolve to the same file name
 */
@SuppressWarnings("unchecked")
protected Map extractAttachments(PdfReader reader) throws IOException {
    Map<Object, Object> fileMap = new HashMap<Object, Object>();

    // 1) Document-level attachments: catalog -> /Names -> /EmbeddedFiles name tree.
    PdfDictionary catalog = reader.getCatalog();
    PdfDictionary names = (PdfDictionary) PdfReader.getPdfObject(catalog.get(PdfName.NAMES));
    if (names != null) {
        PdfDictionary embFiles =
                (PdfDictionary) PdfReader.getPdfObject(names.get(new PdfName("EmbeddedFiles")));
        if (embFiles != null) {
            HashMap embMap = PdfNameTree.readTree(embFiles);
            for (Object value : embMap.values()) {
                PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject((PdfObject) value);
                putUnpackedAttachment(fileMap, unpackFile(reader, filespec));
            }
        }
    }

    // 2) Page-level attachments: /FileAttachment annotations on each page.
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = (PdfArray) PdfReader.getPdfObject(reader.getPageN(k).get(PdfName.ANNOTS));
        if (annots == null) {
            continue;
        }
        for (Object item : annots.getArrayList()) {
            PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject((PdfObject) item);
            PdfName subType = (PdfName) PdfReader.getPdfObject(annot.get(PdfName.SUBTYPE));
            if (!PdfName.FILEATTACHMENT.equals(subType)) {
                continue;
            }
            PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject(annot.get(PdfName.FS));
            putUnpackedAttachment(fileMap, unpackFile(reader, filespec));
        }
    }
    return fileMap;
}

/**
 * Adds one unpacked attachment (name at index 0, content at index 1) to {@code fileMap}.
 * A {@code null} {@code fileInfo} is ignored; a name already present in the map is rejected.
 */
private void putUnpackedAttachment(Map<Object, Object> fileMap, Object[] fileInfo) {
    if (fileInfo == null) {
        // unpackFile could not unpack this file specification; there is nothing to record.
        return;
    }
    if (fileMap.containsKey(fileInfo[0])) {
        throw new RuntimeException(DUPLICATE_FILE_NAMES);
    }
    fileMap.put(fileInfo[0], fileInfo[1]);
}
/**
 * Extracts XML from the XFA form of a PDF.
 * <p>
 * Looks up the first {@code datasets} element (in the {@code XFA_NS} namespace) under the XFA
 * document element, then the first {@code data} element beneath it, takes that element's first
 * child <em>element</em>, hands it to {@code getBudgetElement}, and serializes the node that
 * call returns.
 *
 * @param reader the reader for the PDF document
 * @return the serialized XML bytes, or {@code null} when the PDF has no XFA DOM document or the
 *         document lacks a document element, a {@code datasets} element, or a {@code data} element
 * @throws Exception propagated from the XFA lookup, {@code getBudgetElement}, or serialization
 */
protected byte[] getXMLFromPDF(PdfReader reader) throws Exception {
    XfaForm xfaForm = reader.getAcroFields().getXfa();
    Node domDocument = xfaForm.getDomDocument();
    if (domDocument == null) {
        return null;
    }

    Element documentElement = ((Document) domDocument).getDocumentElement();
    if (documentElement == null) {
        return null;
    }

    // NodeList.item(0) yields null when no matching element exists; treat that like "no XML".
    Element datasetsElement =
            (Element) documentElement.getElementsByTagNameNS(XFA_NS, "datasets").item(0);
    if (datasetsElement == null) {
        return null;
    }
    Element dataElement =
            (Element) datasetsElement.getElementsByTagNameNS(XFA_NS, "data").item(0);
    if (dataElement == null) {
        return null;
    }

    // The first child node may be whitespace text or a comment, so look for the first element
    // instead of casting child 0 blindly.
    Element xmlElement = findFirstChildElement(dataElement);
    Node budgetElement = getBudgetElement(xmlElement);
    return XfaForm.serializeDoc(budgetElement);
}

/** Returns the first direct child of {@code parent} that is an element, or {@code null} if none. */
private static Element findFirstChildElement(Node parent) {
    for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            return (Element) child;
        }
    }
    return null;
}