// private void getPart(ZipFile zf, Base source, // Package pkg, String resolvedPartUri, String relationshipType) private void getPart( ZipFile zf, OpcPackage pkg, RelationshipsPart rp, Relationship r, ContentTypeManager ctm) throws Docx4JException, InvalidFormatException, URISyntaxException { Base source = null; String resolvedPartUri = null; if (r.getType().equals(Namespaces.HYPERLINK)) { // Could be Internal or External // Example of Internal is w:drawing/wp:inline/wp:docPr/a:hlinkClick log.info("Encountered (but not loading) hyperlink " + r.getTarget()); return; } else if (r.getTargetMode() == null || !r.getTargetMode().equals("External")) { // Usual case source = rp.getSourceP(); resolvedPartUri = URIHelper.resolvePartUri(rp.getSourceURI(), new URI(r.getTarget())).toString(); // Now drop leading "/' resolvedPartUri = resolvedPartUri.substring(1); // Now normalise it .. ie abc/def/../ghi // becomes abc/ghi // Maybe this isn't necessary with a zip file, // - ZipFile class may be smart enough to do it. // But it is certainly necessary in the JCR case. // resolvedPartUri = (new java.net.URI(resolvedPartUri)).normalize().toString(); // log.info("Normalised, it is " + resolvedPartUri ); } else { // EXTERNAL if (loadExternalTargets && r.getType().equals(Namespaces.IMAGE)) { // It could instead be, for example, of type hyperlink, // and we don't want to try to fetch that log.warn("Loading external resource " + r.getTarget() + " of type " + r.getType()); BinaryPart bp = ExternalResourceUtils.getExternalResource(r.getTarget()); pkg.getExternalResources().put(bp.getExternalTarget(), bp); } else { log.warn( "Encountered (but not loading) external resource " + r.getTarget() + " of type " + r.getType()); } return; } if (pkg.handled.get(resolvedPartUri) != null) return; String relationshipType = r.getType(); Part part = getRawPart(zf, ctm, resolvedPartUri, r); if (part instanceof BinaryPart || part instanceof DefaultXmlPart) { // The constructors of other parts should take care of this... part.setRelationshipType(relationshipType); } rp.loadPart(part, r); pkg.handled.put(resolvedPartUri, resolvedPartUri); // The source Part (or Package) might have a convenience // method for this if (source.setPartShortcut(part, relationshipType)) { log.info( "Convenience method established from " + source.getPartName() + " to " + part.getPartName()); } // unusedZipEntries.put(resolvedPartUri, new Boolean(false)); // log.info(".. added." ); RelationshipsPart rrp = getRelationshipsPart(zf, part); if (rrp != null) { // recurse via this parts relationships, if it has any addPartsFromRelationships(zf, part, rrp, ctm); String relPart = PartName.getRelationshipsPartName(part.getPartName().getName().substring(1)); // unusedZipEntries.put(relPart, new Boolean(false)); } }
public OpcPackage get(File f) throws Docx4JException { log.info("Filepath = " + f.getPath()); ZipFile zf = null; try { if (!f.exists()) { log.info("Couldn't find " + f.getPath()); } zf = new ZipFile(f); } catch (IOException ioe) { ioe.printStackTrace(); throw new Docx4JException("Couldn't get ZipFile", ioe); } // dumpZipFileContents(zf); // 1. The idea is to walk the tree of relationships, getting // everything we need from the zip file. But I'd like to know // whether there are any orphans, so first we make // a HashMap containing the names of all the zip file // entries, so we can tick them off. // unusedZipEntries = new HashMap(); Enumeration entries = zf.entries(); while (entries.hasMoreElements()) { ZipEntry entry = (ZipEntry) entries.nextElement(); // unusedZipEntries.put(entry.getName(), new Boolean(true) ); } // 2. Create a new Package // Eventually, you'll also be able to create an Excel package etc // but only the WordML package exists at present ContentTypeManager ctm = new ContentTypeManager(); try { InputStream is = getInputStreamFromZippedPart(zf, "[Content_Types].xml"); ctm.parseContentTypesFile(is); } catch (IOException e) { throw new Docx4JException("Couldn't get [Content_Types].xml from ZipFile", e); } OpcPackage p = ctm.createPackage(); // 3. Get [Content_Types].xml // Once we've got this, then we can look up the content type for // each PartName, and use it in the Part constructor. // p.setContentTypeManager(ctm); - 20080111 - done by ctm.createPackage(); // unusedZipEntries.put("[Content_Types].xml", new Boolean(false)); // 4. Start with _rels/.rels // <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> // <Relationship Id="rId3" // Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/> // <Relationship Id="rId2" // Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" // Target="docProps/core.xml"/> // <Relationship Id="rId1" // Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" // Target="word/document.xml"/> // </Relationships> String partName = "_rels/.rels"; RelationshipsPart rp = getRelationshipsPartFromZip(p, zf, partName); p.setRelationships(rp); // rp.setPackageRelationshipPart(true); // unusedZipEntries.put(partName, new Boolean(false)); log.info("Object created for: " + partName); // log.info( rp.toString()); // 5. Now recursively // (i) create new Parts for each thing listed // in the relationships // (ii) add the new Part to the package // (iii) cross the PartName off unusedZipEntries addPartsFromRelationships(zf, p, rp, ctm); // 6. Check unusedZipEntries is empty // if (log.isDebugEnabled()) { // Iterator myVeryOwnIterator = unusedZipEntries.keySet().iterator(); // while(myVeryOwnIterator.hasNext()) { // String key = (String)myVeryOwnIterator.next(); // log.info( key + " " + unusedZipEntries.get(key)); // } // } try { zf.close(); } catch (IOException exc) { exc.printStackTrace(); } registerCustomXmlDataStorageParts(p); return p; }