/** * The dummy section wrappers only contains one section with all the document. Therefore any * sections within the document are ignored in the conversion process. As it doesn't need to check * for sections it is faster and the html-Output only uses one section.<br> * It will use the Header/Footer of the body sectPr. This isn't correct, if there are several * Sections in the document, but to find the correct SectPr it would need to check the document * content - and the aim of this method is a low overhead. * * @param wmlPackage * @param dummyPageNumbering * @return */ protected static List<ConversionSectionWrapper> processDummy( WordprocessingMLPackage wmlPackage, Document document, RelationshipsPart rels, BooleanDefaultTrue evenAndOddHeaders, boolean dummyPageNumbering) { List<ConversionSectionWrapper> conversionSections = new ArrayList<ConversionSectionWrapper>(); ConversionSectionWrapper currentSectionWrapper = null; HeaderFooterPolicy previousHF = new HeaderFooterPolicy(document.getBody().getSectPr(), null, rels, evenAndOddHeaders); // SectionWrapper does work where sectPr is null (ie document has no body level sectPr), // so document.getBody().getSectPr() is ok currentSectionWrapper = createSectionWrapper( document.getBody().getSectPr(), previousHF, rels, evenAndOddHeaders, 1, document.getBody().getContent(), dummyPageNumbering); conversionSections.add(currentSectionWrapper); return conversionSections; }
/** * Traverse the document, and return a map of all styles which are used directly in the document. * (IE this does not include styles on which others are just BasedOn). * * @return */ public Set<String> getStylesInUse() { org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document) this.getJaxbElement(); Body body = wmlDocumentEl.getBody(); List<Object> bodyChildren = body.getContent(); Set<String> stylesInUse = new HashSet<String>(); FontAndStyleFinder finder = new FontAndStyleFinder(null, null, stylesInUse); finder.defaultCharacterStyle = this.getStyleDefinitionsPart().getDefaultCharacterStyle(); finder.defaultParagraphStyle = this.getStyleDefinitionsPart().getDefaultParagraphStyle(); new TraversalUtil(bodyChildren, finder); finder.finish(); // Styles in headers, footers? RelationshipsPart rp = this.getRelationshipsPart(); if (rp != null) { for (Relationship r : rp.getRelationships().getRelationship()) { Part part = rp.getPart(r); if (part instanceof FooterPart) { Ftr ftr = ((FooterPart) part).getJaxbElement(); finder.walkJAXBElements(ftr); } else if (part instanceof HeaderPart) { Hdr hdr = ((HeaderPart) part).getJaxbElement(); finder.walkJAXBElements(hdr); } } } // Styles in endnotes, footnotes? if (this.getEndNotesPart() != null) { log.debug("Looking at endnotes"); CTEndnotes endnotes = this.getEndNotesPart().getJaxbElement(); finder.walkJAXBElements(endnotes); } if (this.getFootnotesPart() != null) { log.debug("Looking at footnotes"); CTFootnotes footnotes = this.getFootnotesPart().getJaxbElement(); finder.walkJAXBElements(footnotes); } // Comments if (this.getCommentsPart() != null) { log.debug("Looking at comments"); Comments comments = this.getCommentsPart().getJaxbElement(); finder.walkJAXBElements(comments); } return stylesInUse; }
public static void main(String[] args) throws Exception { String inputfilepath = System.getProperty("user.dir") + "/sample-docs/sample-docx.xml"; WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath)); MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart(); org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document) documentPart.getJaxbElement(); Body body = wmlDocumentEl.getBody(); new TraversalUtil( body, new Callback() { String indent = ""; @Override public List<Object> apply(Object o) { String text = ""; if (o instanceof org.docx4j.wml.Text) text = ((org.docx4j.wml.Text) o).getValue(); System.out.println(indent + o.getClass().getName() + " \"" + text + "\""); return null; } @Override public boolean shouldTraverse(Object o) { return true; } // Depth first @Override public void walkJAXBElements(Object parent) { indent += " "; List children = getChildren(parent); if (children != null) { for (Object o : children) { // if its wrapped in javax.xml.bind.JAXBElement, get its // value; this is ok, provided the results of the Callback // won't be marshalled o = XmlUtils.unwrap(o); this.apply(o); if (this.shouldTraverse(o)) { walkJAXBElements(o); } } } indent = indent.substring(0, indent.length() - 4); } @Override public List<Object> getChildren(Object o) { return TraversalUtil.getChildrenImpl(o); } }); }
/** * Traverse the document, looking for fonts which have been applied, either directly, or via a * style. * * @return */ public Set<String> fontsInUse() { log.info("fontsInUse.."); getPropertyResolver(); // this inits our virtual DocDefaults style // Setup Set<String> fontsDiscovered = new java.util.HashSet<String>(); // // Keep track of styles we encounter, so we can // // inspect these for fonts // Set<String> stylesInUse = new java.util.HashSet<String>(); // // org.docx4j.wml.Styles styles = null; // if (this.getStyleDefinitionsPart()!=null) { // styles = (org.docx4j.wml.Styles)this.getStyleDefinitionsPart().getJaxbElement(); // } // // It is convenient to have a HashMap of styles // Map<String, Style> stylesDefined = new java.util.HashMap<String, Style>(); // if (styles!=null) { // for (Iterator<Style> iter = styles.getStyle().iterator(); iter.hasNext();) { // Style s = iter.next(); // stylesDefined.put(s.getStyleId(), s); // } // } // // We need to know what fonts and styles are used in the document org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document) this.getJaxbElement(); Body body = wmlDocumentEl.getBody(); List<Object> bodyChildren = body.getContent(); FontDiscoveryCharacterVisitor visitor = new FontDiscoveryCharacterVisitor(fontsDiscovered); RunFontSelector runFontSelector = new RunFontSelector( (WordprocessingMLPackage) this.pack, visitor, RunFontActionType.DISCOVERY); FontAndStyleFinder finder = new FontAndStyleFinder(runFontSelector, fontsDiscovered, null); finder.defaultCharacterStyle = this.getStyleDefinitionsPart().getDefaultCharacterStyle(); finder.defaultParagraphStyle = this.getStyleDefinitionsPart().getDefaultParagraphStyle(); new TraversalUtil(bodyChildren, finder); // finder.finish(); fontsDiscovered.add(runFontSelector.getDefaultFont()); // fonts in headers, footers? 
RelationshipsPart rp = this.getRelationshipsPart(); if (rp != null) { for (Relationship r : rp.getRelationships().getRelationship()) { Part part = rp.getPart(r); if (part instanceof FooterPart) { Ftr ftr = ((FooterPart) part).getJaxbElement(); finder.walkJAXBElements(ftr); } else if (part instanceof HeaderPart) { Hdr hdr = ((HeaderPart) part).getJaxbElement(); finder.walkJAXBElements(hdr); } } } // Styles in endnotes, footnotes? if (this.getEndNotesPart() != null) { log.debug("Looking at endnotes"); CTEndnotes endnotes = this.getEndNotesPart().getJaxbElement(); finder.walkJAXBElements(endnotes); } if (this.getFootnotesPart() != null) { log.debug("Looking at footnotes"); CTFootnotes footnotes = this.getFootnotesPart().getJaxbElement(); finder.walkJAXBElements(footnotes); } // Comments if (this.getCommentsPart() != null) { log.debug("Looking at comments"); Comments comments = this.getCommentsPart().getJaxbElement(); finder.walkJAXBElements(comments); } // Add fonts used in the styles we discovered // .. 2013 03 10: no longer necessary // Fonts can also be used in the numbering part // For now, treat any font mentioned in that part as in use. // Ideally, we'd only register fonts used in numbering levels // that were actually used in the document if (getNumberingDefinitionsPart() != null) { Numbering numbering = getNumberingDefinitionsPart().getJaxbElement(); for (Numbering.AbstractNum abstractNumNode : numbering.getAbstractNum()) { for (Lvl lvl : abstractNumNode.getLvl()) { if (lvl.getRPr() != null && lvl.getRPr().getRFonts() != null) { String fontName = lvl.getRPr().getRFonts().getAscii(); if (fontName != null) { fontsDiscovered.add(fontName); log.debug( "Registered " + fontName + " for abstract list " + abstractNumNode.getAbstractNumId() + " lvl " + lvl.getIlvl()); } } } } } return fontsDiscovered; }
public static WordprocessingMLPackage createPackage(PageSizePaper sz, boolean landscape) throws InvalidFormatException { // Create a package WordprocessingMLPackage wmlPack = new WordprocessingMLPackage(); // Create main document part MainDocumentPart wordDocumentPart = new MainDocumentPart(); // Create main document part content org.docx4j.wml.ObjectFactory factory = Context.getWmlObjectFactory(); org.docx4j.wml.Body body = factory.createBody(); org.docx4j.wml.Document wmlDocumentEl = factory.createDocument(); wmlDocumentEl.setBody(body); // Create a basic sectPr using our Page model PageDimensions page = new PageDimensions(); page.setPgSize(sz, landscape); SectPr sectPr = factory.createSectPr(); body.setSectPr(sectPr); sectPr.setPgSz(page.getPgSz()); sectPr.setPgMar(page.getPgMar()); // Put the content in the part wordDocumentPart.setJaxbElement(wmlDocumentEl); // Add the main document part to the package relationships // (creating it if necessary) wmlPack.addTargetPart(wordDocumentPart); // Create a styles part Part stylesPart = new org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart(); try { ((org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart) stylesPart) .unmarshalDefaultStyles(); // Add the styles part to the main document part relationships // (creating it if necessary) wordDocumentPart.addTargetPart(stylesPart); // NB - add it to main doc part, not package! 
} catch (Exception e) { // TODO: handle exception // e.printStackTrace(); log.error(e.getMessage(), e); } // Metadata: docx4j 2.7.1 can populate some of this from docx4j.properties // See SaveToZipFile DocPropsCorePart core = new DocPropsCorePart(); org.docx4j.docProps.core.ObjectFactory coreFactory = new org.docx4j.docProps.core.ObjectFactory(); core.setJaxbElement(coreFactory.createCoreProperties()); wmlPack.addTargetPart(core); DocPropsExtendedPart app = new DocPropsExtendedPart(); org.docx4j.docProps.extended.ObjectFactory extFactory = new org.docx4j.docProps.extended.ObjectFactory(); app.setJaxbElement(extFactory.createProperties()); wmlPack.addTargetPart(app); // Return the new package return wmlPack; }
/**
 * Demo entry point: creates a new package, makes sure it has a settings part that asks for
 * fields to be updated, inserts a TOC field paragraph followed by four styled heading
 * paragraphs, and saves the result as {@code OUT_TableOfContentsAdd.docx} in the working
 * directory.
 *
 * @param args not used
 * @throws Exception if creating, populating or saving the package fails
 */
public static void main(String[] args) throws Exception {

  WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
  MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

  // Settings part: only created (print view, updateFields=true) when none exists yet
  CTSettings settings = new CTSettings();
  DocumentSettingsPart settingsPart = documentPart.getDocumentSettingsPart();
  if (settingsPart == null) {
    settingsPart = new DocumentSettingsPart();

    CTView view = Context.getWmlObjectFactory().createCTView();
    view.setVal(STView.PRINT);
    settings.setView(view);

    BooleanDefaultTrue updateFields = new BooleanDefaultTrue();
    updateFields.setVal(true);
    settings.setUpdateFields(updateFields);

    settingsPart.setJaxbElement(settings);
    documentPart.addTargetPart(settingsPart);
  }

  org.docx4j.wml.Document documentElement =
      (org.docx4j.wml.Document) documentPart.getJaxbElement();
  Body documentBody = documentElement.getBody();

  ObjectFactory factory = Context.getWmlObjectFactory();

  /*
   * The TOC paragraph consists of three runs:
   *
   *   1. a run holding the dirty "begin" fldChar and the instrText with the TOC instruction
   *   2. an empty run
   *   3. a run holding the "end" fldChar
   */

  // Run 1: field begin + instruction text
  FldChar fieldBegin = factory.createFldChar();
  fieldBegin.setFldCharType(STFldCharType.BEGIN);
  fieldBegin.setDirty(true);

  Text instruction = new Text();
  instruction.setSpace("preserve");
  instruction.setValue("TOC \\o \"1-3\" \\h \\z \\u ");

  R beginRun = factory.createR();
  beginRun.getContent().add(getWrappedFldChar(fieldBegin));
  beginRun.getContent().add(factory.createRInstrText(instruction));

  // Run 2: left empty
  R emptyRun = factory.createR();

  // Run 3: field end
  FldChar fieldEnd = factory.createFldChar();
  fieldEnd.setFldCharType(STFldCharType.END);

  R endRun = factory.createR();
  endRun.getContent().add(getWrappedFldChar(fieldEnd));

  P paragraphForTOC = factory.createP();
  paragraphForTOC.getContent().add(beginRun);
  paragraphForTOC.getContent().add(emptyRun);
  paragraphForTOC.getContent().add(endRun);

  documentBody.getContent().add(paragraphForTOC);

  // Some headings for the TOC to pick up
  documentPart.addStyledParagraphOfText("Heading1", "Hello 1");
  documentPart.addStyledParagraphOfText("Heading2", "Hello 2");
  documentPart.addStyledParagraphOfText("Heading3", "Hello 3");
  documentPart.addStyledParagraphOfText("Heading1", "Hello 1");

  java.io.File outputFile =
      new java.io.File(System.getProperty("user.dir") + "/OUT_TableOfContentsAdd.docx");
  wordMLPackage.save(outputFile);
}
protected static List<ConversionSectionWrapper> processComplete( WordprocessingMLPackage wmlPackage, Document document, RelationshipsPart rels, BooleanDefaultTrue evenAndOddHeaders, boolean dummyPageNumbering) { List<ConversionSectionWrapper> conversionSections = new ArrayList<ConversionSectionWrapper>(); List<Object> sectionContent = new ArrayList<Object>(); ConversionSectionWrapper currentSectionWrapper = null; HeaderFooterPolicy previousHF = null; int conversionSectionIndex = 0; // According to the ECMA-376 2ed, if type is not specified, read it as next page // However Word 2007 sometimes treats it as continuous, and sometimes doesn't?? // 20130216 Review above comment: ! In the Word UI, the Word "continuous" is shown where it is // effective. // In the XML, it is stored in the next following sectPr. // First, remove content controls, // since the P could be in a content control. // (It is easier to remove content controls, than // to make the code below TraversalUtil based) // RemovalHandler is an XSLT-based way of doing this, // but here we avoid introducing a dependency on // XSLT (Xalan) for PDF output. 
SdtBlockFinder sbr = new SdtBlockFinder(); new TraversalUtil(document.getContent(), sbr); for (int i = sbr.sdtBlocks.size() - 1; i >= 0; i--) { // Have to process in reverse order // so that parentList is correct for nested sdt SdtBlock sdtBlock = sbr.sdtBlocks.get(i); List<Object> parentList = null; if (sdtBlock.getParent() instanceof ArrayList) { parentList = (ArrayList) sdtBlock.getParent(); } else { log.error("Handle " + sdtBlock.getParent().getClass().getName()); } int index = parentList.indexOf(sdtBlock); parentList.remove(index); parentList.addAll(index, sdtBlock.getSdtContent().getContent()); } // if (log.isDebugEnabled()) { // log.debug(XmlUtils.marshaltoString(document, true, true)); // } // Make a list, so it is easy to look at the following sectPr, // which we need to do to handle continuous sections properly List<SectPr> sectPrs = new ArrayList<SectPr>(); for (Object o : document.getBody().getContent()) { if (o instanceof org.docx4j.wml.P) { if (((org.docx4j.wml.P) o).getPPr() != null) { org.docx4j.wml.PPr ppr = ((org.docx4j.wml.P) o).getPPr(); if (ppr.getSectPr() != null) { sectPrs.add(ppr.getSectPr()); } } } } if (document.getBody().getSectPr() != null) { // usual case sectPrs.add(document.getBody().getSectPr()); } else { log.debug("No body level sectPr in document"); // OK if the last object is w:p and it contains a sectPr. List<Object> all = document.getBody().getContent(); Object last = all.get(all.size() - 1); if (last instanceof P && ((P) last).getPPr() != null && ((P) last).getPPr().getSectPr() != null) { // ok log.debug( ".. but last p contains sectPr .. 
move it"); // so our assumption later about there // being a following section is correct SectPr thisSectPr = ((P) last).getPPr().getSectPr(); document.getBody().setSectPr(thisSectPr); ((P) last).getPPr().setSectPr(null); sectPrs.remove(thisSectPr); } else { document.getBody().setSectPr(Context.getWmlObjectFactory().createSectPr()); sectPrs.add(document.getBody().getSectPr()); } } int sectPrIndex = 0; // includes continuous ones for (Object o : document.getBody().getContent()) { if (o instanceof org.docx4j.wml.P) { if (((org.docx4j.wml.P) o).getPPr() != null) { org.docx4j.wml.PPr ppr = ((org.docx4j.wml.P) o).getPPr(); if (ppr.getSectPr() != null) { // If the *following* section is continuous, don't add *this* section boolean ignoreThisSection = false; SectPr followingSectPr = sectPrs.get(++sectPrIndex); if (followingSectPr.getType() != null && followingSectPr.getType().getVal().equals("continuous")) { ignoreThisSection = true; // If the w:pgSz on the two sections differs, // then Word inserts a page break (ie doesn't treat it as continuous). // If no w:pgSz element is present, then Word defaults // (presumably to Legal? TODO CHECK. There is no default setting in the docx). // Word always inserts a w:pgSz element? 
PgSz pgSzThis = ppr.getSectPr().getPgSz(); PgSz pgSzNext = followingSectPr.getPgSz(); if (pgSzThis != null && pgSzNext != null) { if (pgSzThis.getH().compareTo(pgSzNext.getH()) != 0) { ignoreThisSection = false; } if (pgSzThis.getW().compareTo(pgSzNext.getW()) != 0) { ignoreThisSection = false; } // Orientation:default is portrait boolean portraitThis = true; if (pgSzThis.getOrient() != null) { portraitThis = pgSzThis.getOrient().equals(STPageOrientation.PORTRAIT); } boolean portraitNext = true; if (pgSzNext.getOrient() != null) { portraitNext = pgSzNext.getOrient().equals(STPageOrientation.PORTRAIT); } if (portraitThis != portraitNext) { ignoreThisSection = false; } } // TODO: handle cases where one or both pgSz elements are missing, // or H or W is missing. // Treat pgSz element missing as Legal size? } if (ignoreThisSection) { // In case there are some headers/footers that get inherited by the next section previousHF = new HeaderFooterPolicy(ppr.getSectPr(), previousHF, rels, evenAndOddHeaders); } else { currentSectionWrapper = createSectionWrapper( ppr.getSectPr(), previousHF, rels, evenAndOddHeaders, ++conversionSectionIndex, sectionContent, dummyPageNumbering); conversionSections.add(currentSectionWrapper); previousHF = currentSectionWrapper.getHeaderFooterPolicy(); sectionContent = new ArrayList<Object>(); } } } } sectionContent.add(o); // System.out.println(XmlUtils.marshaltoString(o, true)); } currentSectionWrapper = createSectionWrapper( document.getBody().getSectPr(), previousHF, rels, evenAndOddHeaders, ++conversionSectionIndex, sectionContent, dummyPageNumbering); conversionSections.add(currentSectionWrapper); return conversionSections; }