/** Example of how to find an object in document.xml via traversal (as opposed to XPath) */ public static void main(String[] args) throws Exception { String inputfilepath = System.getProperty("user.dir") + "/checkbox.docx"; WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath)); MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart(); Finder finder = new Finder(FldChar.class); // <----- change this to suit new TraversalUtil(documentPart.getContent(), finder); System.out.println("got " + finder.results.size() + " of type " + finder.typeToFind.getName()); for (Object o : finder.results) { Object o2 = XmlUtils.unwrap(o); // this is ok, provided the results of the Callback // won't be marshalled if (o2 instanceof org.docx4j.wml.Text) { org.docx4j.wml.Text txt = (org.docx4j.wml.Text) o2; System.out.println(txt.getValue()); } else { System.out.println(XmlUtils.marshaltoString(o, true, true)); } } }
@Override public String getCssProperty() { // Note regarding numbering case; handling of tab after the number:- // We get this right in the PDF case, via setXslFOListBlock below. // We don't attempt to get the tab right in the HTML case, // since without some research, I don't know what markup would be required. String prop = "position: relative; "; BigInteger left = ((Ind) this.getObject()).getLeft(); if (left != null) { prop = prop + composeCss(CSS_NAME, UnitsOfMeasurement.twipToBest(left.intValue())); } // SPEC: The firstLine and hanging attributes are mutually exclusive, if both are specified, // then // the firstLine value is ignored. BigInteger firstline = ((Ind) this.getObject()).getFirstLine(); BigInteger hanging = ((Ind) this.getObject()).getHanging(); if (hanging != null) { prop = prop + composeCss("text-indent", "-" + UnitsOfMeasurement.twipToBest(hanging.intValue())); } else if (firstline != null) { prop = prop + composeCss("text-indent", UnitsOfMeasurement.twipToBest(firstline.intValue())); } if (left == null && firstline == null && hanging == null) { log.debug("What to do with " + XmlUtils.marshaltoString(this.getObject(), true, true)); prop = CSS_NULL; } return prop; }
private void processDescendantCondition(Object sdt, String xpathBase, int index, Tag tag) { Condition c = null; HashMap<String, String> map = QueryString.parseQueryString(tag.getVal(), true); String conditionId = map.get(BINDING_ROLE_CONDITIONAL); if (conditionId != null) { c = ConditionsPart.getConditionById(conditions, conditionId); if (c == null) { log.error("Missing condition " + conditionId); throw new InputIntegrityException("Required condition '" + conditionId + "' is missing"); } // TODO: this code assumes the condition contains // a simple xpath log.debug("Using condition" + XmlUtils.marshaltoString(c, true, true)); Condition newCondition = c.repeat(xpathBase, index, conditions, xPaths); // set sdt to use it map.put(BINDING_ROLE_CONDITIONAL, newCondition.getId()); tag.setVal(QueryString.create(map)); } }
/**
 * Creates a handler for the given package, reading its OpenDoPE parts.
 *
 * <p>The XPaths part is mandatory; the conditions and components parts are read
 * only when present. Each part that is read is also written to the debug log.
 *
 * @param wordMLPackage the package to process
 * @throws Docx4JException if the main document part has no XPaths part
 */
public OpenDoPEHandler(WordprocessingMLPackage wordMLPackage) throws Docx4JException {

    this.wordMLPackage = wordMLPackage;

    // Mandatory part: fail fast when it is absent.
    if (wordMLPackage.getMainDocumentPart().getXPathsPart() == null) {
        throw new Docx4JException("OpenDoPE XPaths part missing");
    }
    xPaths = wordMLPackage.getMainDocumentPart().getXPathsPart().getJaxbElement();
    log.debug(XmlUtils.marshaltoString(xPaths, true, true));

    // Optional part: conditions.
    if (wordMLPackage.getMainDocumentPart().getConditionsPart() != null) {
        conditions = wordMLPackage.getMainDocumentPart().getConditionsPart().getJaxbElement();
        log.debug(XmlUtils.marshaltoString(conditions, true, true));
    }

    // Optional part: components.
    if (wordMLPackage.getMainDocumentPart().getComponentsPart() != null) {
        components = wordMLPackage.getMainDocumentPart().getComponentsPart().getJaxbElement();
        log.debug(XmlUtils.marshaltoString(components, true, true));
    }

    shallowTraversor = new ShallowTraversor();
    shallowTraversor.wordMLPackage = wordMLPackage;
}
protected void applyPPr(PPr pPrToApply, PPr effectivePPr) { log.debug( "apply " + XmlUtils.marshaltoString(pPrToApply, true, true) + "\n\r to " + XmlUtils.marshaltoString(effectivePPr, true, true)); if (pPrToApply == null) { return; } List<Property> properties = PropertyFactory.createProperties(wordMLPackage, pPrToApply); for (Property p : properties) { if (p != null) { // log.debug("applying pPr " + p.getClass().getName() ); ((AbstractParagraphProperty) p) .set(effectivePPr); // NB, this new method does not copy. TODO? } } log.debug("result " + XmlUtils.marshaltoString(effectivePPr, true, true)); }
protected void init(Object docxObject) { org.docx4j.wml.P para = null; JAXBIntrospector inspector = Context.jc.createJAXBIntrospector(); if (docxObject == null) {; // implied ParagraphML } else if (inspector.isElement(docxObject)) { Object value = JAXBIntrospector.getValue(docxObject); if (value instanceof org.docx4j.wml.P) { para = (org.docx4j.wml.P) value; this.isDummy = false; } else { // Create a dummy ParagraphML for this unsupported element // TODO: A more informative text content in dummy ParagraphML QName name = inspector.getElementName(docxObject); String renderedText; if (name != null) { renderedText = XmlUtil.getEnclosingTagPair(name); } else { // Should not happen but it could. renderedText = "<w:unknownTag></w:unknownTag>"; log.warn( "init(): Unknown tag was detected for a JAXBElement = " + XmlUtils.marshaltoString(docxObject, true)); } para = ObjectFactory.createP(renderedText); this.isDummy = true; } } else { throw new IllegalArgumentException("Unsupported Docx Object = " + docxObject); } initParagraphProperties(para); initChildren(para); }
/** @param args */ public static void main(String[] args) throws Exception { try { getInputFilePath(args); } catch (IllegalArgumentException e) { // inputfilepath = System.getProperty("user.dir") + "/sample-docs/glox/Hier2Level.glox"; inputfilepath = System.getProperty("user.dir") + "/sample-docs/glox/Picture Organization Chart.glox"; } GloxPackage gloxPackage = GloxPackage.load(new java.io.File(inputfilepath)); String title0 = gloxPackage.getDiagramLayoutHeaderPart().getJaxbElement().getTitle().get(0).getVal(); System.out.println("Title: " + title0); String desc0 = gloxPackage.getDiagramLayoutHeaderPart().getJaxbElement().getDesc().get(0).getVal(); System.out.println("Description: " + desc0); String layoutXml = XmlUtils.marshaltoString(gloxPackage.getDiagramLayoutPart().getJaxbElement(), true, true); System.out.println(layoutXml); }
private List<Object> cloneRepeatSdt(Object sdt, String xpathBase, int numRepeats) { List<Object> newContent = new ArrayList<Object>(); SdtPr sdtPr = getSdtPr(sdt); log.debug(XmlUtils.marshaltoString(sdtPr, true, true)); // CTDataBinding binding = // (CTDataBinding)XmlUtils.unwrap(sdtPr.getDataBinding()); CTDataBinding binding = sdtPr.getDataBinding(); if (binding != null) { // Shouldn't be a binding anyway sdtPr.getRPrOrAliasOrLock().remove(binding); } emptyRepeatTagValue(sdtPr.getTag()); // 2012 07 15: do it to the first one for (int i = 0; i < numRepeats; i++) { // 2012 07 13: for "od:RptPosCon" processing to // work (conditional inclusion dependant on position // in repeat), we need each entry (ie including the // original) to have the same tag (which I've changed // to od:rptd). if (i > 0) { // Change ID sdtPr.setId(); } // preserve ID on index 0, important for OpenDoPEReverter! // Clone newContent.add(XmlUtils.deepCopy(sdt)); } return newContent; }
@Override public List<Object> apply(Object o) { if (o instanceof P) { currentP = createNode(document, NODE_BLOCK); currentSpan = null; if (tc.peek() != null) { tc.peek().appendChild(currentP); } else { parentNode.appendChild(currentP); } pPr = ((P) o).getPPr(); currentP = handlePPr(conversionContext, pPr, false, currentP); } else if (o instanceof org.docx4j.wml.R) { if (!conversionContext.isInComplexFieldDefinition()) { // Convert run to span Element spanEl = createNode(document, NODE_INLINE); currentSpan = spanEl; rPr = ((R) o).getRPr(); if (rPr != null) { handleRPr(conversionContext, pPr, rPr, currentSpan); } if (currentP == null) { // Hyperlink special case parentNode.appendChild(spanEl); } else { rtlAwareAppendChildToCurrentP(spanEl); } // To merge nested span (which we could do if there is a single child span), // TraversalUtil Callback would need an after walk children } } else if (o instanceof org.docx4j.wml.FldChar) { conversionContext.updateComplexFieldDefinition(((org.docx4j.wml.FldChar) o).getFldCharType()); } else if (o instanceof org.docx4j.wml.Text) { if (!conversionContext.isInComplexFieldDefinition()) { if (currentSpan == null) { // eg after <br/> log.error("null currentSpan! 
" + ((Text) o).getValue()); Element spanEl = createNode(document, NODE_INLINE); if (currentP == null) { // Hyperlink special case parentNode.appendChild(spanEl); } else { currentP.appendChild(spanEl); } currentSpan = spanEl; } log.debug(((Text) o).getValue()); DocumentFragment df = (DocumentFragment) conversionContext.getRunFontSelector().fontSelector(pPr, rPr, ((Text) o)); XmlUtils.treeCopy(df, currentSpan); // TODO would be more efficient without the treeCopy // but fontSelector would need to be refactored a bit } } else if (o instanceof org.docx4j.wml.R.Tab) { convertTabToNode(conversionContext, document); } else if (o instanceof org.docx4j.wml.CTSimpleField) { convertToNode( conversionContext, o, AbstractFldSimpleWriter.WRITER_ID, document, getCurrentParent()); } else if (o instanceof org.docx4j.wml.P.Hyperlink) { convertToNode( conversionContext, o, AbstractHyperlinkWriter.WRITER_ID, document, getCurrentParent()); } else if (o instanceof org.docx4j.wml.CTBookmark) { convertToNode( conversionContext, o, AbstractBookmarkStartWriter.WRITER_ID, document, getCurrentParent()); } else if (o instanceof org.docx4j.wml.Tbl) { convertToNode( conversionContext, o, AbstractTableWriter.WRITER_ID, document, (currentP != null ? currentP : parentNode)); currentP = null; currentSpan = null; } else if (o instanceof org.docx4j.wml.Tr) { // done in walkJAXBElements // tr = document.createElementNS(Namespaces.NS_WORD12, "tr"); // //parentNode is in this case the DocumentFragment, that get's passed // //to the TableModel/TableModelWriter // parentNode.appendChild(tr); } else if (o instanceof org.docx4j.wml.Tc) { // done in walkJAXBElements // tc = document.createElementNS(Namespaces.NS_WORD12, "tc"); // tr.appendChild(tc); // // now the html p content will go temporarily go in w:tc, // // which is what we need for our existing table model. 
// System.out.println("#wrapped in w:tc OK"); } else if (o instanceof org.docx4j.dml.wordprocessingDrawing.Inline || o instanceof org.docx4j.dml.wordprocessingDrawing.Anchor) { anchorOrInline = o; // keep this until we handle CTBlip } else if (o instanceof org.docx4j.dml.CTBlip) { /*<w:drawing> <wp:inline distT="0" distB="0" distL="0" distR="0"> <a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"> <a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"> <pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"> <pic:blipFill> <a:blip r:embed="rId10" cstate="print"/> */ DocumentFragment foreignFragment = createImage(IMAGE_E20, conversionContext, anchorOrInline); anchorOrInline = null; currentP.appendChild(document.importNode(foreignFragment, true)); } else if (o instanceof org.docx4j.wml.Pict) { /*<w:pict> <v:shape id="_x0000_i1025" type="#_x0000_t75" style="width:428.25pt;height:321pt"> <v:imagedata r:id="rId4" o:title=""/> </v:shape> */ org.docx4j.vml.CTTextbox textBox = getTextBox((org.docx4j.wml.Pict) o); if (textBox == null) { // Assume it contains an image! DocumentFragment foreignFragment = createImage(IMAGE_E10, conversionContext, o); currentP.appendChild(document.importNode(foreignFragment, true)); } else { convertToNode( conversionContext, o, AbstractPictWriter.WRITER_ID, document, getCurrentParent()); } } else if (o instanceof Br) { handleBr((Br) o); } else if (o instanceof org.docx4j.wml.R.Sym) { convertToNode( conversionContext, o, AbstractSymbolWriter.WRITER_ID, document, getCurrentParent()); } else if ((o instanceof org.docx4j.wml.ProofErr) || (o instanceof org.docx4j.wml.R.LastRenderedPageBreak) || (o instanceof org.docx4j.wml.CTMarkupRange)) { // Ignore theese types, they don't need to be outputed/handled // CTMarkupRange is the w:bookmarkEnd } else { getLog().warn("Need to handle " + o.getClass().getName()); log.debug(XmlUtils.marshaltoString(o)); } return null; }
/**
 * Prints a diagnostic description of one content control to standard output:
 * its class and parent, its data binding XPath (if any), its tag value, and the
 * XPath that its condition, repeat, or xpath tag entry resolves to.
 *
 * <p>Missing conditions and missing XPaths are reported rather than treated as
 * errors. When the referenced condition cannot be found, the method reports it
 * and returns, since there is nothing further to resolve.
 *
 * @param element the content control being visited
 * @param parent the parent of {@code element}; its simple class name is printed
 * @param siblings not used by this method
 */
@Override
public void apply(SdtElement element, Object parent, List<Object> siblings) {

    System.out.println();

    SdtPr sdtPr = element.getSdtPr();
    if (sdtPr == null) {
        System.out.println(
                callback.indent
                        + element.getClass().getSimpleName()
                        + " [no sdtPr!]"
                        + " (having parent "
                        + parent.getClass().getSimpleName()
                        + ")");
        return;
    }

    System.out.println(
            callback.indent
                    + element.getClass().getSimpleName()
                    + " (having parent "
                    + parent.getClass().getSimpleName()
                    + ")");

    CTDataBinding binding = (CTDataBinding) XmlUtils.unwrap(sdtPr.getDataBinding());
    if (binding != null) {
        System.out.println(callback.indent + " binding: " + binding.getXpath());
    }

    Tag tag = sdtPr.getTag();
    if (tag == null) {
        return;
    }
    System.out.println(callback.indent + " " + tag.getVal());

    HashMap<String, String> map = QueryString.parseQueryString(tag.getVal(), true);
    String conditionId = map.get(OpenDoPEHandler.BINDING_ROLE_CONDITIONAL);
    String repeatId = map.get(OpenDoPEHandler.BINDING_ROLE_REPEAT);
    String xp = map.get(OpenDoPEHandler.BINDING_ROLE_XPATH);

    if (conditionId != null) {

        Condition c = ConditionsPart.getConditionById(conditions, conditionId);
        if (c == null) {
            System.out.println(callback.indent + " " + "Missing condition " + conditionId);
            // Nothing more can be said about a condition that does not exist;
            // continuing would dereference null.
            return;
        }

        if (c.getParticle() instanceof org.opendope.conditions.Xpathref) {
            // instanceof has just succeeded, so the particle is known to be non-null.
            org.opendope.conditions.Xpathref xpathRef = (Xpathref) c.getParticle();

            org.opendope.xpaths.Xpaths.Xpath xpath =
                    XPathsPart.getXPathById(xPaths, xpathRef.getId());
            if (xpath == null) {
                System.out.println(
                        callback.indent
                                + " "
                                + "XPath specified in condition '"
                                + c.getId()
                                + "' is missing!");
            } else {
                System.out.println(
                        callback.indent
                                + " "
                                + xpath.getId()
                                + ": "
                                + xpath.getDataBinding().getXpath());
            }
        } else {
            System.out.println("Complex condition: " + XmlUtils.marshaltoString(c, true, true));
        }

    } else if (repeatId != null) {

        org.opendope.xpaths.Xpaths.Xpath xpath = XPathsPart.getXPathById(xPaths, repeatId);
        if (xpath == null) {
            System.out.println(
                    callback.indent
                            + " "
                            + "XPath specified in repeat '"
                            + repeatId
                            + "' is missing!");
        } else {
            System.out.println(
                    callback.indent
                            + " "
                            + xpath.getId()
                            + ": "
                            + xpath.getDataBinding().getXpath());
        }

    } else if (xp != null) {

        org.opendope.xpaths.Xpaths.Xpath xpath = XPathsPart.getXPathById(xPaths, xp);
        if (xpath == null) {
            System.out.println(
                    callback.indent + " " + "XPath specified with id '" + xp + "' is missing!");
        } else {
            System.out.println(
                    callback.indent
                            + " "
                            + xpath.getId()
                            + ": "
                            + xpath.getDataBinding().getXpath());
        }
    }
}
/** @param args */ public static void main(String[] args) throws Exception { String inputfilepath = System.getProperty("user.dir") + "/sample-docs/word/databinding/invoice2.docx"; WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath)); filepathprefix = inputfilepath.substring(0, inputfilepath.lastIndexOf(".")); System.out.println(filepathprefix); StringBuilder timingSummary = new StringBuilder(); // Process conditionals and repeats long startTime = System.currentTimeMillis(); OpenDoPEHandler odh = new OpenDoPEHandler(wordMLPackage); odh.preprocess(); long endTime = System.currentTimeMillis(); timingSummary.append("OpenDoPEHandler: " + (endTime - startTime)); System.out.println( XmlUtils.marshaltoString(wordMLPackage.getMainDocumentPart().getJaxbElement(), true, true)); SaveToZipFile saver = new SaveToZipFile(wordMLPackage); saver.save(filepathprefix + "_1_preprocessed.docx"); System.out.println("Saved: " + filepathprefix + "_1_preprocessed.docx"); startTime = System.currentTimeMillis(); OpenDoPEIntegrity odi = new OpenDoPEIntegrity(); odi.process(wordMLPackage); endTime = System.currentTimeMillis(); timingSummary.append("\nOpenDoPEIntegrity: " + (endTime - startTime)); System.out.println( XmlUtils.marshaltoString(wordMLPackage.getMainDocumentPart().getJaxbElement(), true, true)); saver = new SaveToZipFile(wordMLPackage); saver.save(filepathprefix + "_2_integrity.docx"); System.out.println("Saved: " + filepathprefix + "_2_integrity.docx"); // Apply the bindings BindingHandler.setHyperlinkStyle("Hyperlink"); startTime = System.currentTimeMillis(); BindingHandler.applyBindings(wordMLPackage.getMainDocumentPart()); endTime = System.currentTimeMillis(); timingSummary.append("\nBindingHandler.applyBindings: " + (endTime - startTime)); System.out.println( XmlUtils.marshaltoString(wordMLPackage.getMainDocumentPart().getJaxbElement(), true, true)); saver.save(filepathprefix + "_3_bound.docx"); System.out.println("Saved: " + 
filepathprefix + "_3_bound.docx"); // Either demonstrate reverter, or stripping of controls; // you can't do both. So comment out one or the other. // reverter(inputfilepath, filepathprefix + "_bound.docx"); // // Strip content controls startTime = System.currentTimeMillis(); RemovalHandler rh = new RemovalHandler(); rh.removeSDTs(wordMLPackage, Quantifier.ALL); endTime = System.currentTimeMillis(); timingSummary.append("\nRemovalHandler: " + (endTime - startTime)); saver.save(filepathprefix + "_4_stripped.docx"); System.out.println("Saved: " + filepathprefix + "_4_stripped.docx"); System.out.println(timingSummary); }
public static DocumentFragment createBlockForP( SvgConversionContext context, String lvl, String cNvPrName, String phType, NodeIterator childResults, NodeIterator lvlNpPr) { StyleTree styleTree = null; try { styleTree = context.getPmlPackage().getStyleTree(); } catch (InvalidFormatException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } log.debug("lvl:" + lvl); int level; if (lvl.equals("NaN")) { level = 1; } else { level = Integer.parseInt(lvl); } String pStyleVal; System.out.println("cNvPrName: " + cNvPrName + "; " + "phType: " + phType); if (cNvPrName.toLowerCase().indexOf("subtitle") > -1 || phType.toLowerCase().indexOf("subtitle") > -1) { // Subtitle on first page in default layout is styled as a Body. pStyleVal = "Lvl" + level + "Master" + context.getResolvedLayout().getMasterNumber() + "Body"; } else if (cNvPrName.toLowerCase().indexOf("title") > -1 || phType.toLowerCase().indexOf("title") > -1) { pStyleVal = "Lvl" + level + "Master" + context.getResolvedLayout().getMasterNumber() + "Title"; } else { // eg cNvPrName: TextBox 2; phType: pStyleVal = "Lvl" + level + "Master" + context.getResolvedLayout().getMasterNumber() + "Other"; } System.out.println("--> " + pStyleVal); try { // Create a DOM builder and parse the fragment Document document = XmlUtils.getNewDocumentBuilder().newDocument(); // log.info("Document: " + document.getClass().getName() ); Node xhtmlP = document.createElement("p"); document.appendChild(xhtmlP); // Set @class log.debug(pStyleVal); Tree<AugmentedStyle> pTree = styleTree.getParagraphStylesTree(); org.docx4j.model.styles.Node<AugmentedStyle> asn = pTree.get(pStyleVal); ((Element) xhtmlP).setAttribute("class", StyleTree.getHtmlClassAttributeValue(pTree, asn)); StringBuilder inlineStyle = new StringBuilder(); // Do we have CTTextParagraphProperties // <a:lvl?pPr> // Convert it to a WordML pPr CTTextParagraphProperties lvlPPr = unmarshalFormatting(lvlNpPr); if (lvlPPr != null) { log.debug("We have lvlPPr"); log.debug( 
XmlUtils.marshaltoString( lvlPPr, true, true, Context.jcPML, "FIXME", "lvl1pPr", CTTextParagraphProperties.class)); PPr pPr = TextStyles.getWmlPPr(lvlPPr); if (pPr != null) { HtmlCssHelper.createCss(context.getPmlPackage(), pPr, inlineStyle, false, false); } // TODO RPR } // Without this, top-margin is too large in Webkit (Midor). // Not tested elsewhere... inlineStyle.append("margin-left:3px; margin-top:3px;"); if (!inlineStyle.toString().equals("")) { ((Element) xhtmlP).setAttribute("style", inlineStyle.toString()); } // Our fo:block wraps whatever result tree fragment // our style sheet produced when it applied-templates // to the child nodes // init Node n = childResults.nextNode(); do { // getNumberXmlNode creates a span node, which is empty // if there is no numbering. // Let's get rid of any such <span/>. // What we actually get is a document node if (n.getNodeType() == Node.DOCUMENT_NODE) { log.debug("handling DOCUMENT_NODE"); // Do just enough of the handling here NodeList nodes = n.getChildNodes(); if (nodes != null) { for (int i = 0; i < nodes.getLength(); i++) { if (((Node) nodes.item(i)).getLocalName().equals("span") && !((Node) nodes.item(i)).hasChildNodes()) { // ignore log.debug(".. ignoring <span/> "); } else { XmlUtils.treeCopy((Node) nodes.item(i), xhtmlP); } } } } else { // log.info("Node we are importing: " + n.getClass().getName() ); // foBlockElement.appendChild( // document.importNode(n, true) ); /* * Node we'd like to import is of type org.apache.xml.dtm.ref.DTMNodeProxy * which causes * org.w3c.dom.DOMException: NOT_SUPPORTED_ERR: The implementation does not support the requested type of object or operation. 
* * See http://osdir.com/ml/text.xml.xerces-j.devel/2004-04/msg00066.html * * So instead of importNode, use */ XmlUtils.treeCopy(n, xhtmlP); } // next n = childResults.nextNode(); } while (n != null); DocumentFragment docfrag = document.createDocumentFragment(); docfrag.appendChild(document.getDocumentElement()); return docfrag; } catch (Exception e) { log.error(e.getMessage(), e); } return null; }