/**
   * Builds a single "dummy" section wrapper that holds the complete document body. Section breaks
   * inside the document are ignored by the conversion process; since there is no need to look for
   * sections this is faster, and the HTML output consists of just one section.<br>
   * The header/footer of the body-level sectPr is used. That is not correct when the document
   * contains several sections, but finding the right sectPr would require inspecting the document
   * content, and the aim of this method is a low overhead.
   *
   * @param wmlPackage the package being converted (not read by this method)
   * @param document the main document whose body content is wrapped
   * @param rels relationships part handed to the header/footer policy and the section wrapper
   * @param evenAndOddHeaders even/odd header setting handed to the policy and the section wrapper
   * @param dummyPageNumbering passed through unchanged to {@code createSectionWrapper}
   * @return a new, mutable list containing exactly one section wrapper
   */
  protected static List<ConversionSectionWrapper> processDummy(
      WordprocessingMLPackage wmlPackage,
      Document document,
      RelationshipsPart rels,
      BooleanDefaultTrue evenAndOddHeaders,
      boolean dummyPageNumbering) {

    // SectionWrapper does work where sectPr is null (i.e. the document has no body-level sectPr),
    // so the result of document.getBody().getSectPr() is passed on without a null check.
    HeaderFooterPolicy bodyPolicy =
        new HeaderFooterPolicy(document.getBody().getSectPr(), null, rels, evenAndOddHeaders);

    // The one and only section: index 1, containing all of the body content.
    ConversionSectionWrapper wholeDocument =
        createSectionWrapper(
            document.getBody().getSectPr(),
            bodyPolicy,
            rels,
            evenAndOddHeaders,
            1,
            document.getBody().getContent(),
            dummyPageNumbering);

    List<ConversionSectionWrapper> wrappers = new ArrayList<ConversionSectionWrapper>();
    wrappers.add(wholeDocument);
    return wrappers;
  }
Exemplo n.º 2
0
  /**
   * Traverses the document and collects the styles which are used directly in it (i.e. styles
   * which are merely the BasedOn target of another style are not included). Besides the main
   * document body, the headers and footers related to this part, the endnotes, the footnotes and
   * the comments are walked, where present.
   *
   * @return the set that the {@code FontAndStyleFinder} filled during the traversal
   */
  public Set<String> getStylesInUse() {

    Body documentBody = ((org.docx4j.wml.Document) this.getJaxbElement()).getBody();
    List<Object> mainContent = documentBody.getContent();

    Set<String> collectedStyles = new HashSet<String>();
    FontAndStyleFinder styleFinder = new FontAndStyleFinder(null, null, collectedStyles);
    styleFinder.defaultCharacterStyle =
        this.getStyleDefinitionsPart().getDefaultCharacterStyle();
    styleFinder.defaultParagraphStyle =
        this.getStyleDefinitionsPart().getDefaultParagraphStyle();

    // Main document body; finish() is called once, right after the body traversal.
    new TraversalUtil(mainContent, styleFinder);
    styleFinder.finish();

    // Styles in headers and footers
    RelationshipsPart relationships = this.getRelationshipsPart();
    if (relationships != null) {
      for (Relationship rel : relationships.getRelationships().getRelationship()) {
        Part target = relationships.getPart(rel);
        if (target instanceof HeaderPart) {
          Hdr header = ((HeaderPart) target).getJaxbElement();
          styleFinder.walkJAXBElements(header);
        } else if (target instanceof FooterPart) {
          Ftr footer = ((FooterPart) target).getJaxbElement();
          styleFinder.walkJAXBElements(footer);
        }
      }
    }

    // Styles in endnotes
    if (this.getEndNotesPart() != null) {
      log.debug("Looking at endnotes");
      CTEndnotes endnoteContent = this.getEndNotesPart().getJaxbElement();
      styleFinder.walkJAXBElements(endnoteContent);
    }

    // Styles in footnotes
    if (this.getFootnotesPart() != null) {
      log.debug("Looking at footnotes");
      CTFootnotes footnoteContent = this.getFootnotesPart().getJaxbElement();
      styleFinder.walkJAXBElements(footnoteContent);
    }

    // Styles in comments
    if (this.getCommentsPart() != null) {
      log.debug("Looking at comments");
      Comments commentContent = this.getCommentsPart().getJaxbElement();
      styleFinder.walkJAXBElements(commentContent);
    }

    return collectedStyles;
  }
Exemplo n.º 3
0
  /**
   * Sample entry point: loads {@code sample-docs/sample-docx.xml} (resolved against the
   * {@code user.dir} system property) and prints, for every object reached while walking the
   * document body, its class name and - for text elements - its value, indented by nesting depth.
   *
   * @param args not used
   * @throws Exception if loading the package or walking the document fails
   */
  public static void main(String[] args) throws Exception {

    java.io.File input =
        new java.io.File(System.getProperty("user.dir") + "/sample-docs/sample-docx.xml");

    WordprocessingMLPackage loadedPackage = WordprocessingMLPackage.load(input);
    MainDocumentPart mainPart = loadedPackage.getMainDocumentPart();
    Body body = ((org.docx4j.wml.Document) mainPart.getJaxbElement()).getBody();

    Callback printer =
        new Callback() {

          // Indentation prefix for the current depth; four spaces are added per level.
          String prefix = "";

          /** Prints one line for the visited object; only Text elements contribute a value. */
          @Override
          public List<Object> apply(Object o) {
            String text =
                (o instanceof org.docx4j.wml.Text) ? ((org.docx4j.wml.Text) o).getValue() : "";
            System.out.println(prefix + o.getClass().getName() + "  \"" + text + "\"");
            return null;
          }

          /** Every object is descended into. */
          @Override
          public boolean shouldTraverse(Object o) {
            return true;
          }

          /** Depth-first walk: each child is printed before its own children are visited. */
          @Override
          public void walkJAXBElements(Object parent) {

            prefix += "    ";

            List<Object> children = getChildren(parent);
            if (children != null) {
              for (Object child : children) {
                // If the child is wrapped in a javax.xml.bind.JAXBElement, use its value;
                // this is ok, provided the results of the Callback won't be marshalled.
                Object unwrapped = XmlUtils.unwrap(child);

                this.apply(unwrapped);
                if (this.shouldTraverse(unwrapped)) {
                  walkJAXBElements(unwrapped);
                }
              }
            }

            // Leaving this level: drop the four spaces added on entry.
            prefix = prefix.substring(0, prefix.length() - 4);
          }

          @Override
          public List<Object> getChildren(Object o) {
            return TraversalUtil.getChildrenImpl(o);
          }
        };

    new TraversalUtil(body, printer);
  }
Exemplo n.º 4
0
  /**
   * Traverses the document, looking for fonts which have been applied, either directly, or via a
   * style. The main document body, the headers and footers related to this part, the endnotes,
   * the footnotes and the comments are walked, where present. The default font reported by the
   * {@code RunFontSelector} is added as well, and so is the ASCII font of every level of every
   * abstract numbering definition.
   *
   * @return the set of font names collected
   */
  public Set<String> fontsInUse() {

    log.info("fontsInUse..");

    getPropertyResolver(); // this inits our virtual DocDefaults style

    Set<String> discoveredFonts = new java.util.HashSet<String>();

    Body documentBody = ((org.docx4j.wml.Document) this.getJaxbElement()).getBody();
    List<Object> mainContent = documentBody.getContent();

    // Wire up the discovery chain: visitor -> font selector -> finder
    FontDiscoveryCharacterVisitor characterVisitor =
        new FontDiscoveryCharacterVisitor(discoveredFonts);
    RunFontSelector fontSelector =
        new RunFontSelector(
            (WordprocessingMLPackage) this.pack, characterVisitor, RunFontActionType.DISCOVERY);

    FontAndStyleFinder fontFinder = new FontAndStyleFinder(fontSelector, discoveredFonts, null);
    fontFinder.defaultCharacterStyle = this.getStyleDefinitionsPart().getDefaultCharacterStyle();
    fontFinder.defaultParagraphStyle = this.getStyleDefinitionsPart().getDefaultParagraphStyle();

    // Main document body. finder.finish() is not invoked here; that call was left commented out.
    new TraversalUtil(mainContent, fontFinder);

    discoveredFonts.add(fontSelector.getDefaultFont());

    // Fonts in headers and footers
    RelationshipsPart relationships = this.getRelationshipsPart();
    if (relationships != null) {
      for (Relationship rel : relationships.getRelationships().getRelationship()) {
        Part target = relationships.getPart(rel);
        if (target instanceof HeaderPart) {
          Hdr header = ((HeaderPart) target).getJaxbElement();
          fontFinder.walkJAXBElements(header);
        } else if (target instanceof FooterPart) {
          Ftr footer = ((FooterPart) target).getJaxbElement();
          fontFinder.walkJAXBElements(footer);
        }
      }
    }

    // Fonts in endnotes
    if (this.getEndNotesPart() != null) {
      log.debug("Looking at endnotes");
      CTEndnotes endnoteContent = this.getEndNotesPart().getJaxbElement();
      fontFinder.walkJAXBElements(endnoteContent);
    }

    // Fonts in footnotes
    if (this.getFootnotesPart() != null) {
      log.debug("Looking at footnotes");
      CTFootnotes footnoteContent = this.getFootnotesPart().getJaxbElement();
      fontFinder.walkJAXBElements(footnoteContent);
    }

    // Fonts in comments
    if (this.getCommentsPart() != null) {
      log.debug("Looking at comments");
      Comments commentContent = this.getCommentsPart().getJaxbElement();
      fontFinder.walkJAXBElements(commentContent);
    }

    // Adding the fonts used in the styles we discovered:
    // .. 2013 03 10: no longer necessary

    // Fonts can also be used in the numbering part.
    // For now, treat any font mentioned in that part as in use.
    // Ideally, we'd only register fonts used in numbering levels
    // that were actually used in the document.
    if (getNumberingDefinitionsPart() != null) {
      Numbering numbering = getNumberingDefinitionsPart().getJaxbElement();
      for (Numbering.AbstractNum abstractNum : numbering.getAbstractNum()) {
        for (Lvl level : abstractNum.getLvl()) {
          if (level.getRPr() == null || level.getRPr().getRFonts() == null) {
            continue; // this level does not specify any fonts
          }
          String asciiFont = level.getRPr().getRFonts().getAscii();
          if (asciiFont == null) {
            continue;
          }
          discoveredFonts.add(asciiFont);
          log.debug(
              "Registered "
                  + asciiFont
                  + " for abstract list "
                  + abstractNum.getAbstractNumId()
                  + " lvl "
                  + level.getIlvl());
        }
      }
    }

    return discoveredFonts;
  }
Exemplo n.º 5
0
  /**
   * Creates a new WordprocessingML package containing a main document part with an empty body, a
   * body-level sectPr whose page size and margins come from {@link PageDimensions}, a styles part
   * populated via {@code unmarshalDefaultStyles()}, and core and extended document property parts.
   *
   * <p>If setting up the styles part throws, the exception is logged and package creation
   * continues.
   *
   * @param sz the paper size handed to {@code PageDimensions.setPgSize}
   * @param landscape the orientation flag handed to {@code PageDimensions.setPgSize}
   * @return the newly created package
   * @throws InvalidFormatException propagated from the part constructors or from adding the parts
   */
  public static WordprocessingMLPackage createPackage(PageSizePaper sz, boolean landscape)
      throws InvalidFormatException {

    WordprocessingMLPackage pkg = new WordprocessingMLPackage();
    MainDocumentPart mainPart = new MainDocumentPart();

    // Main document content: a document element with an empty body
    org.docx4j.wml.ObjectFactory wmlFactory = Context.getWmlObjectFactory();
    org.docx4j.wml.Body body = wmlFactory.createBody();
    org.docx4j.wml.Document documentEl = wmlFactory.createDocument();
    documentEl.setBody(body);

    // A basic sectPr, derived from our Page model
    PageDimensions pageModel = new PageDimensions();
    pageModel.setPgSize(sz, landscape);

    SectPr sectPr = wmlFactory.createSectPr();
    sectPr.setPgSz(pageModel.getPgSz());
    sectPr.setPgMar(pageModel.getPgMar());
    body.setSectPr(sectPr);

    // Put the content in the part, then add the part to the package relationships
    // (creating them if necessary)
    mainPart.setJaxbElement(documentEl);
    pkg.addTargetPart(mainPart);

    // Styles part
    org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart stylesPart =
        new org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart();
    try {
      stylesPart.unmarshalDefaultStyles();

      // NB - the styles part belongs to the main document part's relationships, not the package's
      mainPart.addTargetPart(stylesPart);

    } catch (Exception e) {
      // TODO: handle exception
      log.error(e.getMessage(), e);
    }

    // Metadata: docx4j 2.7.1 can populate some of this from docx4j.properties
    // See SaveToZipFile
    DocPropsCorePart coreProps = new DocPropsCorePart();
    coreProps.setJaxbElement(
        new org.docx4j.docProps.core.ObjectFactory().createCoreProperties());
    pkg.addTargetPart(coreProps);

    DocPropsExtendedPart extendedProps = new DocPropsExtendedPart();
    extendedProps.setJaxbElement(
        new org.docx4j.docProps.extended.ObjectFactory().createProperties());
    pkg.addTargetPart(extendedProps);

    return pkg;
  }
Exemplo n.º 6
0
  /**
   * Sample entry point: creates a new package, adds a settings part (print view, update fields)
   * when the main document part has none, inserts a paragraph holding a TOC field followed by
   * four heading paragraphs, and saves the result as {@code OUT_TableOfContentsAdd.docx} under
   * the {@code user.dir} directory.
   *
   * @param args not used
   * @throws Exception if creating, modifying or saving the package fails
   */
  public static void main(String[] args) throws Exception {

    WordprocessingMLPackage pkg = WordprocessingMLPackage.createPackage();
    MainDocumentPart mainPart = pkg.getMainDocumentPart();

    // Make sure there is a settings part; a newly created one asks for fields to be updated.
    DocumentSettingsPart settingsPart = mainPart.getDocumentSettingsPart();
    if (settingsPart == null) {
      CTSettings settings = new CTSettings();

      CTView view = Context.getWmlObjectFactory().createCTView();
      view.setVal(STView.PRINT);
      settings.setView(view);

      BooleanDefaultTrue updateFields = new BooleanDefaultTrue();
      updateFields.setVal(true);
      settings.setUpdateFields(updateFields);

      settingsPart = new DocumentSettingsPart();
      settingsPart.setJaxbElement(settings);
      mainPart.addTargetPart(settingsPart);
    }

    Body body = ((org.docx4j.wml.Document) mainPart.getJaxbElement()).getBody();
    ObjectFactory factory = Context.getWmlObjectFactory();

    /*
     * Create the following:
     *
     * <w:p> <w:r> <w:fldChar w:dirty="true" w:fldCharType="begin"/>
     * <w:instrText xml:space="preserve">TOC \o &quot;1-3&quot; \h \z \ u
     * </w:instrText> </w:r> <w:r/> <w:r> <w:fldChar w:fldCharType="end"/>
     * </w:r> </w:p>
     *
     * (The last switch is written with a space after the backslash above only because a
     * backslash directly followed by "u" would be read as a Unicode escape, even in a comment.)
     */
    P tocParagraph = factory.createP();

    // First run: field begin, marked dirty
    FldChar fieldBegin = factory.createFldChar();
    fieldBegin.setFldCharType(STFldCharType.BEGIN);
    fieldBegin.setDirty(true);
    R beginRun = factory.createR();
    beginRun.getContent().add(getWrappedFldChar(fieldBegin));
    tocParagraph.getContent().add(beginRun);

    // Field instruction.
    // NOTE(review): the instruction text goes into the first run, so the second run stays empty
    // (the <w:r/> in the sketch above) - confirm that this is intended.
    Text instruction = new Text();
    instruction.setSpace("preserve");
    instruction.setValue("TOC \\o \"1-3\" \\h \\z \\u ");
    beginRun.getContent().add(factory.createRInstrText(instruction));
    R emptyRun = factory.createR();
    tocParagraph.getContent().add(emptyRun);

    // Last run: field end
    FldChar fieldEnd = factory.createFldChar();
    fieldEnd.setFldCharType(STFldCharType.END);
    R endRun = factory.createR();
    endRun.getContent().add(getWrappedFldChar(fieldEnd));
    tocParagraph.getContent().add(endRun);

    body.getContent().add(tocParagraph);

    // Some headings for the TOC to pick up
    mainPart.addStyledParagraphOfText("Heading1", "Hello 1");
    mainPart.addStyledParagraphOfText("Heading2", "Hello 2");
    mainPart.addStyledParagraphOfText("Heading3", "Hello 3");
    mainPart.addStyledParagraphOfText("Heading1", "Hello 1");

    java.io.File output =
        new java.io.File(System.getProperty("user.dir") + "/OUT_TableOfContentsAdd.docx");
    pkg.save(output);
  }
  /**
   * Splits the document body into conversion sections, creating one section wrapper per section
   * break that is not merged into the section following it, plus a final wrapper for the
   * body-level sectPr.
   *
   * <p>Side effects on {@code document}: block-level content controls are replaced by their
   * content, and a body-level sectPr is ensured (either moved from the last paragraph or newly
   * created).
   *
   * @param wmlPackage the package being converted (not read by this method)
   * @param document the main document to split; modified as described above
   * @param rels relationships part handed to the header/footer policies and section wrappers
   * @param evenAndOddHeaders even/odd header setting handed to the policies and section wrappers
   * @param dummyPageNumbering passed through unchanged to {@code createSectionWrapper}
   * @return the section wrappers, in document order; contains at least one element
   */
  protected static List<ConversionSectionWrapper> processComplete(
      WordprocessingMLPackage wmlPackage,
      Document document,
      RelationshipsPart rels,
      BooleanDefaultTrue evenAndOddHeaders,
      boolean dummyPageNumbering) {
    List<ConversionSectionWrapper> conversionSections = new ArrayList<ConversionSectionWrapper>();
    List<Object> sectionContent = new ArrayList<Object>();
    ConversionSectionWrapper currentSectionWrapper = null;
    HeaderFooterPolicy previousHF = null;
    int conversionSectionIndex = 0;

    // According to the ECMA-376 2ed, if type is not specified, read it as next page
    // However Word 2007 sometimes treats it as continuous, and sometimes doesn't??
    // 20130216 Review above comment: !  In the Word UI, the Word "continuous" is shown where it is
    // effective.
    // In the XML, it is stored in the next following sectPr.

    // First, remove content controls, since the P could be in a content control.
    unwrapSdtBlocksForSectionSplit(document);

    // Make a list, so it is easy to look at the following sectPr,
    // which we need to do to handle continuous sections properly.
    // Its last entry is always the body-level sectPr.
    List<SectPr> sectPrs = collectSectPrsForSectionSplit(document);

    int sectPrIndex = 0; // includes continuous ones
    for (Object o : document.getBody().getContent()) {

      SectPr thisSectPr = sectPrOfParagraphOrNull(o);
      if (thisSectPr != null) {

        // If the *following* section is continuous, don't add *this* section
        SectPr followingSectPr = sectPrs.get(++sectPrIndex);

        if (isMergedIntoFollowingSection(thisSectPr, followingSectPr)) {
          // In case there are some headers/footers that get inherited by the next section
          previousHF = new HeaderFooterPolicy(thisSectPr, previousHF, rels, evenAndOddHeaders);

        } else {
          currentSectionWrapper =
              createSectionWrapper(
                  thisSectPr,
                  previousHF,
                  rels,
                  evenAndOddHeaders,
                  ++conversionSectionIndex,
                  sectionContent,
                  dummyPageNumbering);
          conversionSections.add(currentSectionWrapper);
          previousHF = currentSectionWrapper.getHeaderFooterPolicy();
          sectionContent = new ArrayList<Object>();
        }
      }

      // NOTE(review): a paragraph carrying a sectPr is added after its section has been wrapped,
      // so it ends up in the content of the following section - confirm that this is intended.
      sectionContent.add(o);
    }

    // Whatever is left belongs to the section described by the body-level sectPr.
    currentSectionWrapper =
        createSectionWrapper(
            document.getBody().getSectPr(),
            previousHF,
            rels,
            evenAndOddHeaders,
            ++conversionSectionIndex,
            sectionContent,
            dummyPageNumbering);
    conversionSections.add(currentSectionWrapper);
    return conversionSections;
  }

  /**
   * Replaces every block-level content control found in the document by its own content. (It is
   * easier to remove content controls than to make the section splitting TraversalUtil based.
   * RemovalHandler is an XSLT-based way of doing this, but here we avoid introducing a dependency
   * on XSLT (Xalan) for PDF output.)
   */
  private static void unwrapSdtBlocksForSectionSplit(Document document) {
    SdtBlockFinder sbr = new SdtBlockFinder();
    new TraversalUtil(document.getContent(), sbr);

    // Have to process in reverse order so that the parent list is correct for nested sdt
    for (int i = sbr.sdtBlocks.size() - 1; i >= 0; i--) {
      SdtBlock sdtBlock = sbr.sdtBlocks.get(i);

      Object parent = sdtBlock.getParent();
      if (!(parent instanceof List)) {
        // Cannot splice the content into an unknown kind of parent; leave this sdt in place
        // rather than failing on a null parent list.
        log.error("Handle " + (parent == null ? "null parent" : parent.getClass().getName()));
        continue;
      }

      @SuppressWarnings("unchecked")
      List<Object> parentList = (List<Object>) parent;
      int index = parentList.indexOf(sdtBlock);
      if (index < 0) {
        log.error("SdtBlock not found in its parent list; leaving it in place");
        continue;
      }
      parentList.remove(index);
      parentList.addAll(index, sdtBlock.getSdtContent().getContent());
    }
  }

  /**
   * Collects the sectPr of every top-level paragraph in document order, followed by the
   * body-level sectPr. If the body has no sectPr, the one of the last paragraph is moved to the
   * body; failing that, a new empty sectPr is created on the body.
   */
  private static List<SectPr> collectSectPrsForSectionSplit(Document document) {
    List<Object> bodyContent = document.getBody().getContent();

    List<SectPr> sectPrs = new ArrayList<SectPr>();
    for (Object o : bodyContent) {
      SectPr paragraphLevel = sectPrOfParagraphOrNull(o);
      if (paragraphLevel != null) {
        sectPrs.add(paragraphLevel);
      }
    }

    if (document.getBody().getSectPr() != null) {
      // usual case
      sectPrs.add(document.getBody().getSectPr());
      return sectPrs;
    }

    log.debug("No body level sectPr in document");

    // OK if the last object is w:p and it contains a sectPr. (An empty body has no last object.)
    Object last = bodyContent.isEmpty() ? null : bodyContent.get(bodyContent.size() - 1);
    SectPr lastParagraphSectPr = sectPrOfParagraphOrNull(last);
    if (lastParagraphSectPr != null) {
      // so our assumption later about there being a following section is correct
      log.debug(".. but last p contains sectPr .. move it");

      document.getBody().setSectPr(lastParagraphSectPr);
      ((P) last).getPPr().setSectPr(null);
      // The moved sectPr was collected above as the final entry of sectPrs, which is exactly
      // where the body-level sectPr belongs. It must stay in the list: the paragraph-level
      // section before it looks it up as its following sectPr.

    } else {
      document.getBody().setSectPr(Context.getWmlObjectFactory().createSectPr());
      sectPrs.add(document.getBody().getSectPr());
    }
    return sectPrs;
  }

  /** Returns the sectPr held in the pPr of {@code o} if it is a paragraph having one, else null. */
  private static SectPr sectPrOfParagraphOrNull(Object o) {
    if (!(o instanceof org.docx4j.wml.P)) {
      return null;
    }
    org.docx4j.wml.PPr ppr = ((org.docx4j.wml.P) o).getPPr();
    return ppr == null ? null : ppr.getSectPr();
  }

  /**
   * Decides whether a section is not emitted on its own because the following section is
   * continuous and has the same page size and orientation.
   */
  private static boolean isMergedIntoFollowingSection(
      SectPr thisSectPr, SectPr followingSectPr) {

    if (followingSectPr.getType() == null
        || !"continuous".equals(followingSectPr.getType().getVal())) {
      return false;
    }

    // If the w:pgSz on the two sections differs,
    // then Word inserts a page break (ie doesn't treat it as continuous).
    // If no w:pgSz element is present, then Word defaults
    // (presumably to Legal? TODO CHECK. There is no default setting in the docx).
    // Word always inserts a w:pgSz element?
    PgSz pgSzThis = thisSectPr.getPgSz();
    PgSz pgSzNext = followingSectPr.getPgSz();
    if (pgSzThis == null || pgSzNext == null) {
      // TODO: handle cases where one or both pgSz elements are missing.
      // Treat pgSz element missing as Legal size?
      return true;
    }

    // A missing H or W cannot be compared; like a missing pgSz, it does not break continuity.
    if (bothPresentAndDifferent(pgSzThis.getH(), pgSzNext.getH())
        || bothPresentAndDifferent(pgSzThis.getW(), pgSzNext.getW())) {
      return false;
    }

    // Orientation: default is portrait
    boolean portraitThis =
        pgSzThis.getOrient() == null || pgSzThis.getOrient().equals(STPageOrientation.PORTRAIT);
    boolean portraitNext =
        pgSzNext.getOrient() == null || pgSzNext.getOrient().equals(STPageOrientation.PORTRAIT);
    return portraitThis == portraitNext;
  }

  /** True only when both values are non-null and compare as unequal. */
  private static <T extends Comparable<T>> boolean bothPresentAndDifferent(T a, T b) {
    return a != null && b != null && a.compareTo(b) != 0;
  }