예제 #1
0
  /**
   * Computes the length of the text found below a node, leaving out the content of notes.
   *
   * <p>Only the children of {@code node} are inspected: text nodes add the length of their value,
   * a {@code XMLString.TEXT_S} element adds the number obtained from its
   * {@code XMLString.TEXT_C} attribute via {@code Misc.getPosInteger(..., 1)}, tab elements add
   * one, note elements add nothing, and other text elements are counted recursively.
   *
   * @param node the node whose children are counted
   * @return the accumulated character count
   */
  public static int getCharacterCount(Node node) {
    int total = 0;
    for (Node cur = node.getFirstChild(); cur != null; cur = cur.getNextSibling()) {
      short type = cur.getNodeType();
      if (type == Node.TEXT_NODE) {
        total += cur.getNodeValue().length();
        continue;
      }
      if (type != Node.ELEMENT_NODE) {
        // Any other kind of node does not contribute to the count
        continue;
      }

      String tag = cur.getNodeName();
      if (tag.equals(XMLString.TEXT_S)) {
        total += Misc.getPosInteger(Misc.getAttribute(cur, XMLString.TEXT_C), 1);
      } else if (tag.equals(XMLString.TEXT_TAB_STOP) || tag.equals(XMLString.TEXT_TAB)) {
        // Old and oasis tab elements are both treated as a single space
        total += 1;
      } else if (!isNoteElement(cur) && isTextElement(cur)) {
        // Notes are skipped; any other text element is counted recursively
        total += getCharacterCount(cur);
      }
    }
    return total;
  }
예제 #2
0
  /**
   * Reads font declarations, styles and footnote/endnote configuration from the document DOMs.
   *
   * <p>All lookups except the second automatic-styles pass use {@code stylesDOM} when it is
   * available and {@code contentDOM} otherwise. For each element name only the first matching
   * element is used, except for the oasis notes configuration where every match is processed.
   *
   * @param stylesDOM the styles document, or {@code null} for flat XML
   * @param contentDOM the content document
   * @param bAllParagraphsAreSoft passed on to the per-container style loader
   */
  private void loadStylesFromDOM(
      Document stylesDOM, Document contentDOM, boolean bAllParagraphsAreSoft) {
    // Flat xml: stylesDOM will be null and contentDOM contains everything.
    // This is only the case for old versions of xmerge; newer versions
    // create DOMs for styles, content, meta and settings.
    Document styleSource = (stylesDOM != null) ? stylesDOM : contentDOM;

    // Font declarations: try the old element name first, then the oasis one
    NodeList found = styleSource.getElementsByTagName(XMLString.OFFICE_FONT_DECLS);
    if (found.getLength() == 0) {
      found = styleSource.getElementsByTagName(XMLString.OFFICE_FONT_FACE_DECLS);
    }
    if (found.getLength() > 0) {
      Node declarations = found.item(0);
      if (declarations.hasChildNodes()) {
        NodeList entries = declarations.getChildNodes();
        int entryCount = entries.getLength();
        for (int idx = 0; idx < entryCount; idx++) {
          Node entry = entries.item(idx);
          if (entry.getNodeType() != Node.ELEMENT_NODE) {
            continue;
          }
          String entryName = entry.getNodeName();
          if (entryName.equals(XMLString.STYLE_FONT_DECL)
              || entryName.equals(XMLString.STYLE_FONT_FACE)) {
            font.loadStyleFromDOM(entry, false);
          }
        }
      }
    }

    // Soft formatting first, then master styles
    String[] styleContainers = {XMLString.OFFICE_STYLES, XMLString.OFFICE_MASTER_STYLES};
    for (String containerTag : styleContainers) {
      found = styleSource.getElementsByTagName(containerTag);
      if (found.getLength() > 0) {
        loadStylesFromDOM(found.item(0), bAllParagraphsAreSoft);
      }
    }

    // Hard formatting:
    // Load from styles.xml first. Problem: There may be name clashes
    // with automatic styles from content.xml
    if (stylesDOM != null) {
      found = stylesDOM.getElementsByTagName(XMLString.OFFICE_AUTOMATIC_STYLES);
      if (found.getLength() > 0) {
        loadStylesFromDOM(found.item(0), bAllParagraphsAreSoft);
      }
    }
    found = contentDOM.getElementsByTagName(XMLString.OFFICE_AUTOMATIC_STYLES);
    if (found.getLength() > 0) {
      loadStylesFromDOM(found.item(0), bAllParagraphsAreSoft);
    }

    // Footnotes configuration
    found = styleSource.getElementsByTagName(XMLString.TEXT_FOOTNOTES_CONFIGURATION);
    if (found.getLength() > 0) {
      footnotes = new PropertySet();
      footnotes.loadFromDOM(found.item(0));
    }

    // Endnotes configuration
    found = styleSource.getElementsByTagName(XMLString.TEXT_ENDNOTES_CONFIGURATION);
    if (found.getLength() > 0) {
      endnotes = new PropertySet();
      endnotes.loadFromDOM(found.item(0));
    }

    // Both configurations are present: no need to try the oasis format
    if (footnotes != null && endnotes != null) {
      return;
    }

    // Oasis format: one notes configuration element per note class
    NodeList configs = styleSource.getElementsByTagName(XMLString.TEXT_NOTES_CONFIGURATION);
    int configCount = configs.getLength();
    for (int idx = 0; idx < configCount; idx++) {
      boolean isEndnote =
          "endnote".equals(Misc.getAttribute(configs.item(idx), XMLString.TEXT_NOTE_CLASS));
      PropertySet settings = new PropertySet();
      // Anything that is not explicitly an endnote configuration counts as footnote configuration
      if (isEndnote) {
        endnotes = settings;
      } else {
        footnotes = settings;
      }
      settings.loadFromDOM(configs.item(idx));
    }
  }
예제 #3
0
 /**
  * Loads every style definition found among the element children of a styles container.
  *
  * <p>Each child is dispatched on its element name (and, for {@code XMLString.STYLE_STYLE} and
  * {@code XMLString.STYLE_DEFAULT_STYLE}, on its style family) to the matching style collection.
  *
  * @param node the container; should be office:master-styles, office:styles or
  *     office:automatic-styles
  * @param bAllParagraphsAreSoft if true, paragraph styles are never loaded as automatic
  */
 private void loadStylesFromDOM(Node node, boolean bAllParagraphsAreSoft) {
   // Styles count as automatic when the container is the automatic styles element
   boolean automatic = XMLString.OFFICE_AUTOMATIC_STYLES.equals(node.getNodeName());
   if (!node.hasChildNodes()) {
     return;
   }

   NodeList entries = node.getChildNodes();
   int entryCount = entries.getLength();
   for (int idx = 0; idx < entryCount; idx++) {
     Node entry = entries.item(idx);
     if (entry.getNodeType() != Node.ELEMENT_NODE) {
       continue;
     }

     String tag = entry.getNodeName();
     if (tag.equals(XMLString.STYLE_STYLE)) {
       String family = Misc.getAttribute(entry, XMLString.STYLE_FAMILY);
       if ("text".equals(family)) {
         text.loadStyleFromDOM(entry, automatic);
       } else if ("paragraph".equals(family)) {
         par.loadStyleFromDOM(entry, automatic && !bAllParagraphsAreSoft);
       } else if ("section".equals(family)) {
         section.loadStyleFromDOM(entry, automatic);
       } else if ("table".equals(family)) {
         table.loadStyleFromDOM(entry, automatic);
       } else if ("table-column".equals(family)) {
         column.loadStyleFromDOM(entry, automatic);
       } else if ("table-row".equals(family)) {
         row.loadStyleFromDOM(entry, automatic);
       } else if ("table-cell".equals(family)) {
         cell.loadStyleFromDOM(entry, automatic);
       } else if ("graphics".equals(family) || "graphic".equals(family)) { // oasis: no s
         frame.loadStyleFromDOM(entry, automatic);
       } else if ("presentation".equals(family)) {
         presentation.loadStyleFromDOM(entry, automatic);
       } else if ("drawing-page".equals(family)) {
         // Bug in OOo 1.x: The same name may be used for a real and an automatic style,
         // so a style which is already registered under this name is kept
         String styleName = Misc.getAttribute(entry, XMLString.STYLE_NAME);
         if (drawingPage.getStyle(styleName) == null) {
           drawingPage.loadStyleFromDOM(entry, automatic);
         }
       }
     } else if (tag.equals(XMLString.STYLE_PAGE_MASTER) // old
         || tag.equals(XMLString.STYLE_PAGE_LAYOUT)) { // oasis
       pageLayout.loadStyleFromDOM(entry, automatic);
     } else if (tag.equals(XMLString.STYLE_MASTER_PAGE)) {
       masterPage.loadStyleFromDOM(entry, automatic);
       // Remember the first master page encountered
       if (firstMasterPage == null) {
         firstMasterPage =
             (MasterPage) masterPage.getStyle(Misc.getAttribute(entry, XMLString.STYLE_NAME));
       }
     } else if (tag.equals(XMLString.TEXT_LIST_STYLE)) {
       list.loadStyleFromDOM(entry, automatic);
     } else if (tag.equals(XMLString.TEXT_OUTLINE_STYLE)) {
       outline.loadStyleFromDOM(entry);
     } else if (tag.equals(XMLString.STYLE_DEFAULT_STYLE)) {
       // Default styles are only kept for paragraphs, frames and table cells
       String family = Misc.getAttribute(entry, XMLString.STYLE_FAMILY);
       if ("paragraph".equals(family)) {
         StyleWithProperties defaults = new StyleWithProperties();
         defaults.loadStyleFromDOM(entry);
         par.setDefaultStyle(defaults);
       } else if ("graphics".equals(family) || "graphic".equals(family)) { // oasis: no s
         StyleWithProperties defaults = new StyleWithProperties();
         defaults.loadStyleFromDOM(entry);
         frame.setDefaultStyle(defaults);
       } else if ("table-cell".equals(family)) {
         StyleWithProperties defaults = new StyleWithProperties();
         defaults.loadStyleFromDOM(entry);
         cell.setDefaultStyle(defaults);
       }
     }
   }
 }
예제 #4
0
  /**
   * Visits an element and, recursively, all its descendant elements, collecting information
   * about headings, sequences, references, marks, internal links, forms and indexes.
   *
   * <p>The element itself is handled before its children. After the children have been visited,
   * a {@code XMLString.TEXT_P} element is additionally checked as a possible source of the
   * automatic figure and table caption sequence names.
   *
   * @param node the element to start from
   */
  private void traverseContent(Element node) {
    // Handle this node first
    String tag = node.getTagName();
    if (tag.equals(XMLString.TEXT_P)) {
      // Nothing to collect for a paragraph at this point
      // collectMasterPage(getParStyle(node.getAttribute(XMLString.TEXT_STYLE_NAME)));
    } else if (tag.equals(XMLString.TEXT_H)) {
      // The outline level attribute takes precedence over the level attribute
      String levelAttribute =
          node.hasAttribute(XMLString.TEXT_OUTLINE_LEVEL)
              ? XMLString.TEXT_OUTLINE_LEVEL
              : XMLString.TEXT_LEVEL;
      int level = Misc.getPosInteger(node.getAttribute(levelAttribute), 1);
      StyleWithProperties headingStyle =
          getParStyle(node.getAttribute(XMLString.TEXT_STYLE_NAME));
      // collectMasterPage(headingStyle);
      // Only the first heading found at each level (1 to 10) determines the heading style
      if (level >= 1 && level <= 10 && heading[level] == null) {
        boolean useParent = headingStyle != null && headingStyle.isAutomatic();
        // For an automatic style, the parent style is recorded instead
        heading[level] = useParent ? getParStyle(headingStyle.getParentName()) : headingStyle;
      }
    } else if (tag.equals(XMLString.TEXT_SEQUENCE)) {
      String sequenceName = Misc.getAttribute(node, XMLString.TEXT_NAME);
      String referenceName = Misc.getAttribute(node, XMLString.TEXT_REF_NAME);
      if (sequenceName != null) {
        Element paragraph = getParagraph(node);
        // Only the first text:sequence should be registered as possible caption sequence
        if (!sequenceNames.containsKey(paragraph)) {
          sequenceNames.put(paragraph, sequenceName);
        }
        if (referenceName != null) {
          seqrefNames.put(referenceName, sequenceName);
        }
      }
    } else if (tag.equals(XMLString.TEXT_FOOTNOTE_REF)) {
      collectRefName(footnoteRef, node);
    } else if (tag.equals(XMLString.TEXT_ENDNOTE_REF)) {
      collectRefName(endnoteRef, node);
    } else if (tag.equals(XMLString.TEXT_NOTE_REF)) { // oasis
      String noteClass = Misc.getAttribute(node, XMLString.TEXT_NOTE_CLASS);
      if ("footnote".equals(noteClass)) {
        collectRefName(footnoteRef, node);
      } else if ("endnote".equals(noteClass)) {
        collectRefName(endnoteRef, node);
      }
    } else if (tag.equals(XMLString.TEXT_REFERENCE_MARK)
        || tag.equals(XMLString.TEXT_REFERENCE_MARK_START)) {
      collectMarkInHeading(referenceHeading, node);
    } else if (tag.equals(XMLString.TEXT_REFERENCE_REF)) {
      collectRefName(referenceRef, node);
    } else if (tag.equals(XMLString.TEXT_BOOKMARK)
        || tag.equals(XMLString.TEXT_BOOKMARK_START)) {
      collectMarkInHeading(bookmarkHeading, node);
    } else if (tag.equals(XMLString.TEXT_BOOKMARK_REF)) {
      collectRefName(bookmarkRef, node);
    } else if (tag.equals(XMLString.TEXT_SEQUENCE_REF)) {
      collectRefName(sequenceRef, node);
    } else if (tag.equals(XMLString.TEXT_A)) {
      // Only links starting with # are recorded, without the leading #
      String target = node.getAttribute(XMLString.XLINK_HREF);
      if (target != null && target.startsWith("#")) {
        links.add(target.substring(1));
      }
    } else if (tag.equals(XMLString.OFFICE_FORMS)) {
      forms.read(node);
    } else if (tag.equals(XMLString.TEXT_TABLE_OF_CONTENT)) {
      TocReader toc = new TocReader(node);
      indexes.put(node, toc);
      indexSourceStyles.addAll(toc.getIndexSourceStyles());
    } else if (tag.equals(XMLString.TEXT_TABLE_INDEX)
        || tag.equals(XMLString.TEXT_ILLUSTRATION_INDEX)) {
      LoftReader loft = new LoftReader(node);
      indexes.put(node, loft);
      if (loft.useCaption()) {
        if (loft.isTableIndex()) {
          tableSequenceNames.add(loft.getCaptionSequenceName());
        } else {
          figureSequenceNames.add(loft.getCaptionSequenceName());
        }
      }
    }
    // todo: other indexes

    // Traverse the children
    for (Node kid = node.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
      if (kid.getNodeType() == Node.ELEMENT_NODE) {
        traverseContent((Element) kid);
      }
    }

    // Collect automatic captions sequences
    // Use OOo defaults: Captions have style names Illustration and Table resp.
    if (tag.equals(XMLString.TEXT_P)
        && (sAutoFigureSequenceName == null || sAutoTableSequenceName == null)) {
      String displayName =
          getParStyles().getDisplayName(node.getAttribute(XMLString.TEXT_STYLE_NAME));
      if (sAutoFigureSequenceName == null && "Illustration".equals(displayName)) {
        sAutoFigureSequenceName = getSequenceName(node);
      }
      if (sAutoTableSequenceName == null && "Table".equals(displayName)) {
        sAutoTableSequenceName = getSequenceName(node);
      }
    }
  }