/** * Counts the number of characters (text nodes) in this element excluding footnotes etc. * * @param node the node to count in * @return the number of characters */ public static int getCharacterCount(Node node) { Node child = node.getFirstChild(); int nCount = 0; while (child != null) { short nodeType = child.getNodeType(); switch (nodeType) { case Node.TEXT_NODE: nCount += child.getNodeValue().length(); break; case Node.ELEMENT_NODE: String sName = child.getNodeName(); if (sName.equals(XMLString.TEXT_S)) { nCount += Misc.getPosInteger(Misc.getAttribute(child, XMLString.TEXT_C), 1); } else if (sName.equals(XMLString.TEXT_TAB_STOP)) { nCount++; // treat as single space } else if (sName.equals(XMLString.TEXT_TAB)) { // oasis nCount++; // treat as single space } else if (isNoteElement(child)) { // ignore } else if (isTextElement(child)) { nCount += getCharacterCount(child); } } child = child.getNextSibling(); } return nCount; }
private void loadStylesFromDOM( Document stylesDOM, Document contentDOM, boolean bAllParagraphsAreSoft) { // Flat xml: stylesDOM will be null and contentDOM contain everything // This is only the case for old versions of xmerge; newer versions // creates DOM for styles, content, meta and settings. NodeList list; // font declarations: Try old format first if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.OFFICE_FONT_DECLS); } else { list = stylesDOM.getElementsByTagName(XMLString.OFFICE_FONT_DECLS); } // If that fails, try oasis format if (list.getLength() == 0) { if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.OFFICE_FONT_FACE_DECLS); } else { list = stylesDOM.getElementsByTagName(XMLString.OFFICE_FONT_FACE_DECLS); } } if (list.getLength() != 0) { Node node = list.item(0); if (node.hasChildNodes()) { NodeList nl = node.getChildNodes(); int nLen = nl.getLength(); for (int i = 0; i < nLen; i++) { Node child = nl.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { if (child.getNodeName().equals(XMLString.STYLE_FONT_DECL)) { font.loadStyleFromDOM(child, false); } else if (child.getNodeName().equals(XMLString.STYLE_FONT_FACE)) { font.loadStyleFromDOM(child, false); } } } } } // soft formatting: if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.OFFICE_STYLES); } else { list = stylesDOM.getElementsByTagName(XMLString.OFFICE_STYLES); } if (list.getLength() != 0) { loadStylesFromDOM(list.item(0), bAllParagraphsAreSoft); } // master styles: if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.OFFICE_MASTER_STYLES); } else { list = stylesDOM.getElementsByTagName(XMLString.OFFICE_MASTER_STYLES); } if (list.getLength() != 0) { loadStylesFromDOM(list.item(0), bAllParagraphsAreSoft); } // hard formatting: // Load from styles.xml first. Problem: There may be name clashes // with automatic styles from content.xml if (stylesDOM != null) { list = stylesDOM.getElementsByTagName(XMLString.OFFICE_AUTOMATIC_STYLES); if (list.getLength() != 0) { loadStylesFromDOM(list.item(0), bAllParagraphsAreSoft); } } list = contentDOM.getElementsByTagName(XMLString.OFFICE_AUTOMATIC_STYLES); if (list.getLength() != 0) { loadStylesFromDOM(list.item(0), bAllParagraphsAreSoft); } // footnotes configuration: if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.TEXT_FOOTNOTES_CONFIGURATION); } else { list = stylesDOM.getElementsByTagName(XMLString.TEXT_FOOTNOTES_CONFIGURATION); } if (list.getLength() != 0) { footnotes = new PropertySet(); footnotes.loadFromDOM(list.item(0)); } // endnotes configuration: if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.TEXT_ENDNOTES_CONFIGURATION); } else { list = stylesDOM.getElementsByTagName(XMLString.TEXT_ENDNOTES_CONFIGURATION); } if (list.getLength() != 0) { endnotes = new PropertySet(); endnotes.loadFromDOM(list.item(0)); } // if it failed, try oasis format if (footnotes == null || endnotes == null) { if (stylesDOM == null) { list = contentDOM.getElementsByTagName(XMLString.TEXT_NOTES_CONFIGURATION); } else { list = stylesDOM.getElementsByTagName(XMLString.TEXT_NOTES_CONFIGURATION); } int nLen = list.getLength(); for (int i = 0; i < nLen; i++) { String sClass = Misc.getAttribute(list.item(i), XMLString.TEXT_NOTE_CLASS); if ("endnote".equals(sClass)) { endnotes = new PropertySet(); endnotes.loadFromDOM(list.item(i)); } else { footnotes = new PropertySet(); footnotes.loadFromDOM(list.item(i)); } } } }
private void loadStylesFromDOM(Node node, boolean bAllParagraphsAreSoft) { // node should be office:master-styles, office:styles or office:automatic-styles boolean bAutomatic = XMLString.OFFICE_AUTOMATIC_STYLES.equals(node.getNodeName()); if (node.hasChildNodes()) { NodeList nl = node.getChildNodes(); int nLen = nl.getLength(); for (int i = 0; i < nLen; i++) { Node child = nl.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { if (child.getNodeName().equals(XMLString.STYLE_STYLE)) { String sFamily = Misc.getAttribute(child, XMLString.STYLE_FAMILY); if ("text".equals(sFamily)) { text.loadStyleFromDOM(child, bAutomatic); } else if ("paragraph".equals(sFamily)) { par.loadStyleFromDOM(child, bAutomatic && !bAllParagraphsAreSoft); } else if ("section".equals(sFamily)) { section.loadStyleFromDOM(child, bAutomatic); } else if ("table".equals(sFamily)) { table.loadStyleFromDOM(child, bAutomatic); } else if ("table-column".equals(sFamily)) { column.loadStyleFromDOM(child, bAutomatic); } else if ("table-row".equals(sFamily)) { row.loadStyleFromDOM(child, bAutomatic); } else if ("table-cell".equals(sFamily)) { cell.loadStyleFromDOM(child, bAutomatic); } else if ("graphics".equals(sFamily)) { frame.loadStyleFromDOM(child, bAutomatic); } else if ("graphic".equals(sFamily)) { // oasis frame.loadStyleFromDOM(child, bAutomatic); } else if ("presentation".equals(sFamily)) { presentation.loadStyleFromDOM(child, bAutomatic); } else if ("drawing-page".equals(sFamily)) { // Bug in OOo 1.x: The same name may be used for a real and an automatic style... if (drawingPage.getStyle(Misc.getAttribute(child, XMLString.STYLE_NAME)) == null) { drawingPage.loadStyleFromDOM(child, bAutomatic); } } } else if (child.getNodeName().equals(XMLString.STYLE_PAGE_MASTER)) { // old pageLayout.loadStyleFromDOM(child, bAutomatic); } else if (child.getNodeName().equals(XMLString.STYLE_PAGE_LAYOUT)) { // oasis pageLayout.loadStyleFromDOM(child, bAutomatic); } else if (child.getNodeName().equals(XMLString.STYLE_MASTER_PAGE)) { masterPage.loadStyleFromDOM(child, bAutomatic); if (firstMasterPage == null) { firstMasterPage = (MasterPage) masterPage.getStyle(Misc.getAttribute(child, XMLString.STYLE_NAME)); } } else if (child.getNodeName().equals(XMLString.TEXT_LIST_STYLE)) { list.loadStyleFromDOM(child, bAutomatic); } else if (child.getNodeName().equals(XMLString.TEXT_OUTLINE_STYLE)) { outline.loadStyleFromDOM(child); } else if (child.getNodeName().equals(XMLString.STYLE_DEFAULT_STYLE)) { String sFamily = Misc.getAttribute(child, XMLString.STYLE_FAMILY); if ("paragraph".equals(sFamily)) { StyleWithProperties defaultPar = new StyleWithProperties(); defaultPar.loadStyleFromDOM(child); par.setDefaultStyle(defaultPar); } else if ("graphics".equals(sFamily) || "graphic".equals(sFamily)) { // oasis: no s StyleWithProperties defaultFrame = new StyleWithProperties(); defaultFrame.loadStyleFromDOM(child); frame.setDefaultStyle(defaultFrame); } else if ("table-cell".equals(sFamily)) { StyleWithProperties defaultCell = new StyleWithProperties(); defaultCell.loadStyleFromDOM(child); cell.setDefaultStyle(defaultCell); } } } } } }
private void traverseContent(Element node) { // Handle this node first String sName = node.getTagName(); if (sName.equals(XMLString.TEXT_P)) { // collectMasterPage(getParStyle(node.getAttribute(XMLString.TEXT_STYLE_NAME))); } else if (sName.equals(XMLString.TEXT_H)) { int nLevel; if (node.hasAttribute(XMLString.TEXT_OUTLINE_LEVEL)) { nLevel = Misc.getPosInteger(node.getAttribute(XMLString.TEXT_OUTLINE_LEVEL), 1); } else { nLevel = Misc.getPosInteger(node.getAttribute(XMLString.TEXT_LEVEL), 1); } StyleWithProperties style = getParStyle(node.getAttribute(XMLString.TEXT_STYLE_NAME)); // collectMasterPage(style); if (1 <= nLevel && nLevel <= 10 && heading[nLevel] == null) { if (style != null && style.isAutomatic()) { heading[nLevel] = getParStyle(style.getParentName()); } else { heading[nLevel] = style; } } } else if (sName.equals(XMLString.TEXT_SEQUENCE)) { String sSeqName = Misc.getAttribute(node, XMLString.TEXT_NAME); String sRefName = Misc.getAttribute(node, XMLString.TEXT_REF_NAME); if (sSeqName != null) { Element par = getParagraph(node); if (!sequenceNames.containsKey(par)) { // Only the first text:seqence should be registered as possible caption sequence sequenceNames.put(par, sSeqName); } if (sRefName != null) { seqrefNames.put(sRefName, sSeqName); } } } else if (sName.equals(XMLString.TEXT_FOOTNOTE_REF)) { collectRefName(footnoteRef, node); } else if (sName.equals(XMLString.TEXT_ENDNOTE_REF)) { collectRefName(endnoteRef, node); } else if (sName.equals(XMLString.TEXT_NOTE_REF)) { // oasis String sClass = Misc.getAttribute(node, XMLString.TEXT_NOTE_CLASS); if ("footnote".equals(sClass)) { collectRefName(footnoteRef, node); } else if ("endnote".equals(sClass)) { collectRefName(endnoteRef, node); } } else if (sName.equals(XMLString.TEXT_REFERENCE_MARK)) { collectMarkInHeading(referenceHeading, node); } else if (sName.equals(XMLString.TEXT_REFERENCE_MARK_START)) { collectMarkInHeading(referenceHeading, node); } else if (sName.equals(XMLString.TEXT_REFERENCE_REF)) { collectRefName(referenceRef, node); } else if (sName.equals(XMLString.TEXT_BOOKMARK)) { collectMarkInHeading(bookmarkHeading, node); } else if (sName.equals(XMLString.TEXT_BOOKMARK_START)) { collectMarkInHeading(bookmarkHeading, node); } else if (sName.equals(XMLString.TEXT_BOOKMARK_REF)) { collectRefName(bookmarkRef, node); } else if (sName.equals(XMLString.TEXT_SEQUENCE_REF)) { collectRefName(sequenceRef, node); } else if (sName.equals(XMLString.TEXT_A)) { String sHref = node.getAttribute(XMLString.XLINK_HREF); if (sHref != null && sHref.startsWith("#")) { links.add(sHref.substring(1)); } } else if (sName.equals(XMLString.OFFICE_FORMS)) { forms.read(node); } else if (sName.equals(XMLString.TEXT_TABLE_OF_CONTENT)) { TocReader tocReader = new TocReader(node); indexes.put(node, tocReader); indexSourceStyles.addAll(tocReader.getIndexSourceStyles()); } else if (sName.equals(XMLString.TEXT_TABLE_INDEX) || sName.equals(XMLString.TEXT_ILLUSTRATION_INDEX)) { LoftReader loftReader = new LoftReader(node); indexes.put(node, loftReader); if (loftReader.useCaption()) { if (loftReader.isTableIndex()) { tableSequenceNames.add(loftReader.getCaptionSequenceName()); } else { figureSequenceNames.add(loftReader.getCaptionSequenceName()); } } } // todo: other indexes // Traverse the children Node child = node.getFirstChild(); while (child != null) { if (child.getNodeType() == Node.ELEMENT_NODE) { traverseContent((Element) child); } child = child.getNextSibling(); } // Collect automatic captions sequences // Use OOo defaults: Captions have style names Illustration and Table resp. if ((sAutoFigureSequenceName == null || sAutoTableSequenceName == null) && sName.equals(XMLString.TEXT_P)) { String sStyleName = getParStyles().getDisplayName(node.getAttribute(XMLString.TEXT_STYLE_NAME)); if (sAutoFigureSequenceName == null) { if ("Illustration".equals(sStyleName)) { sAutoFigureSequenceName = getSequenceName(node); } } if (sAutoTableSequenceName == null) { if ("Table".equals(sStyleName)) { sAutoTableSequenceName = getSequenceName(node); } } } }