private boolean shouldSkip(StartElement se) { try { if (skips == null) { skips = new HashSet<StartElement>(); XMLEventFactory xef = StAXEventFactoryPool.getInstance().acquire(); Set<Attribute> attrs; skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "pagenum"), null, null)); skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "noteref"), null, null)); skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "annoref"), null, null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-normal")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-front")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-special")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "noteref")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); StAXEventFactoryPool.getInstance().release(xef); } for (StartElement test : skips) { boolean matchesName = true; boolean matchesAttrs = true; if (se.getName().equals(test.getName())) { Iterator<?> iter = test.getAttributes(); // real elem must have all attrs of test elem while (iter.hasNext()) { Attribute testAttr = (Attribute) iter.next(); Attribute realAttr = AttributeByName.get(testAttr.getName(), se); if (realAttr == null || !realAttr.getValue().equals(testAttr.getValue())) { matchesAttrs = false; } } } else { matchesName = false; } if (matchesName && matchesAttrs) return true; } } catch (Exception e) { e.printStackTrace(); } return false; }
/**
 * Writes a scrambled copy of the given XML file next to the original.
 *
 * <p>The output file is created in the input's parent directory and is named
 * {@code <name-without-extension>.scrambled.<extension>}. The input is read
 * event by event and copied to the output with these changes:
 * <ul>
 *   <li>meta, image, acronym, link and anchor start elements are replaced by
 *       the result of the corresponding {@code handle...Element} method;</li>
 *   <li>non-whitespace character data is replaced by {@code "[n] "} followed
 *       by the element-only context XPath, where {@code n} is a running
 *       counter, unless the most recent start element matched
 *       {@link #shouldSkip(StartElement)} and no end element has been seen
 *       since;</li>
 *   <li>processing instructions are replaced by the result of
 *       {@code handleProcessingInstruction};</li>
 *   <li>an event that a handler turned into {@code null} is dropped.</li>
 * </ul>
 *
 * <p>A {@code CatalogExceptionNotRecoverable} is printed and not rethrown.
 *
 * @param input the XML file to scramble
 * @throws XMLStreamException if reading or writing the XML event stream fails
 * @throws IOException if a file cannot be opened or closed
 */
private void scramble(EFile input) throws XMLStreamException, IOException {
    File output =
            new File(
                    input.getParentFile(),
                    input.getNameMinusExtension() + ".scrambled." + input.getExtension());

    Map<String, Object> xifProperties = null;
    Map<String, Object> xofProperties = null;
    XMLInputFactory xif = null;
    XMLOutputFactory xof = null;
    XMLEventWriter writer = null;
    XMLEventFactory xef = null;
    FileInputStream fis = null;
    FileOutputStream fos = null;
    try {
        xifProperties = StAXInputFactoryPool.getInstance().getDefaultPropertyMap(false);
        xofProperties = StAXOutputFactoryPool.getInstance().getDefaultPropertyMap();
        xif = StAXInputFactoryPool.getInstance().acquire(xifProperties);
        xif.setXMLResolver(new StaxEntityResolver(CatalogEntityResolver.getInstance()));
        xof = StAXOutputFactoryPool.getInstance().acquire(xofProperties);
        xef = StAXEventFactoryPool.getInstance().acquire();

        fis = new FileInputStream(input);
        fos = new FileOutputStream(output);

        XMLEventReader baseReader = xif.createXMLEventReader(fis);
        writer = xof.createXMLEventWriter(fos);
        BookmarkedXMLEventReader reader = new BookmarkedXMLEventReader(baseReader);
        ContextStack context = new ContextStack(true);

        boolean skipElemTextScrambling = false;
        int c = 0;
        while (reader.hasNext()) {
            XMLEvent xe = reader.nextEvent();
            context.addEvent(xe);
            if (xe.isStartElement()) {
                StartElement se = xe.asStartElement();
                skipElemTextScrambling = shouldSkip(se);
                if (isMetaElement(se)) {
                    xe = handleMetaElement(se, xef);
                } else if (isImageElement(se)) {
                    xe = handleImageElement(se, xef);
                } else if (isAcronymElement(se)) {
                    xe = handleAcronymElement(se, xef);
                } else if (isLinkElement(se)) {
                    xe = handleLinkElement(se, xef);
                } else if (isAnchorElement(se)) {
                    xe = handleAnchorElement(se, xef);
                }
            } else if (xe.isCharacters()
                    && !skipElemTextScrambling
                    && !CharUtils.isXMLWhiteSpace(xe.asCharacters().getData())) {
                // Replace the text with a numbered marker plus its element path.
                xe =
                        xef.createCharacters(
                                "["
                                        + Integer.toString(++c)
                                        + "] "
                                        + context.getContextXPath(
                                                ContextStack.XPATH_SELECT_ELEMENTS_ONLY,
                                                ContextStack.XPATH_PREDICATES_NONE));
            } else if (xe.getEventType() == XMLEvent.PROCESSING_INSTRUCTION) {
                xe = handleProcessingInstruction((ProcessingInstruction) xe, xef);
            } else if (xe.isEndElement()) {
                skipElemTextScrambling = false;
            }
            if (xe != null) {
                writer.add(xe);
            }
        }
    } catch (CatalogExceptionNotRecoverable e) {
        e.printStackTrace();
    } finally {
        // Cleanup order matters: the writer must be flushed and closed while the
        // underlying stream is still open, otherwise buffered output is lost.
        // Each step is null-guarded (setup may have failed early) and chained
        // through finally so one failing step cannot skip the ones after it.
        try {
            if (writer != null) {
                writer.flush();
                writer.close();
            }
        } finally {
            try {
                if (fos != null) {
                    fos.close();
                }
            } finally {
                try {
                    if (fis != null) {
                        fis.close();
                    }
                } finally {
                    if (xif != null) {
                        StAXInputFactoryPool.getInstance().release(xif, xifProperties);
                    }
                    if (xof != null) {
                        StAXOutputFactoryPool.getInstance().release(xof, xofProperties);
                    }
                    if (xef != null) {
                        StAXEventFactoryPool.getInstance().release(xef);
                    }
                }
            }
        }
    }
}