/**
 * Builds a replacement start tag for an anchor element in which the
 * {@code href} target has been anonymised.
 *
 * <p>The {@code href} attribute (looked up through {@code AttributeByName}
 * with an un-namespaced {@code href} name) is rewritten as follows:
 * <ul>
 *   <li>contains "mailto" (case-insensitive): replaced by a fixed dummy mailto value;</li>
 *   <li>starts with "#" after trimming: kept unchanged;</li>
 *   <li>contains "smil" (case-insensitive): kept unchanged;</li>
 *   <li>anything else: replaced by {@code http://dummy.org}.</li>
 * </ul>
 * Every other attribute is copied unchanged. Any attribute whose local name
 * is {@code href} is left out of the copy, whatever its namespace.
 *
 * @param se  the original anchor start element
 * @param xef factory used to create the replacement attribute and element
 * @return a start element with the same name and namespaces as {@code se}
 *         and the rewritten attribute set
 */
private XMLEvent handleAnchorElement(StartElement se, XMLEventFactory xef) {
    Set<Attribute> attrs = new HashSet<Attribute>();

    Attribute href = AttributeByName.get(new QName("href"), se);
    if (href != null) {
        String original = href.getValue();
        // Lower-case with a fixed locale: with the default locale, a Turkish
        // or Azeri environment maps 'I' to a dotless i, so "MAILTO" and
        // "SMIL" would not be recognised.
        String lowered = original.toLowerCase(java.util.Locale.ENGLISH);

        String value;
        if (lowered.contains("mailto")) {
            // NOTE(review): this literal does not look like a real address and may
            // have been mangled somewhere upstream - confirm the intended dummy value.
            value = "mailto:[email protected]";
        } else if (original.trim().startsWith("#")) {
            // value begins with '#': kept as it is
            value = original;
        } else if (lowered.contains("smil")) {
            // value mentions "smil": kept as it is
            value = original;
        } else {
            value = "http://dummy.org";
        }
        attrs.add(xef.createAttribute(href.getName(), value));
    }

    // Copy all remaining attributes; href was handled above.
    Iterator<?> i = se.getAttributes();
    while (i.hasNext()) {
        Attribute a = (Attribute) i.next();
        if (!"href".equals(a.getName().getLocalPart())) {
            attrs.add(a);
        }
    }

    return xef.createStartElement(se.getName(), attrs.iterator(), se.getNamespaces());
}
private XMLEvent handleLinkElement(StartElement se, XMLEventFactory xef) { // remove all links since they may refer to copyrighted stuff // unless its a css link in which case we create a new one Attribute type = AttributeByName.get(new QName("type"), se); Attribute rel = AttributeByName.get(new QName("rel"), se); if ((type != null && type.getValue().toLowerCase().equals("text/css")) || (rel != null && rel.getValue().toLowerCase().equals("stylesheet"))) { Set<Attribute> attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute(new QName("href"), "dummy.css")); Iterator<?> i = se.getAttributes(); while (i.hasNext()) { Attribute a = (Attribute) i.next(); if (a.getName().getLocalPart() != "href") { attrs.add(a); } } return xef.createStartElement(se.getName(), attrs.iterator(), se.getNamespaces()); } return null; }
private boolean shouldSkip(StartElement se) { try { if (skips == null) { skips = new HashSet<StartElement>(); XMLEventFactory xef = StAXEventFactoryPool.getInstance().acquire(); Set<Attribute> attrs; skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "pagenum"), null, null)); skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "noteref"), null, null)); skips.add( xef.createStartElement( new QName(Namespaces.Z2005_DTBOOK_NS_URI, "annoref"), null, null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-normal")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-front")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "page-special")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); attrs = new HashSet<Attribute>(); attrs.add(xef.createAttribute("class", "noteref")); skips.add( xef.createStartElement( new QName(Namespaces.XHTML_10_NS_URI, "span"), attrs.iterator(), null)); StAXEventFactoryPool.getInstance().release(xef); } for (StartElement test : skips) { boolean matchesName = true; boolean matchesAttrs = true; if (se.getName().equals(test.getName())) { Iterator<?> iter = test.getAttributes(); // real elem must have all attrs of test elem while (iter.hasNext()) { Attribute testAttr = (Attribute) iter.next(); Attribute realAttr = AttributeByName.get(testAttr.getName(), se); if (realAttr == null || !realAttr.getValue().equals(testAttr.getValue())) { matchesAttrs = false; } } } else { matchesName = false; } if (matchesName && matchesAttrs) return true; } } catch (Exception e) { e.printStackTrace(); } return false; }