@Test public void testHtmlWithTags() throws Exception { final String htmlText = "<html><head><title>Title</title></head>" + "<body><p>this is a test</p></body></html>"; // Create FetchedDatum using data String url = "http://domain.com/page.html"; String contentType = "text/html; charset=utf-8"; HttpHeaders headers = new HttpHeaders(); headers.add(HttpHeaderNames.CONTENT_TYPE, contentType); ContentBytes content = new ContentBytes(htmlText.getBytes("utf-8")); FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0); // Call parser.parse SimpleParser parser = new SimpleParser(new ParserPolicy(), true); ParsedDatum parsedDatum = parser.parse(fetchedDatum); // Now take the resulting HTML, process it using Dom4J SAXReader reader = new SAXReader(new Parser()); reader.setEncoding("UTF-8"); String htmlWithMarkup = parsedDatum.getParsedText(); Document doc = reader.read(new StringInputStream(htmlWithMarkup)); // We have to do helicopter stunts since HTML has a global namespace on it, set // at the <html> element level. XPath xpath = DocumentHelper.createXPath("/xhtml:html/xhtml:body/xhtml:p"); Map<String, String> namespaceUris = new HashMap<String, String>(); namespaceUris.put("xhtml", "http://www.w3.org/1999/xhtml"); xpath.setNamespaceURIs(namespaceUris); Node paragraphNode = xpath.selectSingleNode(doc); Assert.assertNotNull(paragraphNode); Assert.assertEquals("this is a test", paragraphNode.getText()); }
/**
 * Finds the first occurrence of an element in a document.
 *
 * <p>The search uses the XPath {@code //childName[1]}, which can match one element per parent.
 * {@code XPath.evaluate} hands back a single node when there is exactly one match and a list
 * otherwise, so both shapes are handled here and the first matching element is returned.
 *
 * @param document the document to be searched for the element. Must not be null.
 * @param childName the name of the element to be searched for. Must not be null.
 * @return the first matching element, or null if no element matched
 * @throws NullPointerException thrown if {@code document} is null
 */
public static Element findElement(Document document, String childName) {
  XPath xpath = document.createXPath("//" + childName + "[1]");
  Object result = xpath.evaluate(document.getRootElement());

  if (result instanceof List) {
    // Zero matches give an empty list; several matches give all of them. Previously any list
    // was reported as "not found", which lost the element whenever more than one parent
    // contained a child with this name.
    for (Object candidate : (List<?>) result) {
      if (candidate instanceof Element) {
        return (Element) candidate;
      }
    }
    return null;
  }

  // Exactly one match: evaluate() unwraps it. Anything that is not an element is "not found".
  return (result instanceof Element) ? (Element) result : null;
}
/**
 * Collects the text of every node matched by an XPath expression.
 *
 * @param xpathString the XPath expression to evaluate
 * @param node the context node the expression is evaluated against
 * @return the text of each matched node, in the order the nodes were returned; empty when
 *     nothing matched
 */
private List<String> findValues(String xpathString, Node node) {
  XPath query = getXpath(xpathString);
  query.setNamespaceURIs(prefixNamespaceMap);

  List<? extends Node> matches = query.selectNodes(node);
  int matchCount = matches.size();
  List<String> texts = new ArrayList<String>(matchCount);
  for (int i = 0; i < matchCount; i++) {
    texts.add(matches.get(i).getText());
  }
  return texts;
}
/**
 * Returns the text of the single node matched by an XPath expression.
 *
 * @param xpathString the XPath expression to evaluate
 * @param node the context node the expression is evaluated against
 * @return the matched node's text, or null when the expression selects no node
 */
private String findValue(String xpathString, Node node) {
  XPath query = getXpath(xpathString);
  query.setNamespaceURIs(prefixNamespaceMap);

  Node match = query.selectSingleNode(node);
  return (match == null) ? null : match.getText();
}
/**
 * Loads a topics file and prepares one {@code QueryProcessor} per {@code topic} element found
 * anywhere in it.
 *
 * @param topicsFile path of the XML topics file to parse
 * @param timesKey flag passed unchanged to every {@code QueryProcessor}
 * @throws MalformedURLException declared by the signature; presumably raised while parsing the
 *     file
 * @throws DocumentException declared by the signature; presumably raised when the file cannot be
 *     parsed
 */
public StrategyQueryBuilder(String topicsFile, boolean timesKey)
    throws MalformedURLException, DocumentException {
  queryProcessors = new ArrayList<QueryProcessor>();
  topics = Dom4jUtil.parse(new File(topicsFile));

  List<Element> topicElements = topics.createXPath("//topic").selectNodes(topics);
  for (Element topicElement : topicElements) {
    // Each topic is turned into a query, which is then wrapped in its processor.
    QueryParser parser = new QueryParser(topicElement);
    queryProcessors.add(new QueryProcessor(parser.getQuery(), timesKey));
  }
}
public String getNamespaceNodeText( Document document, String namespaceKey, String namespaceValue, String xPath) { HashMap<String, String> xmlMap = new HashMap<String, String>(); // xmlMap.put("tns","http://www.99bill.com/schema/fo/settlement"); xmlMap.put(namespaceKey, namespaceValue); // XPath xpath=document.createXPath("//tns:status"); //要获取哪个节点,改这里就可以了 XPath xpath = document.createXPath(xPath); // 要获取哪个节点,改这里就可以了 xpath.setNamespaceURIs(xmlMap); Element element = (Element) xpath.selectSingleNode(document); return element.getText(); }
/**
 * Copies the node selected by the shared {@code path} expression out of a SOAP envelope into a
 * newly created document.
 *
 * <p>NOTE(review): the selected node is dereferenced without a null check, so an envelope that
 * the expression does not match ends in a NullPointerException rather than the declared
 * {@code InvalidInputFormatException} - confirm whether that is intended.
 *
 * @param soapEnvelope the envelope document to unwrap
 * @return a new document containing a clone of the selected node
 * @throws InvalidInputFormatException declared by the signature; not thrown by this method body
 */
public static Document unwrapMessage(Document soapEnvelope) throws InvalidInputFormatException {
  if (_log.isDebugEnabled()) {
    _log.debug(soapEnvelope.asXML());
  }

  Node selected;
  // Evaluation of the shared expression is serialized on the expression object itself.
  synchronized (path) {
    selected = path.selectSingleNode(soapEnvelope);
  }

  Document unwrapped = DocumentHelper.createDocument();
  // A clone is added so the node is not detached from the incoming envelope.
  unwrapped.add((Node) selected.clone());
  return unwrapped;
}
/**
 * Reads every {@code hlm:signalListenerConfig} element from an Ant file and records, per
 * target, an {@code "<id>,<failbuild>"} entry in {@code globalSignalList}.
 *
 * @param root not used by this method
 * @param antFile path of the Ant XML file to read
 * @throws DocumentException if the file cannot be parsed
 * @throws IOException declared by the signature
 */
private void readSignals(Element root, String antFile) throws DocumentException, IOException {
  Document antDoc = new SAXReader().read(new File(antFile));

  XPath configQuery = DocumentHelper.createXPath("//hlm:signalListenerConfig");
  configQuery.setNamespaceURIs(map);

  for (Object match : configQuery.selectNodes(antDoc)) {
    // The field is only updated when at least one config element exists.
    signaldoc = antDoc;
    Element config = (Element) match;
    String configId = config.attributeValue("id");
    String target = config.attributeValue("target");

    List signalsForTarget = globalSignalList.get(target);
    String failBuild = signalType(configId, signaldoc);
    if (signalsForTarget == null) {
      signalsForTarget = new ArrayList<String>();
    }
    signalsForTarget.add(configId + "," + failBuild);
    globalSignalList.put(target, signalsForTarget);
  }
}
/**
 * Converts every element selected by {@code xpathSelector} into a {@code Publication}.
 *
 * @param doc the document to read publications from
 * @return one publication per selected element, in selection order
 * @throws XOMException declared by the signature
 * @throws NumberFormatException if an element's {@code id} child is missing or is not a valid
 *     long
 */
public List<Publication> toObject(final Document doc) throws XOMException {
  final List<Publication> publications = new ArrayList<Publication>();

  for (final Object match : xpathSelector.selectNodes(doc)) {
    final Element source = (Element) match;
    final Publication target = new Publication();

    target.setName(source.elementText("name"));
    target.setDescription(source.elementText("description"));
    target.setPubroot(source.elementText("pubroot"));
    target.setPreview(source.elementText("cs_preview"));
    target.setPrefix(source.elementText("cs_prefix"));
    target.setPreviewAsset(source.elementText("cs_previewasset"));
    target.setId(Long.parseLong(source.elementText("id")));

    publications.add(target);
  }
  return publications;
}
/**
 * Looks up the {@code failbuild} attribute that applies to a signal listener configuration.
 *
 * <p>The configuration's {@code signalNotifierInput/signalInput} children are visited in order;
 * for each, the {@code hlm:signalInput} whose {@code id} equals the child's {@code refid} is
 * searched for, and the first one found decides the result.
 *
 * @param signalid id of the {@code hlm:signalListenerConfig} element
 * @param antDoc the document to search
 * @return the {@code failbuild} attribute of the first referenced signal input, or null when no
 *     referenced input exists
 */
private String signalType(String signalid, Document antDoc) {
  XPath referenceQuery =
      DocumentHelper.createXPath(
          "//hlm:signalListenerConfig[@id='" + signalid + "']/signalNotifierInput/signalInput");
  referenceQuery.setNamespaceURIs(map);

  for (Object reference : referenceQuery.selectNodes(antDoc)) {
    String referencedId = ((Element) reference).attributeValue("refid");

    XPath inputQuery = DocumentHelper.createXPath("//hlm:signalInput[@id='" + referencedId + "']");
    inputQuery.setNamespaceURIs(map);

    List inputs = inputQuery.selectNodes(antDoc);
    if (!inputs.isEmpty()) {
      // Only the first referenced input is ever consulted.
      return ((Element) inputs.get(0)).attributeValue("failbuild");
    }
  }
  return null;
}
private boolean accept(Object obj) { if (obj == null) { logger.warn("Applying JXPathFilter to null object."); return false; } if (pattern == null) { logger.warn("Expression for JXPathFilter is not set."); return false; } if (expectedValue == null) { // Handle the special case where the expected value really is null. if (pattern.endsWith("= null") || pattern.endsWith("=null")) { expectedValue = "null"; pattern = pattern.substring(0, pattern.lastIndexOf("=")); } else { if (logger.isInfoEnabled()) { logger.info("Expected value for JXPathFilter is not set, using 'true' by default"); } expectedValue = Boolean.TRUE.toString(); } } Object xpathResult = null; boolean accept = false; Document dom4jDoc; try { dom4jDoc = XMLUtils.toDocument(obj, muleContext); } catch (Exception e) { logger.warn( "JxPath filter rejected message because of an error while parsing XML: " + e.getMessage(), e); return false; } // Payload is XML if (dom4jDoc != null) { if (namespaces == null) { // no namespace defined, let's perform a direct evaluation xpathResult = dom4jDoc.valueOf(pattern); } else { // create an xpath expression with namespaces and evaluate it XPath xpath = DocumentHelper.createXPath(pattern); xpath.setNamespaceURIs(namespaces); xpathResult = xpath.valueOf(dom4jDoc); } } // Payload is a Java object else { if (logger.isDebugEnabled()) { logger.debug("Passing object of type " + obj.getClass().getName() + " to JXPathContext"); } JXPathContext context = JXPathContext.newContext(obj); initialise(context); xpathResult = context.getValue(pattern); } if (logger.isDebugEnabled()) { logger.debug( "JXPathFilter Expression result = '" + xpathResult + "' - Expected value = '" + expectedValue + "'"); } // Compare the XPath result with the expected result. if (xpathResult != null) { accept = xpathResult.toString().equals(expectedValue); } else { // A null result was actually expected. 
if (expectedValue.equals("null")) { accept = true; } // A null result was not expected, something probably went wrong. else { logger.warn("JXPathFilter expression evaluates to null: " + pattern); } } if (logger.isDebugEnabled()) { logger.debug("JXPathFilter accept object : " + accept); } return accept; }
// Class-initialization step: hands MessageConstants._nsMap to the shared `path` object via
// setNamespaceURIs, so the bindings are installed once before `path` is first used.
// NOTE(review): presumably `path` is an XPath and the map binds prefixes to namespace URIs -
// confirm against their declarations.
static { path.setNamespaceURIs(MessageConstants._nsMap); }
public IdMap handle(Element element) throws IOException { XPath idXPath = element.createXPath(idXpath); if (namespaces != null) idXPath.setNamespaceURIs(namespaces); Node id = idXPath.selectSingleNode(element); if (id == null && idXpathSecondary != null) { idXPath = element.createXPath(idXpathSecondary); if (namespaces != null) idXPath.setNamespaceURIs(namespaces); id = idXPath.selectSingleNode(element); } else if (id == null && idXpathSecondary == null) { logger.error("Doc without id field: " + element); return null; } Map<String, String> textFields = new HashMap<String, String>(); Map<String, String> storedFields = new HashMap<String, String>(); Map<String, Field> uniqueFields = new HashMap<String, Field>(); Collection<IdMap.TextField> isolatedFields = new HashSet<IdMap.TextField>(); for (XmlFieldHandler handler : fieldHandlers) { XPath xPath = element.createXPath(handler.getFieldXpath()); if (namespaces != null) xPath.setNamespaceURIs(namespaces); List<Node> nodes = xPath.selectNodes(element); for (Node node : nodes) { FilteredFields fields = handler.getFields(node); // TextFields if (fields != null && fields.getTextFields() != null) { for (Map.Entry<String, String> entry : fields.getTextFields().entrySet()) { String old = textFields.get(entry.getKey()); String newOne = entry.getValue(); if (old == null) textFields.put(entry.getKey(), newOne); else textFields.put(entry.getKey(), old + ' ' + newOne); } } // StoredFields if (fields != null && fields.getStoredTextFields() != null) { for (Map.Entry<String, String> entry : fields.getStoredTextFields().entrySet()) { String old = storedFields.get(entry.getKey()); if (old == null) storedFields.put(entry.getKey(), entry.getValue()); else storedFields.put(entry.getKey(), old + ' ' + entry.getValue()); } } // IsolatedFields if (fields != null && fields.getIsolatedTextFields() != null) { for (FilteredFields.TextField t : fields.getIsolatedTextFields()) { isolatedFields.add(new IdMap.TextField(t.name, t.value)); } } // 
PreparedFields if (fields != null && fields.getPreparedFields() != null) { for (Field f : fields.getPreparedFields()) { Field old = uniqueFields.get(f.name()); if (old == null) uniqueFields.put(f.name(), f); else logger.error("Not UNIQUE Prepared field " + f.name() + " in doc: " + id.getText()); } } } } if (id instanceof Element) return new IdMap( ((Element) id).getTextTrim(), textFields, storedFields, new ArrayList<Field>(uniqueFields.values()), isolatedFields); else return new IdMap( id.getText().trim(), textFields, storedFields, new ArrayList<Field>(uniqueFields.values()), isolatedFields); }
private boolean accept(Object obj) { if (obj == null) { logger.warn("Applying JXPathFilter to null object."); return false; } if (pattern == null) { logger.warn("Expression for JXPathFilter is not set."); return false; } if (expectedValue == null) { // Handle the special case where the expected value really is null. if (pattern.endsWith("= null") || pattern.endsWith("=null")) { expectedValue = "null"; pattern = pattern.substring(0, pattern.lastIndexOf("=")); } else { if (logger.isInfoEnabled()) { logger.info("Expected value for JXPathFilter is not set, using 'true' by default"); } expectedValue = Boolean.TRUE.toString(); } } Object xpathResult = null; boolean accept = false; // Payload is a DOM Document if (obj instanceof Document) { if (namespaces == null) { // no namespace defined, let's perform a direct evaluation xpathResult = ((Document) obj).valueOf(pattern); } else { // create an xpath expression with namespaces and evaluate it XPath xpath = DocumentHelper.createXPath(pattern); xpath.setNamespaceURIs(namespaces); xpathResult = xpath.valueOf(obj); } } // Payload is a String of XML else if (obj instanceof String) { try { return accept(DocumentHelper.parseText((String) obj)); } catch (DocumentException e) { logger.warn("JXPathFilter unable to parse XML document: " + e.getMessage(), e); if (logger.isDebugEnabled()) logger.debug("XML = " + StringMessageUtils.truncate((String) obj, 200, false)); return false; } } // Payload is a Java object else { if (logger.isDebugEnabled()) { logger.debug("Passing object of type " + obj.getClass().getName() + " to JXPathContext"); } JXPathContext context = JXPathContext.newContext(obj); initialise(context); xpathResult = context.getValue(pattern); } if (logger.isDebugEnabled()) { logger.debug( "JXPathFilter Expression result = '" + xpathResult + "' - Expected value = '" + expectedValue + "'"); } // Compare the XPath result with the expected result. 
if (xpathResult != null) { accept = xpathResult.toString().equals(expectedValue); } else { // A null result was actually expected. if (expectedValue.equals("null")) { accept = true; } // A null result was not expected, something probably went wrong. else { logger.warn("JXPathFilter expression evaluates to null: " + pattern); } } if (logger.isDebugEnabled()) { logger.debug("JXPathFilter accept object : " + accept); } return accept; }