Пример #1
0
  /**
   * Parses a small HTML page with markup retention enabled and checks that the parser's output
   * can be re-read as XML and still contains the original paragraph text.
   */
  @Test
  public void testHtmlWithTags() throws Exception {
    final String pageUrl = "http://domain.com/page.html";
    final String mimeType = "text/html; charset=utf-8";
    final String htmlText =
        "<html><head><title>Title</title></head>" + "<body><p>this is a test</p></body></html>";

    // Build the fetched-page fixture: headers + raw bytes wrapped in a FetchedDatum.
    HttpHeaders responseHeaders = new HttpHeaders();
    responseHeaders.add(HttpHeaderNames.CONTENT_TYPE, mimeType);
    ContentBytes body = new ContentBytes(htmlText.getBytes("utf-8"));
    FetchedDatum fetched =
        new FetchedDatum(
            pageUrl, pageUrl, System.currentTimeMillis(), responseHeaders, body, mimeType, 0);

    // Run the parser under test.
    ParsedDatum parsed = new SimpleParser(new ParserPolicy(), true).parse(fetched);

    // Feed the parser's text output back through dom4j.
    SAXReader saxReader = new SAXReader(new Parser());
    saxReader.setEncoding("UTF-8");
    Document parsedDoc = saxReader.read(new StringInputStream(parsed.getParsedText()));

    // The <html> element carries a default namespace, so the XPath has to bind an explicit
    // prefix to that namespace URI before it can address any element.
    Map<String, String> prefixToUri = new HashMap<String, String>();
    prefixToUri.put("xhtml", "http://www.w3.org/1999/xhtml");
    XPath paragraphPath = DocumentHelper.createXPath("/xhtml:html/xhtml:body/xhtml:p");
    paragraphPath.setNamespaceURIs(prefixToUri);

    Node paragraph = paragraphPath.selectSingleNode(parsedDoc);
    Assert.assertNotNull(paragraph);
    Assert.assertEquals("this is a test", paragraph.getText());
  }
 /**
  * Finds the first occurrence of an element in a document.
  *
  * <p>The search expression is {@code //childName[1]}, which selects every element named
  * {@code childName} that is the first such child of its parent. It can therefore match more
  * than one element (for example when the element occurs under several parents); in that case
  * the first match in the returned list is used.
  *
  * @param document the document to be searched for the element. Must not be null.
  * @param childName the name of the element to be searched for. Must not be null.
  * @return the found element or null if it could not be found
  * @throws NullPointerException thrown if any of the arguments is null
  */
 public static Element findElement(Document document, String childName) {
   if (document == null) {
     throw new NullPointerException("document must not be null");
   }
   if (childName == null) {
     // Without this check the search would silently look for an element named "null".
     throw new NullPointerException("childName must not be null");
   }
   XPath xpath = document.createXPath("//" + childName + "[1]");
   Object result = xpath.evaluate(document.getRootElement());
   if (result instanceof List) {
     // A List result covers both "no match" (empty) and "several matches"; previously both
     // were reported as "not found". Return the first element of the list, if any.
     for (Object candidate : (List<?>) result) {
       if (candidate instanceof Element) {
         return (Element) candidate;
       }
     }
     return null;
   }
   return result instanceof Element ? (Element) result : null;
 }
Пример #3
0
 /**
  * Evaluates an XPath expression against a node and collects the text of every match.
  *
  * @param xpathString the XPath expression to evaluate
  * @param node the context node the expression is evaluated against
  * @return the text of each selected node, in the order the nodes were returned; empty if
  *     nothing matched
  */
 private List<String> findValues(String xpathString, Node node) {
   XPath expression = getXpath(xpathString);
   expression.setNamespaceURIs(prefixNamespaceMap);
   List<? extends Node> matches = expression.selectNodes(node);

   int matchCount = matches.size();
   List<String> texts = new ArrayList<String>(matchCount);
   for (int i = 0; i < matchCount; i++) {
     texts.add(matches.get(i).getText());
   }
   return texts;
 }
Пример #4
0
 /**
  * Evaluates an XPath expression against a node and returns the text of the single selected
  * node.
  *
  * @param xpathString the XPath expression to evaluate
  * @param node the context node the expression is evaluated against
  * @return the text of the selected node, or null when no node was selected
  */
 private String findValue(String xpathString, Node node) {
   XPath expression = getXpath(xpathString);
   expression.setNamespaceURIs(prefixNamespaceMap);
   Node match = expression.selectSingleNode(node);
   return (match == null) ? null : match.getText();
 }
Пример #5
0
 /**
  * Loads the topics file and creates one {@link QueryProcessor} per {@code topic} element found
  * anywhere in it.
  *
  * @param topicsFile path of the XML topics file to parse
  * @param timesKey flag handed unchanged to every created {@link QueryProcessor}
  * @throws MalformedURLException declared by the constructor; presumably raised while parsing
  *     the file
  * @throws DocumentException declared by the constructor; presumably raised when the file
  *     cannot be parsed as XML
  */
 public StrategyQueryBuilder(String topicsFile, boolean timesKey)
     throws MalformedURLException, DocumentException {
   queryProcessors = new ArrayList<QueryProcessor>();
   topics = Dom4jUtil.parse(new File(topicsFile));

   List<Element> topicElements = topics.createXPath("//topic").selectNodes(topics);
   for (Element topicElement : topicElements) {
     QueryParser topicParser = new QueryParser(topicElement);
     queryProcessors.add(new QueryProcessor(topicParser.getQuery(), timesKey));
   }
 }
Пример #6
0
  /**
   * Returns the text of the element selected by a namespace-qualified XPath expression.
   *
   * <p>Example: with {@code namespaceKey = "tns"} and {@code namespaceValue =
   * "http://www.99bill.com/schema/fo/settlement"}, the expression {@code "//tns:status"} selects
   * the {@code status} element in that namespace. To read a different node, change the
   * expression passed as {@code xPath}.
   *
   * @param document the document to search
   * @param namespaceKey the namespace prefix used inside {@code xPath}
   * @param namespaceValue the namespace URI the prefix is bound to
   * @param xPath the XPath expression selecting the wanted element
   * @return the text of the selected element, or null when the expression selects nothing
   */
  public String getNamespaceNodeText(
      Document document, String namespaceKey, String namespaceValue, String xPath) {

    HashMap<String, String> xmlMap = new HashMap<String, String>();
    xmlMap.put(namespaceKey, namespaceValue);

    XPath xpath = document.createXPath(xPath);
    xpath.setNamespaceURIs(xmlMap);
    Element element = (Element) xpath.selectSingleNode(document);
    if (element == null) {
      // Nothing matched: report "not found" instead of failing with a NullPointerException.
      return null;
    }

    return element.getText();
  }
Пример #7
0
 /**
  * Copies the node selected by {@code path} out of a SOAP envelope into a new document.
  *
  * <p>NOTE(review): if {@code path} selects nothing, the clone call below fails with a
  * NullPointerException rather than the declared InvalidInputFormatException - confirm whether
  * callers rely on that.
  *
  * @param soapEnvelope the envelope document to unwrap
  * @return a new document whose content is a clone of the selected node
  * @throws InvalidInputFormatException declared, but not thrown by the code in this method
  */
 public static Document unwrapMessage(Document soapEnvelope) throws InvalidInputFormatException {
   if (_log.isDebugEnabled()) {
     _log.debug(soapEnvelope.asXML());
   }

   // Evaluation happens while holding the monitor of 'path'.
   final Node selected;
   synchronized (path) {
     selected = path.selectSingleNode(soapEnvelope);
   }

   Document unwrapped = DocumentHelper.createDocument();
   unwrapped.add((Node) selected.clone());
   return unwrapped;
 }
Пример #8
0
  /**
   * Reads every {@code hlm:signalListenerConfig} element from an Ant file and records, per
   * target, an entry of the form {@code "<signal id>,<failbuild>"} in {@code globalSignalList}.
   *
   * @param root not used by this method
   * @param antFile path of the Ant XML file to read
   * @throws DocumentException if the file cannot be parsed
   * @throws IOException declared by the method signature
   */
  private void readSignals(Element root, String antFile) throws DocumentException, IOException {
    Document antDoc = new SAXReader().read(new File(antFile));

    XPath listenerConfigs = DocumentHelper.createXPath("//hlm:signalListenerConfig");
    listenerConfigs.setNamespaceURIs(map);

    for (Object match : listenerConfigs.selectNodes(antDoc)) {
      // The field is only updated when at least one listener config exists.
      signaldoc = antDoc;
      Element listenerConfig = (Element) match;
      String id = listenerConfig.attributeValue("id");
      String target = listenerConfig.attributeValue("target");

      List targetSignals = globalSignalList.get(target);
      String failbuild = signalType(id, signaldoc);
      if (targetSignals == null) {
        targetSignals = new ArrayList<String>();
      }
      targetSignals.add(id + "," + failbuild);
      globalSignalList.put(target, targetSignals);
    }
  }
Пример #9
0
  /**
   * Converts every node selected by {@code xpathSelector} into a {@link Publication}.
   *
   * @param doc the document to read publications from
   * @return one publication per selected element, in selection order
   * @throws XOMException declared by the method signature
   * @throws NumberFormatException if an element's {@code id} child is missing or not a long
   */
  public List<Publication> toObject(final Document doc) throws XOMException {
    final List<Publication> publications = new ArrayList<Publication>();

    for (final Object selected : xpathSelector.selectNodes(doc)) {
      final Element source = (Element) selected;
      final Publication target = new Publication();

      // Plain text children map one-to-one onto string properties.
      target.setName(source.elementText("name"));
      target.setDescription(source.elementText("description"));
      target.setPubroot(source.elementText("pubroot"));
      target.setPreview(source.elementText("cs_preview"));
      target.setPrefix(source.elementText("cs_prefix"));
      target.setPreviewAsset(source.elementText("cs_previewasset"));

      // The id is the only numeric property.
      target.setId(Long.parseLong(source.elementText("id")));

      publications.add(target);
    }

    return publications;
  }
Пример #10
0
  /**
   * Looks up the {@code failbuild} attribute for a signal.
   *
   * <p>The signal's listener config references signal inputs by {@code refid}; the first
   * referenced {@code hlm:signalInput} that exists in the document supplies the result.
   *
   * @param signalid id of the {@code hlm:signalListenerConfig} to resolve
   * @param antDoc the document to search
   * @return the {@code failbuild} attribute of the first resolvable signal input (possibly null
   *     if the attribute is absent), or null when no referenced input is found
   */
  private String signalType(String signalid, Document antDoc) {
    XPath inputRefs =
        DocumentHelper.createXPath(
            "//hlm:signalListenerConfig[@id='" + signalid + "']/signalNotifierInput/signalInput");
    inputRefs.setNamespaceURIs(map);

    for (Object ref : inputRefs.selectNodes(antDoc)) {
      String referencedId = ((Element) ref).attributeValue("refid");

      XPath inputLookup =
          DocumentHelper.createXPath("//hlm:signalInput[@id='" + referencedId + "']");
      inputLookup.setNamespaceURIs(map);
      List definitions = inputLookup.selectNodes(antDoc);

      // Only the first definition is ever consulted.
      if (!definitions.isEmpty()) {
        return ((Element) definitions.get(0)).attributeValue("failbuild");
      }
    }
    return null;
  }
Пример #11
0
  /**
   * Evaluates {@code pattern} against the payload and compares the result's string form with
   * {@code expectedValue}.
   *
   * <p>Side effect: when {@code expectedValue} is unset it is initialised here, either to
   * {@code "null"} (and a trailing {@code = null} is stripped from {@code pattern}) or to
   * {@code "true"}.
   *
   * @param obj the payload; converted to a dom4j document when possible, otherwise handed to
   *     JXPath as a plain Java object
   * @return true when the evaluation result matches the expected value; false otherwise,
   *     including when the payload or pattern is null or the XML conversion fails
   */
  private boolean accept(Object obj) {
    if (obj == null) {
      logger.warn("Applying JXPathFilter to null object.");
      return false;
    }
    if (pattern == null) {
      logger.warn("Expression for JXPathFilter is not set.");
      return false;
    }

    if (expectedValue == null) {
      // A pattern ending in "= null" means a null result is what the filter should accept.
      boolean expectsNull = pattern.endsWith("= null") || pattern.endsWith("=null");
      if (expectsNull) {
        expectedValue = "null";
        pattern = pattern.substring(0, pattern.lastIndexOf("="));
      } else {
        if (logger.isInfoEnabled()) {
          logger.info("Expected value for JXPathFilter is not set, using 'true' by default");
        }
        expectedValue = Boolean.TRUE.toString();
      }
    }

    final Document dom4jDoc;
    try {
      dom4jDoc = XMLUtils.toDocument(obj, muleContext);
    } catch (Exception e) {
      logger.warn(
          "JxPath filter rejected message because of an error while parsing XML: " + e.getMessage(),
          e);
      return false;
    }

    final Object xpathResult;
    if (dom4jDoc == null) {
      // Not XML: evaluate the pattern against the Java object graph.
      if (logger.isDebugEnabled()) {
        logger.debug("Passing object of type " + obj.getClass().getName() + " to JXPathContext");
      }
      JXPathContext context = JXPathContext.newContext(obj);
      initialise(context);
      xpathResult = context.getValue(pattern);
    } else if (namespaces == null) {
      // XML without namespace bindings: evaluate directly on the document.
      xpathResult = dom4jDoc.valueOf(pattern);
    } else {
      // XML with namespace bindings: build an XPath that knows the prefixes.
      XPath xpath = DocumentHelper.createXPath(pattern);
      xpath.setNamespaceURIs(namespaces);
      xpathResult = xpath.valueOf(dom4jDoc);
    }

    if (logger.isDebugEnabled()) {
      logger.debug(
          "JXPathFilter Expression result = '"
              + xpathResult
              + "' -  Expected value = '"
              + expectedValue
              + "'");
    }

    final boolean accept;
    if (xpathResult != null) {
      accept = xpathResult.toString().equals(expectedValue);
    } else if (expectedValue.equals("null")) {
      // A null result was actually expected.
      accept = true;
    } else {
      // A null result was not expected, something probably went wrong.
      logger.warn("JXPathFilter expression evaluates to null: " + pattern);
      accept = false;
    }

    if (logger.isDebugEnabled()) {
      logger.debug("JXPathFilter accept object  : " + accept);
    }
    return accept;
  }
Пример #12
0
 // Static initializer: runs once when the class is initialized and binds the namespace
 // prefixes from MessageConstants._nsMap on 'path' (presumably the shared XPath used to
 // select nodes elsewhere in this class - confirm against the field declaration).
 static {
   path.setNamespaceURIs(MessageConstants._nsMap);
 }
Пример #13
0
  /**
   * Builds an {@link IdMap} for one document element: resolves its id and gathers the text,
   * stored, isolated and prepared fields produced by every configured field handler.
   *
   * <p>The id is looked up with {@code idXpath}; if that selects nothing and
   * {@code idXpathSecondary} is configured, the secondary expression is tried. When neither
   * yields a node the element is logged and skipped.
   *
   * @param element the document element to process
   * @return the collected id and fields, or null when the element has no id
   * @throws IOException declared by the method signature
   */
  public IdMap handle(Element element) throws IOException {
    XPath idXPath = element.createXPath(idXpath);
    if (namespaces != null) {
      idXPath.setNamespaceURIs(namespaces);
    }

    Node id = idXPath.selectSingleNode(element);
    if (id == null && idXpathSecondary != null) {
      idXPath = element.createXPath(idXpathSecondary);
      if (namespaces != null) {
        idXPath.setNamespaceURIs(namespaces);
      }
      id = idXPath.selectSingleNode(element);
    }
    if (id == null) {
      // Covers both "no secondary expression configured" and "secondary expression also
      // matched nothing"; the latter previously fell through and failed later on id.getText().
      logger.error("Doc without id field: " + element);
      return null;
    }

    Map<String, String> textFields = new HashMap<String, String>();
    Map<String, String> storedFields = new HashMap<String, String>();
    Map<String, Field> uniqueFields = new HashMap<String, Field>();
    Collection<IdMap.TextField> isolatedFields = new HashSet<IdMap.TextField>();
    for (XmlFieldHandler handler : fieldHandlers) {
      XPath xPath = element.createXPath(handler.getFieldXpath());
      if (namespaces != null) {
        xPath.setNamespaceURIs(namespaces);
      }
      List<Node> nodes = xPath.selectNodes(element);
      for (Node node : nodes) {
        FilteredFields fields = handler.getFields(node);
        if (fields == null) {
          continue;
        }
        // TextFields: values sharing a key are concatenated, separated by a space.
        if (fields.getTextFields() != null) {
          for (Map.Entry<String, String> entry : fields.getTextFields().entrySet()) {
            String old = textFields.get(entry.getKey());
            String newOne = entry.getValue();
            if (old == null) {
              textFields.put(entry.getKey(), newOne);
            } else {
              textFields.put(entry.getKey(), old + ' ' + newOne);
            }
          }
        }
        // StoredFields: same merge rule as text fields.
        if (fields.getStoredTextFields() != null) {
          for (Map.Entry<String, String> entry : fields.getStoredTextFields().entrySet()) {
            String old = storedFields.get(entry.getKey());
            if (old == null) {
              storedFields.put(entry.getKey(), entry.getValue());
            } else {
              storedFields.put(entry.getKey(), old + ' ' + entry.getValue());
            }
          }
        }
        // IsolatedFields: collected individually, never merged.
        if (fields.getIsolatedTextFields() != null) {
          for (FilteredFields.TextField t : fields.getIsolatedTextFields()) {
            isolatedFields.add(new IdMap.TextField(t.name, t.value));
          }
        }
        // PreparedFields: first one per name wins; later duplicates are only logged.
        if (fields.getPreparedFields() != null) {
          for (Field f : fields.getPreparedFields()) {
            Field old = uniqueFields.get(f.name());
            if (old == null) {
              uniqueFields.put(f.name(), f);
            } else {
              logger.error("Not UNIQUE Prepared field " + f.name() + " in doc: " + id.getText());
            }
          }
        }
      }
    }

    String idText = (id instanceof Element) ? ((Element) id).getTextTrim() : id.getText().trim();
    return new IdMap(
        idText,
        textFields,
        storedFields,
        new ArrayList<Field>(uniqueFields.values()),
        isolatedFields);
  }
Пример #14
0
  /**
   * Evaluates {@code pattern} against the payload and compares the result's string form with
   * {@code expectedValue}.
   *
   * <p>Three payload kinds are handled: a dom4j {@code Document} is evaluated directly, a
   * {@code String} is parsed as XML and re-submitted to this method, and any other object is
   * evaluated through JXPath.
   *
   * <p>Side effect: when {@code expectedValue} is unset it is initialised here, either to
   * {@code "null"} (and a trailing {@code = null} is stripped from {@code pattern}) or to
   * {@code "true"}.
   *
   * @param obj the payload to test
   * @return true when the evaluation result matches the expected value; false otherwise,
   *     including when the payload or pattern is null or a String payload is not parseable XML
   */
  private boolean accept(Object obj) {
    if (obj == null) {
      logger.warn("Applying JXPathFilter to null object.");
      return false;
    }
    if (pattern == null) {
      logger.warn("Expression for JXPathFilter is not set.");
      return false;
    }

    if (expectedValue == null) {
      // A pattern ending in "= null" means a null result is what the filter should accept.
      boolean expectsNull = pattern.endsWith("= null") || pattern.endsWith("=null");
      if (expectsNull) {
        expectedValue = "null";
        pattern = pattern.substring(0, pattern.lastIndexOf("="));
      } else {
        if (logger.isInfoEnabled()) {
          logger.info("Expected value for JXPathFilter is not set, using 'true' by default");
        }
        expectedValue = Boolean.TRUE.toString();
      }
    }

    // A String payload is treated as XML text: parse it and evaluate the resulting document.
    if (obj instanceof String) {
      String xml = (String) obj;
      try {
        return accept(DocumentHelper.parseText(xml));
      } catch (DocumentException e) {
        logger.warn("JXPathFilter unable to parse XML document: " + e.getMessage(), e);
        if (logger.isDebugEnabled()) {
          logger.debug("XML = " + StringMessageUtils.truncate(xml, 200, false));
        }
        return false;
      }
    }

    final Object xpathResult;
    if (!(obj instanceof Document)) {
      // Plain Java object: evaluate the pattern against the object graph.
      if (logger.isDebugEnabled()) {
        logger.debug("Passing object of type " + obj.getClass().getName() + " to JXPathContext");
      }
      JXPathContext context = JXPathContext.newContext(obj);
      initialise(context);
      xpathResult = context.getValue(pattern);
    } else if (namespaces == null) {
      // Document without namespace bindings: evaluate directly.
      xpathResult = ((Document) obj).valueOf(pattern);
    } else {
      // Document with namespace bindings: build an XPath that knows the prefixes.
      XPath xpath = DocumentHelper.createXPath(pattern);
      xpath.setNamespaceURIs(namespaces);
      xpathResult = xpath.valueOf(obj);
    }

    if (logger.isDebugEnabled()) {
      logger.debug(
          "JXPathFilter Expression result = '"
              + xpathResult
              + "' -  Expected value = '"
              + expectedValue
              + "'");
    }

    final boolean accept;
    if (xpathResult != null) {
      accept = xpathResult.toString().equals(expectedValue);
    } else if (expectedValue.equals("null")) {
      // A null result was actually expected.
      accept = true;
    } else {
      // A null result was not expected, something probably went wrong.
      logger.warn("JXPathFilter expression evaluates to null: " + pattern);
      accept = false;
    }

    if (logger.isDebugEnabled()) {
      logger.debug("JXPathFilter accept object  : " + accept);
    }
    return accept;
  }