public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String strId = ""; String strBody = ""; // Parse the xml and read data (page id and article body) // Using XOM library Builder builder = new Builder(); try { Document doc = builder.build(value.toString(), null); Nodes nodeId = doc.query("//eecs485_article_id"); strId = nodeId.get(0).getChild(0).getValue(); Nodes nodeBody = doc.query("//eecs485_article_body"); strBody = nodeBody.get(0).getChild(0).getValue(); } catch (ParsingException ex) { System.out.println("Not well-formed."); System.out.println(ex.getMessage()); } catch (IOException ex) { System.out.println("io exception"); } // Tokenize document body Pattern pattern = Pattern.compile("\\w+"); Matcher matcher = pattern.matcher(strBody); while (matcher.find()) { // Write the parsed token // key = term, docid value = 1 context.write(new Text(matcher.group() + "," + strId), one); } }
@Override public void execute_node( SesameDataSet dataset, String expression, TriplesMap parentTriplesMap, RMLPerformer performer, Object node, Resource subject) { // still need to make it work with more nore-results // currently it handles only one if (expression.startsWith("/")) expression = expression.substring(1); Node node2 = (Node) node; Nodes nodes = node2.query(expression, nsContext); for (int i = 0; i < nodes.size(); i++) { Node n = nodes.get(i); if (subject == null) performer.perform(n, dataset, parentTriplesMap); else { RMLProcessorFactory factory = new ConcreteRMLProcessorFactory(); RMLProcessor subprocessor = factory.create(map.getLogicalSource().getReferenceFormulation()); RMLPerformer subperformer = new NodeRMLPerformer(subprocessor); subperformer.perform(n, dataset, parentTriplesMap, subject); } } }
/**
 * Loads a template and runs the comment examples of every nested {@code template} element that
 * has at least one {@code comment} child carrying the example-input class and an {@code id}.
 *
 * @param templateResourceName resource name passed to {@code getTemplate}
 * @param baseUri base URI passed to {@code getTemplate}
 */
public static void runCommentExamples(String templateResourceName, String baseUri) {
  Element root = getTemplate(templateResourceName, baseUri);
  String xpath = ".//template[comment[@class='" + EXAMPLE_INPUT + "' and @id]]";
  Nodes matches = root.query(xpath);

  int position = 0;
  while (position < matches.size()) {
    Element templateWithExamples = (Element) matches.get(position);
    runCommentExamples(templateWithExamples);
    position++;
  }
}
/**
 * Returns the value of the first node in {@code attributes} when that node is an
 * {@code Attribute}.
 *
 * @param attributes node list to inspect
 * @return the first node's value, or the empty string when the list has no nodes or its first
 *     node is not an {@code Attribute}
 */
public static String getAttributeValue(Nodes attributes) {
  if (!attributes.hasAny()) {
    return "";
  }
  Node first = attributes.get(0);
  return (first instanceof Attribute) ? first.getValue() : "";
}
/**
 * Rebuilds {@code moleculeList} from the nodes selected by the
 * {@code TOPLEVEL_MOLECULE_DESCENDANTS} XPath under the root element.
 *
 * <p>The field is always replaced by a new list; it stays empty when there is no root element.
 *
 * @return the freshly populated {@code moleculeList}
 */
public List<CMLMolecule> getToplevelMoleculeDescendants() {
  moleculeList = new ArrayList<CMLMolecule>();
  if (rootElement == null) {
    return moleculeList;
  }

  Nodes matches = rootElement.query(TOPLEVEL_MOLECULE_DESCENDANTS, CML_XPATH);
  int idx = 0;
  while (idx < matches.size()) {
    CMLMolecule molecule = (CMLMolecule) matches.get(idx);
    moleculeList.add(molecule);
    idx++;
  }
  return moleculeList;
}
/**
 * Rebuilds {@code moleculeList} from the nodes selected by the
 * {@code IMMEDIATE_MOLECULE_CHILDREN} XPath under the root element.
 *
 * <p>The field is always replaced by a new list; it stays empty when there is no root element.
 *
 * @return the freshly populated {@code moleculeList}
 */
public List<CMLMolecule> getImmediateMoleculeChildren() {
  moleculeList = new ArrayList<CMLMolecule>();
  if (rootElement == null) {
    return moleculeList;
  }

  Nodes matches = rootElement.query(IMMEDIATE_MOLECULE_CHILDREN, CML_XPATH);
  int idx = 0;
  while (idx < matches.size()) {
    CMLMolecule molecule = (CMLMolecule) matches.get(idx);
    moleculeList.add(molecule);
    idx++;
  }
  return moleculeList;
}
/**
 * Rebuilds {@code reactionList} from the nodes selected by the
 * {@code TOPLEVEL_REACTION_DESCENDANTS} XPath under the root element.
 *
 * <p>The field is always replaced by a new list; it stays empty when there is no root element.
 *
 * @return the freshly populated {@code reactionList}
 */
public List<CMLReaction> getToplevelReactionDescendants() {
  reactionList = new ArrayList<CMLReaction>();
  if (rootElement == null) {
    return reactionList;
  }

  Nodes matches = rootElement.query(TOPLEVEL_REACTION_DESCENDANTS, CML_XPATH);
  int idx = 0;
  while (idx < matches.size()) {
    CMLReaction reaction = (CMLReaction) matches.get(idx);
    reactionList.add(reaction);
    idx++;
  }
  return reactionList;
}
/**
 * Rebuilds {@code reactionList} from the nodes selected by the
 * {@code IMMEDIATE_REACTION_CHILDREN} XPath under the root element.
 *
 * <p>The field is always replaced by a new list; it stays empty when there is no root element.
 *
 * @return the freshly populated {@code reactionList}
 */
public List<CMLReaction> getImmediateReactionChildren() {
  reactionList = new ArrayList<CMLReaction>();
  if (rootElement == null) {
    return reactionList;
  }

  Nodes matches = rootElement.query(IMMEDIATE_REACTION_CHILDREN, CML_XPATH);
  int idx = 0;
  while (idx < matches.size()) {
    CMLReaction reaction = (CMLReaction) matches.get(idx);
    reactionList.add(reaction);
    idx++;
  }
  return reactionList;
}
/**
 * Finds the single example-output {@code comment} child of {@code template} with the given id
 * and returns its first child as an element.
 *
 * @param template element whose {@code comment} children are searched
 * @param id value the comment's {@code id} attribute must have
 * @return the first child of the matching comment element
 * @throws RuntimeException if the number of matching comments is not exactly one, or if the
 *     matching comment has no children
 */
private static Element getOutputElement(Element template, String id) {
  Nodes nodes = template.query("comment[@class='" + EXAMPLE_OUTPUT + "' and @id='" + id + "']");
  if (nodes.size() != 1) {
    throw new RuntimeException(
        "Missing output for: " + id + "; found " + nodes.size() + " nodes");
  }
  // getChild(0) throws IndexOutOfBoundsException on a childless node instead of returning null,
  // so the child count has to be checked up front for the intended message to be reachable.
  if (nodes.get(0).getChildCount() == 0) {
    throw new RuntimeException("output must have content: " + id);
  }
  return (Element) nodes.get(0).getChild(0);
}
/**
 * Tells whether any element in {@code linkends} refers to {@code id} through its
 * {@code linkend} attribute while having no {@code glosslist} ancestor.
 *
 * @param id identifier to look for; a {@code null} id is never considered referenced
 * @param linkends elements carrying a {@code linkend} attribute
 * @param glosslist the glosslist being processed (not read by this method)
 * @return {@code true} if at least one matching reference lies outside a glosslist
 */
private boolean referencedFromOutside(String id, Nodes linkends, Element glosslist) {
  // An entry without an id cannot be the target of any linkend; bail out instead of hitting a
  // NullPointerException on id.equals(...) below.
  if (id == null) {
    return false;
  }
  for (int i = 0; i < linkends.size(); i++) {
    Element linkend = (Element) linkends.get(i);
    String ref = linkend.getAttributeValue("linkend");
    if (id.equals(ref) && getAncestor(linkend, "glosslist", dbns) == null) {
      return true;
    }
  }
  return false;
}
/** * Process a XPath expression against an XML node * * @param node * @param expression * @return value that matches expression */ private List<String> extractValueFromNode(Node node, String expression) { DefaultNamespaceContext dnc = get_namespaces(); List<String> list = new ArrayList<>(); if (expression.startsWith("count(")) { // Nodes result = node.query(expression, nsContext); String result; try { result = execute(node, expression); list.add(result.toString()); } catch (SaxonApiException ex) { Logger.getLogger(XPathProcessor.class.getName()).log(Level.SEVERE, null, ex); } } else { // if there's nothing to uniquelly identify, use # - temporary solution - challenge if (expression.equals("#")) { list.add(Integer.toString(enumerator++)); return list; } Nodes nodes = node.query(expression, nsContext); for (int i = 0; i < nodes.size(); i++) { Node n = nodes.get(i); // checks if the node has a value or children if (!n.getValue().isEmpty() || (n.getChildCount() != 0)) // MVS's for extracting elements and not the string /*if (!(n instanceof Attribute) && n.getChild(0) instanceof Element) { list.add(n.toXML()); } else { list.add(n.getValue()); }*/ // checks if the node has children, then cleans up new lines and extra spaces if (!(n instanceof Attribute) && n.getChildCount() > 1) list.add( n.getValue() .trim() .replaceAll("[\\t\\n\\r]", " ") .replaceAll(" +", " ") .replaceAll("\\( ", "\\(") .replaceAll(" \\)", "\\)") .replaceAll(" :", ":") .replaceAll(" ,", ",")); else list.add(n.getValue().toString()); } } return list; }
/** * In the given glosslist (assuming 1 per doc) 1) collect all glossentries that are referenced * from outside the current glosslist 2) collect all glossentries that are referenced from the * entries in 1 */ private Document process(Document doc) { Nodes linkends = doc.getRootElement().query("//db:*[@linkend]", con); Nodes glosslists = doc.getRootElement().query("//db:glosslist", con); if (glosslists.size() > 1) throw new IllegalArgumentException("only one glosslist per doc"); if (glosslists.size() == 0) return doc; Element glosslist = (Element) glosslists.get(0); // 1) collect all glossentries that are referenced from outside the current glosslist List<Element> usedGlossEntries = new LinkedList<Element>(); Elements allGlossentries = glosslist.getChildElements("glossentry", dbns); for (int k = 0; k < allGlossentries.size(); k++) { Element glossentry = allGlossentries.get(k); String id = glossentry.getAttributeValue("id", xmlns); if (referencedFromOutside(id, linkends, glosslist)) { if (!contains(usedGlossEntries, glossentry)) { usedGlossEntries.add(glossentry); } } } // 2: go through usedEntries nested linkends, and add any referenced glossEntries // recursively until list stops growing while (true) { List<Element> moreUsedGlossEntries = recurse(glosslist, allGlossentries, usedGlossEntries); if (moreUsedGlossEntries.size() == 0) { break; } for (Element more : moreUsedGlossEntries) { if (!contains(usedGlossEntries, more)) { usedGlossEntries.add(more); } } } // finally, remove any unused glossentries for (int k = 0; k < allGlossentries.size(); k++) { Element glossentry = allGlossentries.get(k); if (!contains(usedGlossEntries, glossentry)) { glossentry.getParent().removeChild(glossentry); } } return doc; }
/**
 * Performs one expansion step: finds glossentries that are referenced (via {@code linkend}) from
 * within the already used entries but are not themselves in the used list yet.
 *
 * @param glosslist the glosslist being processed (not read by this method)
 * @param allGlossentries every glossentry of the glosslist, used to resolve ids
 * @param usedGlossEntries entries known to be in use; not modified
 * @return newly discovered entries; may contain the same entry more than once and is empty when
 *     nothing new was found
 */
private List<Element> recurse(
    Element glosslist, Elements allGlossentries, List<Element> usedGlossEntries) {
  List<Element> discovered = new LinkedList<Element>();
  for (Element used : usedGlossEntries) {
    Nodes references = used.query(".//db:*[@linkend]", con);
    for (int pos = 0; pos < references.size(); pos++) {
      String targetId = ((Element) references.get(pos)).getAttributeValue("linkend");
      Element target = getGlossEntryByID(targetId, allGlossentries);
      if (target == null || contains(usedGlossEntries, target)) {
        continue;
      }
      discovered.add(target);
    }
  }
  return discovered;
}
/**
 * Updates the sticky-coercion dependency in the reflector integration-test POM and checks that
 * the POM still has three {@code version} elements, exactly one of which holds the new range.
 */
@Test
public void update()
    throws ValidityException, ParsingException, IOException, MojoExecutionException {
  File pomFile = new File(new File("src/it/reflector"), "pom.xml");
  Document pom = new Builder().build(pomFile);
  Artifact artifact =
      new DefaultArtifact("net.stickycode", "sticky-coercion", "jar", "", "[3.1,4)");

  StickyBoundsMojo mojo = new StickyBoundsMojo();
  mojo.updateDependency(pom, artifact, "[3.6,4)");

  XPathContext context = new XPathContext("mvn", "http://maven.apache.org/POM/4.0.0");

  // The total number of version elements must be unchanged by the update.
  Nodes allVersions = pom.query("//mvn:version", context);
  assertThat(allVersions.size()).isEqualTo(3);

  // Exactly one version element must now carry the new range.
  Nodes updatedVersions = pom.query("//mvn:version[text()='[3.6,4)']", context);
  assertThat(updatedVersions.size()).isEqualTo(1);
  Node updated = updatedVersions.get(0);
  assertThat(updated.getValue()).isEqualTo("[3.6,4)");
}
public void loadLocationsAsync(String kml) { List<LotLocation> locations = new ArrayList<LotLocation>(); try { XMLReader parser = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); InputStream is = new ByteArrayInputStream(kml.getBytes()); // build out an XML document using TagSoup Document doc = new Builder(parser).build(is); // set the ns of the document as the XPathContext or we will not find the elements when we // attempt to parse XPathContext context = new XPathContext("ns", "http://www.w3.org/1999/xhtml"); // get the Placemark nodes within the data Nodes nodes = doc.query(".//ns:Placemark", context); for (int index = 0; index < nodes.size(); index++) { LotLocation placemark = new LotLocation(); Node placemarkNode = nodes.get(index); Node nameNode = placemarkNode.query("ns:name", context).get(0); if (nameNode != null) placemark.setLocation(nameNode.getValue()); Node descriptionNode = placemarkNode.query("ns:description", context).get(0); if (descriptionNode != null) placemark.setDescription(descriptionNode.getValue()); Node lnglatNode = placemarkNode.query("ns:Point/ns:coordinates", context).get(0); if (lnglatNode != null) { // get longitude,latitude,altitude, per KML spec String[] points = lnglatNode.getValue().split(","); placemark.setPoint( new LatLng( Double.parseDouble(points[1].trim()), Double.parseDouble(points[0].trim()))); } locations.add(placemark); } // spin off a new thread and load locations new LoadLocationsTask().execute(locations); } catch (Exception e) { Log.e("LoadLocationsTask", "Failure attempting to load locations", e); } }
/**
 * Runs every example embedded in a template: each example-input {@code comment} child with an
 * {@code id} is parsed with a converter built from the template, and the result is compared
 * canonically with the output element looked up for the same id.
 *
 * @param template template element holding the example comments
 * @throws RuntimeException if the template has no example inputs, an input has no id attribute
 *     value, or no output element is obtained for an id
 */
public static void runCommentExamples(Element template) {
  Text2XMLTemplateConverter converter = new Text2XMLTemplateConverter(template);
  String inputXPath = "comment[@class='" + EXAMPLE_INPUT + "' and @id]";
  Nodes inputs = template.query(inputXPath);
  if (inputs.size() == 0) {
    throw new RuntimeException("No examples found");
  }

  int position = 0;
  while (position < inputs.size()) {
    Element input = (Element) inputs.get(position);

    String id = input.getAttributeValue(ID);
    if (id == null) {
      throw new RuntimeException("outputElement must have id: ");
    }

    Element expected = getOutputElement(template, id);
    if (expected == null) {
      throw new RuntimeException("Cannot create OutputElement: " + id);
    }

    Element actual = parseText(converter, input.getValue());
    JumboTestUtils.assertEqualsCanonically("template", expected, actual, true);
    position++;
  }
}
/**
 * Updates the {@code test-jar} classified sticky-coercion dependency in {@code classifiers.xml}
 * and checks that the POM still has four {@code version} elements, exactly one of which holds
 * the new range.
 */
@Test
public void updateTheClassifier()
    throws ValidityException, ParsingException, IOException, MojoExecutionException {
  Builder builder = new Builder();
  BufferedReader pomReader =
      new BufferedReader(
          new InputStreamReader(getClass().getResourceAsStream("classifiers.xml")));
  Document pom = builder.build(pomReader);
  Artifact artifact =
      new DefaultArtifact("net.stickycode", "sticky-coercion", "jar", "test-jar", "[2.1,4)");

  StickyBoundsMojo mojo = new StickyBoundsMojo();
  mojo.updateDependency(pom, artifact, "[2.6,3)");

  XPathContext context = new XPathContext("mvn", "http://maven.apache.org/POM/4.0.0");

  // The total number of version elements must be unchanged by the update.
  Nodes allVersions = pom.query("//mvn:version", context);
  assertThat(allVersions.size()).isEqualTo(4);

  // Exactly one version element must now carry the new range.
  Nodes updatedVersions = pom.query("//mvn:version[text()='[2.6,3)']", context);
  assertThat(updatedVersions.size()).isEqualTo(1);
  Node updated = updatedVersions.get(0);
  assertThat(updated.getValue()).isEqualTo("[2.6,3)");
}
/**
 * Resolves a variable by name, starting at {@code referable} and moving to its parent until a
 * declaration is found.
 *
 * <p>At each level the element's local name decides where to look:
 *
 * <ul>
 *   <li>{@code scope} / {@code process}: a {@code bpel:variables/bpel:variable} child with a
 *       matching {@code name} attribute; a {@code process} without a match ends the search with
 *       an exception.
 *   <li>{@code onEvent}: the {@code variable} attribute.
 *   <li>{@code catch}: the {@code faultVariable} attribute.
 *   <li>{@code forEach}: the {@code counterName} attribute.
 * </ul>
 *
 * @param referable element at which the lookup starts
 * @param variableName name of the variable; must not be {@code null}
 * @return the matching variable wrapper
 * @throws NavigationException if a {@code process} element is reached without finding the
 *     variable
 * @throws NullPointerException if {@code variableName} is {@code null}
 */
public VariableLike getVariableByName(Referable referable, String variableName)
    throws NavigationException {
  Objects.requireNonNull(variableName, "VariableName must not be null!");

  NodeHelper element = new NodeHelper(referable);
  String elementName = element.getLocalName();
  boolean isProcess = "process".equals(elementName);

  if ("scope".equals(elementName) || isProcess) {
    String xpath = "./bpel:variables/bpel:variable[@name='" + variableName + "']";
    Nodes declared = element.toXOM().query(xpath, CONTEXT);
    if (declared != null && !declared.isEmpty()) {
      return new VariableElement(declared.get(0), processContainer);
    }
    if (isProcess) {
      // Nothing above the process can declare the variable.
      throw new NavigationException("Variable does not exist.");
    }
  } else if ("onEvent".equals(elementName)) {
    if (variableName.equals(element.getAttribute("variable"))) {
      return new OnEventElement(element.toXOM(), processContainer);
    }
  } else if ("catch".equals(elementName)) {
    if (variableName.equals(element.getAttribute("faultVariable"))) {
      return new CatchElement(element.toXOM(), processContainer);
    }
  } else if ("forEach".equals(elementName)) {
    if (variableName.equals(element.getAttribute("counterName"))) {
      return new ForEachVariable(element.toXOM(), processContainer);
    }
  }

  // Not declared at this level: continue the lookup at the parent.
  return getVariableByName(element.getParent(), variableName);
}