public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String strId = ""; String strBody = ""; // Parse the xml and read data (page id and article body) // Using XOM library Builder builder = new Builder(); try { Document doc = builder.build(value.toString(), null); Nodes nodeId = doc.query("//eecs485_article_id"); strId = nodeId.get(0).getChild(0).getValue(); Nodes nodeBody = doc.query("//eecs485_article_body"); strBody = nodeBody.get(0).getChild(0).getValue(); } catch (ParsingException ex) { System.out.println("Not well-formed."); System.out.println(ex.getMessage()); } catch (IOException ex) { System.out.println("io exception"); } // Tokenize document body Pattern pattern = Pattern.compile("\\w+"); Matcher matcher = pattern.matcher(strBody); while (matcher.find()) { // Write the parsed token // key = term, docid value = 1 context.write(new Text(matcher.group() + "," + strId), one); } }
/**
 * Returns the value of the first node in {@code attributes} when that node is an
 * {@link Attribute}.
 *
 * @param attributes the node list to inspect
 * @return the first node's value, or the empty string when the list reports no nodes or its
 *     first node is not an {@code Attribute}
 */
public static String getAttributeValue(Nodes attributes) {
    if (!attributes.hasAny()) {
        return "";
    }
    Node first = attributes.get(0);
    return (first instanceof Attribute) ? first.getValue() : "";
}
public void loadLocationsAsync(String kml) { List<LotLocation> locations = new ArrayList<LotLocation>(); try { XMLReader parser = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); InputStream is = new ByteArrayInputStream(kml.getBytes()); // build out an XML document using TagSoup Document doc = new Builder(parser).build(is); // set the ns of the document as the XPathContext or we will not find the elements when we // attempt to parse XPathContext context = new XPathContext("ns", "http://www.w3.org/1999/xhtml"); // get the Placemark nodes within the data Nodes nodes = doc.query(".//ns:Placemark", context); for (int index = 0; index < nodes.size(); index++) { LotLocation placemark = new LotLocation(); Node placemarkNode = nodes.get(index); Node nameNode = placemarkNode.query("ns:name", context).get(0); if (nameNode != null) placemark.setLocation(nameNode.getValue()); Node descriptionNode = placemarkNode.query("ns:description", context).get(0); if (descriptionNode != null) placemark.setDescription(descriptionNode.getValue()); Node lnglatNode = placemarkNode.query("ns:Point/ns:coordinates", context).get(0); if (lnglatNode != null) { // get longitude,latitude,altitude, per KML spec String[] points = lnglatNode.getValue().split(","); placemark.setPoint( new LatLng( Double.parseDouble(points[1].trim()), Double.parseDouble(points[0].trim()))); } locations.add(placemark); } // spin off a new thread and load locations new LoadLocationsTask().execute(locations); } catch (Exception e) { Log.e("LoadLocationsTask", "Failure attempting to load locations", e); } }
/**
 * Resolves a variable by name, starting at {@code referable} and walking up through its
 * parents.
 *
 * <p>At each element the lookup depends on the element's local name:
 * <ul>
 *   <li>{@code scope} / {@code process}: looks for a {@code bpel:variables/bpel:variable}
 *       child whose {@code name} attribute equals {@code variableName};</li>
 *   <li>{@code onEvent}: matches against the {@code variable} attribute;</li>
 *   <li>{@code catch}: matches against the {@code faultVariable} attribute;</li>
 *   <li>{@code forEach}: matches against the {@code counterName} attribute.</li>
 * </ul>
 * When nothing matches, the search continues with the element's parent. Reaching a
 * {@code process} element without a match ends the search with an exception.
 *
 * @param referable    the element at which to start the lookup
 * @param variableName the name to resolve; must not be {@code null}
 * @return the matching variable wrapper
 * @throws NavigationException  if a {@code process} element is reached without finding the
 *     variable
 * @throws NullPointerException if {@code variableName} is {@code null}
 */
public VariableLike getVariableByName(Referable referable, String variableName)
        throws NavigationException {
    Objects.requireNonNull(variableName, "VariableName must not be null!");

    NodeHelper helper = new NodeHelper(referable);
    String localName = helper.getLocalName();
    boolean atProcess = "process".equals(localName);

    if (atProcess || "scope".equals(localName)) {
        String xpath = "./bpel:variables/bpel:variable[@name='" + variableName + "']";
        Nodes declared = helper.toXOM().query(xpath, CONTEXT);
        if (declared != null && !declared.isEmpty()) {
            return new VariableElement(declared.get(0), processContainer);
        }
        // The process element is the outermost place a variable can be declared.
        if (atProcess) {
            throw new NavigationException("Variable does not exist.");
        }
    } else if ("onEvent".equals(localName)) {
        if (variableName.equals(helper.getAttribute("variable"))) {
            return new OnEventElement(helper.toXOM(), processContainer);
        }
    } else if ("catch".equals(localName)) {
        if (variableName.equals(helper.getAttribute("faultVariable"))) {
            return new CatchElement(helper.toXOM(), processContainer);
        }
    } else if ("forEach".equals(localName)) {
        if (variableName.equals(helper.getAttribute("counterName"))) {
            return new ForEachVariable(helper.toXOM(), processContainer);
        }
    }

    // Not declared here: continue the search one level up.
    return getVariableByName(helper.getParent(), variableName);
}