public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String strId = ""; String strBody = ""; // Parse the xml and read data (page id and article body) // Using XOM library Builder builder = new Builder(); try { Document doc = builder.build(value.toString(), null); Nodes nodeId = doc.query("//eecs485_article_id"); strId = nodeId.get(0).getChild(0).getValue(); Nodes nodeBody = doc.query("//eecs485_article_body"); strBody = nodeBody.get(0).getChild(0).getValue(); } catch (ParsingException ex) { System.out.println("Not well-formed."); System.out.println(ex.getMessage()); } catch (IOException ex) { System.out.println("io exception"); } // Tokenize document body Pattern pattern = Pattern.compile("\\w+"); Matcher matcher = pattern.matcher(strBody); while (matcher.find()) { // Write the parsed token // key = term, docid value = 1 context.write(new Text(matcher.group() + "," + strId), one); } }
/**
 * Loads {@code src/it/reflector/pom.xml}, asks {@code StickyBoundsMojo} to update the
 * {@code net.stickycode:sticky-coercion} jar dependency to {@code [3.6,4)}, and checks that
 * the POM still has three {@code version} elements, exactly one of which carries the new range.
 */
@Test
public void update()
    throws ValidityException, ParsingException, IOException, MojoExecutionException {
  File projectDir = new File("src/it/reflector");
  Document pom = new Builder().build(new File(projectDir, "pom.xml"));

  Artifact dependency =
      new DefaultArtifact("net.stickycode", "sticky-coercion", "jar", "", "[3.1,4)");
  StickyBoundsMojo mojo = new StickyBoundsMojo();
  mojo.updateDependency(pom, dependency, "[3.6,4)");

  // POM elements live in the Maven namespace, so every XPath needs the prefix binding.
  XPathContext mavenNs = new XPathContext("mvn", "http://maven.apache.org/POM/4.0.0");

  Nodes allVersions = pom.query("//mvn:version", mavenNs);
  assertThat(allVersions.size()).isEqualTo(3);

  Nodes updatedVersions = pom.query("//mvn:version[text()='[3.6,4)']", mavenNs);
  assertThat(updatedVersions.size()).isEqualTo(1);
  assertThat(updatedVersions.get(0).getValue()).isEqualTo("[3.6,4)");
}
/**
 * Loads the {@code classifiers.xml} classpath resource, asks {@code StickyBoundsMojo} to
 * update the {@code net.stickycode:sticky-coercion} jar dependency with classifier
 * {@code test-jar} to {@code [2.6,3)}, and checks that the POM still has four {@code version}
 * elements, exactly one of which carries the new range.
 *
 * <p>The resource is handed to the XML parser as a byte stream so the parser decides the
 * character encoding itself rather than relying on the platform default, and the stream is
 * closed once the document has been built.
 *
 * @throws IOException if the resource is missing or cannot be read
 */
@Test
public void updateTheClassifier()
    throws ValidityException, ParsingException, IOException, MojoExecutionException {
  java.io.InputStream resource = getClass().getResourceAsStream("classifiers.xml");
  if (resource == null) {
    // getResourceAsStream signals "not found" with null; fail with a readable message.
    throw new IOException("classifiers.xml not found on the classpath of " + getClass().getName());
  }

  Document pom;
  try {
    pom = new Builder().build(resource);
  } finally {
    resource.close();
  }

  Artifact artifact =
      new DefaultArtifact("net.stickycode", "sticky-coercion", "jar", "test-jar", "[2.1,4)");
  new StickyBoundsMojo().updateDependency(pom, artifact, "[2.6,3)");

  // POM elements live in the Maven namespace, so every XPath needs the prefix binding.
  XPathContext context = new XPathContext("mvn", "http://maven.apache.org/POM/4.0.0");

  Nodes versions = pom.query("//mvn:version", context);
  assertThat(versions.size()).isEqualTo(4);

  Nodes nodes = pom.query("//mvn:version[text()='[2.6,3)']", context);
  assertThat(nodes.size()).isEqualTo(1);
  Node node = nodes.get(0);
  assertThat(node.getValue()).isEqualTo("[2.6,3)");
}
public void loadLocationsAsync(String kml) { List<LotLocation> locations = new ArrayList<LotLocation>(); try { XMLReader parser = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); InputStream is = new ByteArrayInputStream(kml.getBytes()); // build out an XML document using TagSoup Document doc = new Builder(parser).build(is); // set the ns of the document as the XPathContext or we will not find the elements when we // attempt to parse XPathContext context = new XPathContext("ns", "http://www.w3.org/1999/xhtml"); // get the Placemark nodes within the data Nodes nodes = doc.query(".//ns:Placemark", context); for (int index = 0; index < nodes.size(); index++) { LotLocation placemark = new LotLocation(); Node placemarkNode = nodes.get(index); Node nameNode = placemarkNode.query("ns:name", context).get(0); if (nameNode != null) placemark.setLocation(nameNode.getValue()); Node descriptionNode = placemarkNode.query("ns:description", context).get(0); if (descriptionNode != null) placemark.setDescription(descriptionNode.getValue()); Node lnglatNode = placemarkNode.query("ns:Point/ns:coordinates", context).get(0); if (lnglatNode != null) { // get longitude,latitude,altitude, per KML spec String[] points = lnglatNode.getValue().split(","); placemark.setPoint( new LatLng( Double.parseDouble(points[1].trim()), Double.parseDouble(points[0].trim()))); } locations.add(placemark); } // spin off a new thread and load locations new LoadLocationsTask().execute(locations); } catch (Exception e) { Log.e("LoadLocationsTask", "Failure attempting to load locations", e); } }