public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String strId = ""; String strBody = ""; // Parse the xml and read data (page id and article body) // Using XOM library Builder builder = new Builder(); try { Document doc = builder.build(value.toString(), null); Nodes nodeId = doc.query("//eecs485_article_id"); strId = nodeId.get(0).getChild(0).getValue(); Nodes nodeBody = doc.query("//eecs485_article_body"); strBody = nodeBody.get(0).getChild(0).getValue(); } catch (ParsingException ex) { System.out.println("Not well-formed."); System.out.println(ex.getMessage()); } catch (IOException ex) { System.out.println("io exception"); } // Tokenize document body Pattern pattern = Pattern.compile("\\w+"); Matcher matcher = pattern.matcher(strBody); while (matcher.find()) { // Write the parsed token // key = term, docid value = 1 context.write(new Text(matcher.group() + "," + strId), one); } }
/**
 * Loads every connectable definition described in an XML file.
 *
 * <p>Each child element of the document root is turned into one
 * {@code ConnectableDefinition}. A malformed document or an I/O failure is reported
 * on standard error; whatever was collected up to that point is still returned.
 * A missing or non-numeric {@code pins} / {@code number} attribute is not handled
 * here and surfaces as the {@code NumberFormatException} raised by
 * {@code Integer.parseInt}.
 *
 * @param connectableDefs the XML file to read
 * @return the definitions that were read; never {@code null}, possibly empty
 */
private Collection<ConnectableDefinition> loadConnectableDefs(File connectableDefs) {
    Collection<ConnectableDefinition> result = new HashSet<ConnectableDefinition>();
    try {
        Document xml = new Builder().build(connectableDefs);
        Elements definitionElements = xml.getRootElement().getChildElements();
        int definitionCount = definitionElements.size();
        for (int index = 0; index < definitionCount; index++) {
            result.add(readConnectableDefinition(definitionElements.get(index)));
        }
    } catch (ParsingException ex) {
        System.err.println("malformed XML file : " + ex.getMessage());
    } catch (IOException ex) {
        System.err.println("io error : " + ex.getMessage());
    }
    return result;
}

/**
 * Builds one definition from its element: the {@code id}, {@code type} and
 * {@code pins} attributes describe the definition itself, and every child element
 * contributes one pin through its {@code number} and {@code name} attributes.
 */
private ConnectableDefinition readConnectableDefinition(Element definitionElement) {
    String id = definitionElement.getAttributeValue("id");
    String type = definitionElement.getAttributeValue("type");
    int pinCount = Integer.parseInt(definitionElement.getAttributeValue("pins"));
    ConnectableDefinition definition = new ConnectableDefinition(id, type, pinCount);

    Elements pinElements = definitionElement.getChildElements();
    int declaredPins = pinElements.size();
    for (int index = 0; index < declaredPins; index++) {
        Element pin = pinElements.get(index);
        int number = Integer.parseInt(pin.getAttributeValue("number"));
        definition.addPin(number, pin.getAttributeValue("name"));
    }
    return definition;
}
protected ComponentSet loadSet(File componentsDef, File connectionsDef) { ComponentSet set = null; // for collecting all connectables Map<String, Connectable> connectables = new HashMap<String, Connectable>(); try { // open the components.xml file Builder builder = new Builder(); Document doc = builder.build(componentsDef); Element componentsRoot = doc.getRootElement(); int rows = Integer.parseInt(componentsRoot.getAttributeValue("rows")); int cols = Integer.parseInt(componentsRoot.getAttributeValue("cols")); boolean autoLocate = Boolean.parseBoolean(componentsRoot.getAttributeValue("autoLocate")); // add connectables (components need to be located in the set, // endpoints are not so they are just added to the connectables map Elements componentElements = componentsRoot.getChildElements(); if (autoLocate) { set = addComponentsWithAutoLocate(connectables, rows, cols, componentElements); } else { set = addComponents(connectables, rows, cols, componentElements); } doc = builder.build(connectionsDef); Element connectionsRoot = doc.getRootElement(); // add connections Elements connections = connectionsRoot.getChildElements(); for (int i = 0; i < connections.size(); i++) { Element c = connections.get(i); String from = c.getAttributeValue("from"); String to = c.getAttributeValue("to"); String fromPinLabel = c.getAttributeValue("fromPin"); String toPinLabel = c.getAttributeValue("toPin"); Connectable fromComp = connectables.get(from); Connectable toComp = connectables.get(to); Collection<Pin> fromPins = fromComp.getPins(fromPinLabel); Collection<Pin> toPins = toComp.getPins(toPinLabel); Connection con = new Connection(fromComp, fromPins, toComp, toPins); set.addConnection(con); } } catch (ParsingException ex) { System.err.println("malformed XML file : " + ex.getMessage()); } catch (IOException ex) { System.err.println("io error : " + ex.getMessage()); } return set; }
/**
 * Command-line entry point: parses the document named by the first argument and
 * prints whether it is valid, invalid, not well-formed, or could not be read.
 *
 * <p>The parser is created with its validation flag set to {@code true} and a
 * {@code MinimalNodeFactory}. All output goes to standard output. With no
 * arguments a usage line is printed and nothing is parsed.
 *
 * @param args command-line arguments; {@code args[0]} is the document location
 */
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println("Usage: java nu.xom.samples.PureValidator URL");
        return;
    }

    final String url = args[0];
    try {
        new Builder(true, new MinimalNodeFactory()).build(url);
        System.out.println(url + " is valid.");
    } catch (ValidityException invalid) {
        reportProblem(url + " is not valid.", invalid);
    } catch (ParsingException malformed) {
        reportProblem(url + " is not well-formed.", malformed);
    } catch (IOException unreadable) {
        System.out.println("Due to an IOException, the parser could not check " + url);
    }
}

/** Prints a verdict line followed by the parser's message and the reported position. */
private static void reportProblem(String verdict, ParsingException problem) {
    System.out.println(verdict);
    System.out.println(problem.getMessage());
    System.out.println(" at line " + problem.getLineNumber()
            + ", column " + problem.getColumnNumber());
}