@Override protected void map( BytesWritable key, ArrayWritable value, Mapper<BytesWritable, ArrayWritable, BytesWritable, TextArrayWritable>.Context context) throws IOException, InterruptedException { Map<String, Report> reports = new LinkedHashMap<String, Report>(); Writable[] repts = value.get(); if (repts.length == 0 || !(repts[0] instanceof Text)) { System.out.println("error: bad input."); return; // bail out more drastically } Text[] repts_as_text = (Text[]) repts; for (Text t : repts_as_text) { Report r = Report.createFromString(t.toString()); reports.put(r.getMetadata().getOpIdString(), r); } Text[] indexed = indexGraph(reports); TextArrayWritable output = new TextArrayWritable(); output.set(indexed); context.write(key, output); }
/** * Indexes a set of reports, using Start and End tags output is a list of entries of the form: A: * time1,time2,time3 * * <p>If no matches, will return an empty array */ @SuppressWarnings("unchecked") public static Text[] indexGraph(Map<String, Report> reports) { org.apache.commons.collections.MultiMap index = new org.apache.commons.collections.MultiHashMap(); // map from start tag to opIds of nodes containing the ends for (Map.Entry<String, Report> report : reports.entrySet()) { Report start = report.getValue(); List<String> starts = start.get("Start"); if (starts != null) { for (String s : starts) { Report end = findMatchingEnd(reports, start, s); if (end == null) continue; List<String> endTL = end.get("Timestamp"); List<String> staTL = start.get("Timestamp"); if (staTL != null && endTL != null && staTL.size() > 0 && endTL.size() > 0) { // FIXME: perhaps parse more cleverly? double startT = Double.parseDouble(staTL.get(0)); double endT = Double.parseDouble(endTL.get(0)); Long diff = new Long((long) (1000 * (endT - startT))); index.put(s, diff); } } } } Text[] out = new Text[index.size()]; int i = 0; for (Object k : index.keySet()) { StringBuilder sb = new StringBuilder(); sb.append(k.toString()); sb.append(' '); Collection coll = (Collection) index.get(k); for (Object v : coll) { assert v instanceof Long : "how did a non-Long get into my collection?"; sb.append(v.toString()); sb.append(","); } sb.deleteCharAt(sb.length() - 1); Text t = new Text(sb.toString()); out[i++] = t; } return out; }
// do a BFS find closest report to start with endTag static Report findMatchingEnd(Map<String, Report> reports, Report start, String endTag) { LinkedList<Report> bfsQ = new LinkedList<Report>(); Set<String> seen = new HashSet<String>(); bfsQ.add(start); while (!bfsQ.isEmpty()) { Report cur = bfsQ.poll(); List<String> ends = cur.get("End"); if (ends != null && ends.contains(endTag)) return cur; List<String> outlinks = start.get(XtrExtract.OUTLINK_FIELD); if (outlinks == null) return null; for (String s : outlinks) { if (seen.contains(s)) continue; else seen.add(s); Report r = reports.get(s); if (r != null) bfsQ.add(r); } } return null; }