/**
 * Orders two intermediate tuples by their scores.
 *
 * <p>Each argument is decoded with {@code ScoredTuple.fromIntermediateTuple} and the
 * result of comparing the first score to the second (via {@code compareTo}) is returned.
 *
 * @param o1 the first intermediate tuple
 * @param o2 the second intermediate tuple
 * @return a negative, zero, or positive value as the score of {@code o1} compares
 *         less than, equal to, or greater than the score of {@code o2}
 * @throws RuntimeException if either tuple cannot be decoded or compared; the original
 *         exception is preserved as the cause
 */
@Override
public int compare(Tuple o1, Tuple o2) {
  try {
    ScoredTuple t1 = ScoredTuple.fromIntermediateTuple(o1);
    ScoredTuple t2 = ScoredTuple.fromIntermediateTuple(o2);
    return t1.getScore().compareTo(t2.getScore());
  } catch (Exception e) {
    // Only Exceptions are wrapped. JVM Errors (OutOfMemoryError, StackOverflowError, ...)
    // are deliberately left to propagate unchanged instead of being disguised as a
    // comparison failure.
    throw new RuntimeException("Cannot compare " + o1 + " and " + o2 + ".", e);
  }
}
/**
 * Merges a bag of partial sampling results into a single partial result.
 *
 * <p>Field 0 of {@code input} is read as a bag. Each tuple in that bag is read as:
 * field 0 a {@code Long} item count, field 1 a bag of already-selected tuples, and
 * field 2 a bag of wait-listed intermediate (scored) tuples.
 *
 * <p>The counts are summed into {@code n}, the selected bags are concatenated, and the
 * wait-listed tuples are re-partitioned against the thresholds returned by
 * {@code getQ1} / {@code getQ2}: a score below q1 is selected, a score in [q1, q2) stays
 * wait-listed, and anything else is dropped.
 *
 * @param input tuple whose field 0 is the bag of partial results described above
 * @return a tuple of (total count {@code n}, selected bag, wait-listed bag)
 * @throws IOException if reading a field from a tuple fails
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  DataBag bag = (DataBag) input.get(0);
  DataBag selected = bagFactory.newDefaultBag();
  DataBag aggWaiting = bagFactory.newSortedBag(new ScoredTupleComparator());
  DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());
  Tuple output = tupleFactory.newTuple();

  long n = 0L;

  // First pass: fold in each partial result, filtering its wait list against the
  // thresholds for the running total seen so far.
  for (Tuple innerTuple : bag) {
    n += (Long) innerTuple.get(0);

    selected.addAll((DataBag) innerTuple.get(1));

    double q1 = getQ1(n, _samplingProbability);
    double q2 = getQ2(n, _samplingProbability);

    for (Tuple t : (DataBag) innerTuple.get(2)) {
      ScoredTuple scored = ScoredTuple.fromIntermediateTuple(t);

      if (scored.getScore() < q1) {
        selected.add(scored.getTuple());
      } else if (scored.getScore() < q2) {
        aggWaiting.add(t);
      } else {
        // NOTE(review): stopping at the first score >= q2 presumably relies on this
        // bag iterating in ascending score order - confirm against the producer.
        break;
      }
    }
  }

  // Second pass: re-filter the aggregated wait list against the thresholds for the
  // final total n.
  double q1 = getQ1(n, _samplingProbability);
  double q2 = getQ2(n, _samplingProbability);

  for (Tuple t : aggWaiting) {
    ScoredTuple scored = ScoredTuple.fromIntermediateTuple(t);

    if (scored.getScore() < q1) {
      selected.add(scored.getTuple());
    } else if (scored.getScore() < q2) {
      waiting.add(t);
    } else {
      // aggWaiting is a sorted bag; NOTE(review): assumes ScoredTupleComparator sorts
      // ascending by score so every remaining tuple is also >= q2 - confirm.
      break;
    }
  }

  output.append(n);
  output.append(selected);
  output.append(waiting);

  // Report the size of the wait list that is actually emitted (waiting), not the
  // intermediate aggregate it was filtered from.
  System.err.println(
      "Read " + n + " items, selected " + selected.size()
          + ", and wait-listed " + waiting.size() + ".");

  return output;
}