@Override public void reduce( GenericKey key, Iterator<GenericValue> values, OutputCollector<GenericKey, GenericValue> output, Reporter reporter) throws IOException { if (key.getSecondary() < Preprocesser.MINIMUM_ID) { // vector output.collect(key, values.next()); if (values.hasNext()) assert false : "Vectors should not get grouped by combiner: " + key; } else { // addend reporter.progress(); int counter = 0; float sim = 0; HalfPair hp = null; while (values.hasNext()) { hp = (HalfPair) values.next().get(); sim += hp.getSimilarity(); if (counter++ % REPORTER_INTERVAL == 0) reporter.progress(); } if (hp != null) { payload.set(hp.getID(), sim); outValue.set(payload); output.collect(key, outValue); } else { assert false : "There is nothing to combine!"; } } }
@Override public void reduce( GenericKey key, Iterator<GenericValue> values, OutputCollector<VectorPair, FloatWritable> output, Reporter reporter) throws IOException { int vectorID = key.getPrimary(); assert (key.getSecondary() == -1); // the vector is the first value VectorComponentArrayWritable vector = (VectorComponentArrayWritable) values.next().get(); // half pairs are sorted such that all equal pairs are consecutive if (values.hasNext()) { reporter.incrCounter(APS.COMBINED, 1); HalfPair hp1 = (HalfPair) values.next().get(); float similarity = hp1.getSimilarity(); HalfPair hp2; int counter = 0; while (values.hasNext()) { reporter.incrCounter(APS.COMBINED, 1); if (counter++ % REPORTER_INTERVAL == 0) reporter.progress(); hp2 = (HalfPair) values.next().get(); if (hp1.equals(hp2)) { similarity += hp2.getSimilarity(); } else { // output outputHelper(hp1, vectorID, vector, similarity, output, reporter); // start new stripe hp1 = hp2; similarity = hp1.getSimilarity(); } } // output the last one outputHelper(hp1, vectorID, vector, similarity, output, reporter); } }