private boolean outputHelper( HalfPair hp, int vectorID, VectorComponentArrayWritable vector, float similarity, OutputCollector<VectorPair, FloatWritable> output, Reporter reporter) throws IOException { reporter.incrCounter(APS.EVALUATED, 1); reporter.progress(); if (haspruned) { VectorComponentArrayWritable remainder = pruned.get(hp.getID()); if (remainder != null) { // cheap upper bound dot(x,y) <= min(|x|,|y|) * maxweight(x) * maxweight(y) // double dotProdBound = min(remainder.length(), vector.length()) * // remainder.getMaxWeight() // * vector.getMaxWeight(); // if (compare(similarity + dotProdBound, threshold) >= 0) similarity += VectorComponentArrayWritable.dotProduct(vector, remainder); } else { LOG.warn("No remainder found for vector " + hp.getID()); } } if (compare(similarity, threshold) >= 0) { int firstID = VectorPair.canonicalFirst(vectorID, hp.getID()); int secondID = VectorPair.canonicalSecond(vectorID, hp.getID()); outKey.set(firstID, secondID); outValue.set(similarity); output.collect(outKey, outValue); reporter.incrCounter(APS.SIMILAR, 1); return true; } return false; }
@Override public void reduce( GenericKey key, Iterator<GenericValue> values, OutputCollector<GenericKey, GenericValue> output, Reporter reporter) throws IOException { if (key.getSecondary() < Preprocesser.MINIMUM_ID) { // vector output.collect(key, values.next()); if (values.hasNext()) assert false : "Vectors should not get grouped by combiner: " + key; } else { // addend reporter.progress(); int counter = 0; float sim = 0; HalfPair hp = null; while (values.hasNext()) { hp = (HalfPair) values.next().get(); sim += hp.getSimilarity(); if (counter++ % REPORTER_INTERVAL == 0) reporter.progress(); } if (hp != null) { payload.set(hp.getID(), sim); outValue.set(payload); output.collect(key, outValue); } else { assert false : "There is nothing to combine!"; } } }