Пример #1
0
    /**
     * Finalizes the similarity of one candidate pair and emits it when it reaches the threshold.
     *
     * <p>Every call increments the {@code APS.EVALUATED} counter and reports progress. When
     * {@code haspruned} is set, the entry stored in {@code pruned} under the ID of {@code hp} is
     * looked up and its dot product with {@code vector} is added to the score; a warning is
     * logged if no such entry exists.
     *
     * @param hp half pair supplying the ID of the other vector
     * @param vectorID ID of the vector currently being processed
     * @param vector components of the vector currently being processed
     * @param similarity score accumulated so far for this pair
     * @param output collector receiving the pair and its final score
     * @param reporter used for counters and progress reporting
     * @return {@code true} if the pair was emitted, {@code false} otherwise
     * @throws IOException if the collector fails to write the record
     */
    private boolean outputHelper(
        HalfPair hp,
        int vectorID,
        VectorComponentArrayWritable vector,
        float similarity,
        OutputCollector<VectorPair, FloatWritable> output,
        Reporter reporter)
        throws IOException {
      reporter.incrCounter(APS.EVALUATED, 1);
      reporter.progress();

      float score = similarity;
      if (haspruned) {
        final VectorComponentArrayWritable residual = pruned.get(hp.getID());
        if (residual == null) {
          LOG.warn("No remainder found for vector " + hp.getID());
        } else {
          // Note: a cheap upper bound, min(|x|,|y|) * maxweight(x) * maxweight(y), could be
          // tested before paying for the exact product; that check is currently not applied.
          score += VectorComponentArrayWritable.dotProduct(vector, residual);
        }
      }

      // Below the threshold: nothing is emitted.
      if (compare(score, threshold) < 0) {
        return false;
      }

      // The output key holds the two IDs in canonical order.
      outKey.set(
          VectorPair.canonicalFirst(vectorID, hp.getID()),
          VectorPair.canonicalSecond(vectorID, hp.getID()));
      outValue.set(score);
      output.collect(outKey, outValue);
      reporter.incrCounter(APS.SIMILAR, 1);
      return true;
    }
Пример #2
0
 /**
  * Forwards vector records unchanged and merges addend records into a single partial sum.
  *
  * <p>A key whose secondary component is below {@code Preprocesser.MINIMUM_ID} is treated as a
  * vector: its first value is emitted as is, and any further value trips an assertion. Any
  * other key is treated as an addend: the similarities of all its {@link HalfPair} values are
  * summed and emitted as one value carrying the ID of the last half pair read.
  *
  * @param key key shared by all values in this group
  * @param values values grouped under {@code key}
  * @param output collector receiving the forwarded or combined record
  * @param reporter used to signal progress while iterating
  * @throws IOException if the collector fails to write the record
  */
 @Override
 public void reduce(
     GenericKey key,
     Iterator<GenericValue> values,
     OutputCollector<GenericKey, GenericValue> output,
     Reporter reporter)
     throws IOException {
   final boolean isVector = key.getSecondary() < Preprocesser.MINIMUM_ID;
   if (isVector) {
     output.collect(key, values.next());
     if (values.hasNext()) {
       assert false : "Vectors should not get grouped by combiner: " + key;
     }
     return;
   }

   // Addend: fold every partial similarity into one running total.
   reporter.progress();
   float total = 0;
   HalfPair last = null;
   for (int seen = 0; values.hasNext(); seen++) {
     last = (HalfPair) values.next().get();
     total += last.getSimilarity();
     // Report progress once every REPORTER_INTERVAL values, starting with the first.
     if (seen % REPORTER_INTERVAL == 0) {
       reporter.progress();
     }
   }

   if (last == null) {
     assert false : "There is nothing to combine!";
     return;
   }
   payload.set(last.getID(), total);
   outValue.set(payload);
   output.collect(key, outValue);
 }