コード例 #1
0
 /**
  * Combines the values grouped under one key.
  *
  * <p>A key whose secondary component is below {@code Preprocesser.MINIMUM_ID} is handled as a
  * vector: its first value is forwarded unchanged, and any additional value trips an assertion.
  * Every other key is handled as an addend: the similarities of all incoming {@code HalfPair}s
  * are summed and emitted as a single value that carries the ID of the last pair read.
  *
  * @param key key shared by all {@code values}
  * @param values values grouped under {@code key}
  * @param output collector receiving the forwarded vector or the summed addend
  * @param reporter used to signal progress while the values are consumed
  * @throws IOException propagated from {@code output.collect}
  */
 @Override
 public void reduce(
     GenericKey key,
     Iterator<GenericValue> values,
     OutputCollector<GenericKey, GenericValue> output,
     Reporter reporter)
     throws IOException {
   final boolean isVector = key.getSecondary() < Preprocesser.MINIMUM_ID;
   if (isVector) {
     // Vectors pass through as-is; exactly one value is expected per vector key.
     output.collect(key, values.next());
     if (values.hasNext()) {
       assert false : "Vectors should not get grouped by combiner: " + key;
     }
     return;
   }

   // Addend: fold every half pair into one running similarity.
   reporter.progress();
   float total = 0;
   HalfPair last = null;
   for (int seen = 0; values.hasNext(); seen++) {
     last = (HalfPair) values.next().get();
     total += last.getSimilarity();
     // Report progress on the first value and then once every REPORTER_INTERVAL values.
     if (seen % REPORTER_INTERVAL == 0) {
       reporter.progress();
     }
   }

   if (last == null) {
     assert false : "There is nothing to combine!";
     return;
   }
   payload.set(last.getID(), total);
   outValue.set(payload);
   output.collect(key, outValue);
 }
コード例 #2
0
 /**
  * Emits the incoming vector once per stripe.
  *
  * <p>Copy number {@code i} (for {@code i} from 1 to {@code nstripes}) is keyed by the vector ID
  * and the secondary key {@code Preprocesser.MINIMUM_ID - i}, so every copy carries a secondary
  * key strictly below {@code MINIMUM_ID}.
  *
  * @param vectorID ID of the vector being replicated
  * @param value the vector's components
  * @param output collector receiving one record per stripe
  * @param reporter not used by this method
  * @throws IOException propagated from {@code output.collect}
  */
 @Override
 public void map(
     IntWritable vectorID,
     VectorComponentArrayWritable value,
     OutputCollector<GenericKey, GenericValue> output,
     Reporter reporter)
     throws IOException {
   // Secondary keys below MINIMUM_ID make vectors sort ahead of pairs.
   final int id = vectorID.get();
   int stripe = 1;
   while (stripe <= nstripes) {
     outKey.set(id, Preprocesser.MINIMUM_ID - stripe);
     outValue.set(value);
     output.collect(outKey, outValue);
     stripe++;
   }
 }
コード例 #3
0
    /**
     * Sums the similarities of consecutive equal half pairs and hands each finished run to
     * {@code outputHelper}.
     *
     * <p>The first value is read as the vector for the key's primary ID; every following value is
     * read as a {@code HalfPair}. Consecutive pairs that compare equal are accumulated into one
     * similarity; when a different pair appears, the accumulated run is passed to
     * {@code outputHelper} and a new run begins. The {@code APS.COMBINED} counter is incremented
     * once per half pair read.
     *
     * @param key key whose primary component is the vector ID; its secondary component is
     *     asserted to be {@code -1}
     * @param values the vector followed by zero or more half pairs
     * @param output collector forwarded to {@code outputHelper}
     * @param reporter used for counters and progress, and forwarded to {@code outputHelper}
     * @throws IOException propagated from {@code outputHelper}
     */
    @Override
    public void reduce(
        GenericKey key,
        Iterator<GenericValue> values,
        OutputCollector<VectorPair, FloatWritable> output,
        Reporter reporter)
        throws IOException {

      final int vectorID = key.getPrimary();
      assert (key.getSecondary() == -1);
      // The vector always arrives as the first value.
      final VectorComponentArrayWritable vector =
          (VectorComponentArrayWritable) values.next().get();

      // Nothing to combine when the vector has no half pairs attached.
      if (!values.hasNext()) {
        return;
      }

      // Half pairs are sorted so that equal pairs are adjacent; seed the first run.
      reporter.incrCounter(APS.COMBINED, 1);
      HalfPair current = (HalfPair) values.next().get();
      float accumulated = current.getSimilarity();

      for (int seen = 0; values.hasNext(); seen++) {
        reporter.incrCounter(APS.COMBINED, 1);
        if (seen % REPORTER_INTERVAL == 0) {
          reporter.progress();
        }
        final HalfPair candidate = (HalfPair) values.next().get();
        if (!current.equals(candidate)) {
          // The run ended: emit it and start a new one from the candidate.
          outputHelper(current, vectorID, vector, accumulated, output, reporter);
          current = candidate;
          accumulated = current.getSimilarity();
        } else {
          accumulated += candidate.getSimilarity();
        }
      }

      // Emit the run that was still open when the values ran out.
      outputHelper(current, vectorID, vector, accumulated, output, reporter);
    }
コード例 #4
0
 /**
  * Emits one partial-similarity addend for each pair of posting-list entries that survives the
  * pruning filters.
  *
  * <p>Every unordered pair {@code (x, y)} of entries is tested against two length filters and an
  * upper bound on the similarity score. For each surviving pair, the product of the two weights
  * is emitted under the key (least pruned vector ID, most pruned vector ID), and the
  * {@code APS.ADDEND} counter is incremented.
  *
  * @param key not used by this method
  * @param value posting list to expand into candidate pairs
  * @param output collector receiving one addend per surviving pair
  * @param reporter used for progress signalling and the {@code APS.ADDEND} counter
  * @throws IOException propagated from {@code output.collect}
  */
 @Override
 public void map(
     LongWritable key,
     IndexItemArrayWritable value,
     OutputCollector<GenericKey, GenericValue> output,
     Reporter reporter)
     throws IOException {
   final IndexItem[] postings = value.toIndexItemArray();
   for (int i = 1; i < postings.length; i++) {
     final IndexItem x = postings[i];
     for (int j = 0; j < i; j++) {
       final IndexItem y = postings[j];

       // Length filter: |x| >= ceil(t / maxweight(y))
       final boolean xLongEnough =
           compare(x.vectorLength(), Math.ceil(threshold / y.vectorMaxWeight())) >= 0;
       if (!xLongEnough) {
         continue;
       }
       // Length filter: |y| >= ceil(t / maxweight(x))
       final boolean yLongEnough =
           compare(y.vectorLength(), Math.ceil(threshold / x.vectorMaxWeight())) >= 0;
       if (!yLongEnough) {
         continue;
       }
       // Tight upper bound on the similarity score must reach the threshold.
       final boolean boundReachesThreshold =
           compare(
                   min(x.vectorMaxWeight() * y.vectorSum(), y.vectorMaxWeight() * x.vectorSum()),
                   threshold)
               >= 0;
       if (!boundReachesThreshold) {
         continue;
       }
       // Disabled positional filter, kept for reference:
       //   compare(min(x.positionalMaxWeight() * y.positionalSum(),
       //               y.positionalMaxWeight() * x.positionalSum())
       //           + x.getWeight() * y.getWeight(), threshold) >= 0

       if (j % REPORTER_INTERVAL == 0) {
         reporter.progress();
       }
       final int leastPruned = IndexItem.getLeastPrunedVectorID(x, y);
       final int mostPruned = IndexItem.getMostPrunedVectorID(x, y);
       final float partialSimilarity = (float) (x.getWeight() * y.getWeight());
       outKey.set(leastPruned, mostPruned);
       payload.set(mostPruned, partialSimilarity);
       outValue.set(payload);
       output.collect(outKey, outValue);
       reporter.incrCounter(APS.ADDEND, 1);
     }
   }
 }