/**
 * Sums the partial dot-product vectors received for one row and turns the accumulated dots
 * into a vector of similarity values.
 *
 * <p>Every non-zero accumulated entry is handed to {@code similarity.similarity(...)} together
 * with the norm of this row, the norm of the other row and {@code numberOfColumns}. Only results
 * that are {@code >= treshold} are stored. When {@code excludeSelfSimilarity} is set, the entry
 * at the row's own index is overwritten with 0 before the result is written.
 *
 * @param row index of the row being reduced
 * @param partialDots partial dot-product vectors for this row; the first element is taken
 *     without a {@code hasNext()} check, so at least one value is expected
 * @param ctx context that receives the resulting similarity vector
 */
@Override
    protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
        throws IOException, InterruptedException {
      Iterator<VectorWritable> partials = partialDots.iterator();
      // The first partial vector doubles as the accumulator for all remaining ones.
      Vector accumulated = partials.next().get();
      while (partials.hasNext()) {
        Iterator<Vector.Element> addends = partials.next().get().iterateNonZero();
        while (addends.hasNext()) {
          Vector.Element addend = addends.next();
          int column = addend.index();
          accumulated.setQuick(column, accumulated.getQuick(column) + addend.get());
        }
      }

      int rowIndex = row.get();
      Vector similarities = accumulated.like();
      double rowNorm = norms.getQuick(rowIndex);
      for (Iterator<Vector.Element> others = accumulated.iterateNonZero(); others.hasNext(); ) {
        Vector.Element other = others.next();
        double dot = other.get();
        int otherIndex = other.index();
        double value =
            similarity.similarity(dot, rowNorm, norms.getQuick(otherIndex), numberOfColumns);
        // Keep the ">=" form: a NaN result must fail the test and be left out.
        if (value >= treshold) {
          similarities.set(otherIndex, value);
        }
      }
      if (excludeSelfSimilarity) {
        similarities.setQuick(rowIndex, 0);
      }
      ctx.write(row, new VectorWritable(similarities));
    }
Example #2
0
 // utility functions
 /**
  * Adds, index by index, the absolute difference between {@code sparseVector} and
  * {@code meanVector} to {@code denseVector}. The update happens in place.
  *
  * @param denseVector accumulator that is modified; its size bounds the loop
  * @param sparseVector vector whose entries are compared against the mean
  * @param meanVector vector of mean values subtracted from {@code sparseVector}
  */
 static void denseVectorPlusAbsDenseDiff(
     DenseVector denseVector, Vector sparseVector, DenseVector meanVector) {
   int index = 0;
   while (index < denseVector.size()) {
     double deviation = Math.abs(sparseVector.getQuick(index) - meanVector.getQuick(index));
     denseVector.setQuick(index, denseVector.getQuick(index) + deviation);
     index++;
   }
 }
Example #3
0
 /**
  * Subtracts {@code subtractor1} and then {@code subtractor2} from {@code mainVector}, index by
  * index, writing the result back into {@code mainVector}.
  *
  * @param mainVector vector that is modified in place; its size bounds the loop
  * @param subtractor1 first vector to subtract
  * @param subtractor2 second vector to subtract
  */
 static void denseVectorSubtractSparseSubtractDense(
     DenseVector mainVector, Vector subtractor1, DenseVector subtractor2) {
   final int length = mainVector.size();
   int col = 0;
   while (col < length) {
     // Evaluated left to right: (main - subtractor1) - subtractor2.
     double remaining =
         mainVector.getQuick(col) - subtractor1.getQuick(col) - subtractor2.getQuick(col);
     mainVector.setQuick(col, remaining);
     col++;
   }
 }
Example #4
0
      /**
       * For each chunk column, accumulates the sum over the chunk's rows of the row value times
       * the matching entry of the vector returned by {@code bx.value()}. The sums are stored in
       * a freshly allocated {@code atx} array with one slot per chunk.
       *
       * <p>The vector entry for local row {@code r} is read at {@code (int) start + r}, where
       * {@code start} is the start offset reported by the first chunk.
       *
       * @param chks chunks to process; the first one supplies the row count and start offset
       */
      public void map(Chunk chks[]) {
        int rows = chks[0].len();
        Vector multiplier = bx.value();
        long firstRow = chks[0].start();

        atx = new double[chks.length];
        for (int r = 0; r < rows; r++) {
          double weight = multiplier.getQuick((int) firstRow + r);
          int slot = 0;
          for (Chunk column : chks) {
            atx[slot] += column.at0(r) * weight;
            slot++;
          }
        }
      }
    /**
     * Computes the Y row for the incoming row via {@code omega.computeYRow} and adds the outer
     * product of that Y row with itself to {@code mYtY}. Only cells whose column index is
     * greater than or equal to the row index are updated.
     *
     * @param key input key; not read by this method
     * @param value row vector passed to {@code omega.computeYRow}
     * @param context mapper context; not used by this method
     */
    @Override
    protected void map(Writable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
      omega.computeYRow(value.get(), yRow);

      if (!yRow.isDense()) {
        /*
         * Sparse path: pair up the non-zero elements only. The drawback (besides creating some
         * short-lived objects) is that the inner loop also visits elements with j < i, so a
         * fairly dense y row costs about twice the iterations actually needed.
         */
        Iterator<Vector.Element> outer = yRow.iterateNonZero();
        while (outer.hasNext()) {
          Vector.Element rowElement = outer.next();
          int i = rowElement.index();
          Iterator<Vector.Element> inner = yRow.iterateNonZero();
          while (inner.hasNext()) {
            Vector.Element colElement = inner.next();
            int j = colElement.index();
            if (j >= i) {
              mYtY.setQuick(i, j, mYtY.getQuick(i, j) + rowElement.get() * colElement.get());
            }
          }
        }
        return;
      }

      // Dense path: index loops over the first kp entries, skipping zero factors so that
      // mYtY is not densified unnecessarily.
      for (int i = 0; i < kp; i++) {
        double yi = yRow.getQuick(i);
        if (yi != 0.0) {
          for (int j = i; j < kp; j++) {
            double yj = yRow.getQuick(j);
            if (yj != 0.0) {
              mYtY.setQuick(i, j, mYtY.getQuick(i, j) + yi * yj);
            }
          }
        }
      }
    }
Example #6
0
 /**
  * A version that computes yRow as a sparse vector, for extremely sparse matrices.
  *
  * <p>{@code yRowOut} is reset to all zeros first. Then {@code accumDots} is invoked once per
  * position of {@code aRow}: for every index when the row reports itself as dense, otherwise
  * only for the entries returned by {@code iterateNonZero()}.
  *
  * @param aRow input row whose entries are accumulated
  * @param yRowOut output vector; its previous contents are overwritten
  */
 public void computeYRow(Vector aRow, Vector yRowOut) {
   yRowOut.assign(0.0);
   if (!aRow.isDense()) {
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRowOut);
     }
     return;
   }
   for (int col = 0, size = aRow.size(); col < size; col++) {
     accumDots(col, aRow.getQuick(col), yRowOut);
   }
 }
Example #7
0
 /**
  * Computes YRow = ARow * Omega into a plain array.
  *
  * <p>The output array is zero-filled first. Then {@code accumDots} is invoked once per
  * position of {@code aRow}: for every index when the row reports itself as dense, otherwise
  * only for the entries returned by {@code iterateNonZero()}.
  *
  * @param aRow row of matrix A (size n)
  * @param yRow row of matrix Y (result); must be pre-allocated to size (k+p). The length is
  *     not checked here.
  */
 @Deprecated
 public void computeYRow(Vector aRow, double[] yRow) {
   Arrays.fill(yRow, 0.0);
   if (!aRow.isDense()) {
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRow);
     }
     return;
   }
   for (int col = 0, size = aRow.size(); col < size; col++) {
     accumDots(col, aRow.getQuick(col), yRow);
   }
 }
Example #8
0
 /**
  * Adds to each of the first {@code kp} entries of {@code yRow} the product of
  * {@code aElement} and the value returned by {@code getQuick(aIndex, i)} for that entry.
  *
  * @param aIndex index passed as the first argument to {@code getQuick}
  * @param aElement multiplier applied to every looked-up value
  * @param yRow vector that is updated in place
  */
 protected void accumDots(int aIndex, double aElement, Vector yRow) {
   int i = 0;
   while (i < kp) {
     double current = yRow.getQuick(i);
     double contribution = getQuick(aIndex, i) * aElement;
     yRow.setQuick(i, current + contribution);
     i++;
   }
 }
 /**
  * Looks up the entry of {@code weightsPerFeature} at the given index.
  *
  * @param feature index to read; passed to {@code getQuick} unchanged
  * @return the value stored at that index
  */
 public double featureWeight(int feature) {
   final double weight = weightsPerFeature.getQuick(feature);
   return weight;
 }
 /**
  * Looks up the entry of {@code weightsPerLabel} at the given index.
  *
  * @param label index to read; passed to {@code getQuick} unchanged
  * @return the value stored at that index
  */
 public double labelWeight(int label) {
   final double weight = weightsPerLabel.getQuick(label);
   return weight;
 }