/**
 * Adds up the partial dot-product vectors received for one row, turns every non-zero dot product
 * into a similarity value and writes out the similarities that are at least the threshold.
 *
 * @param row index of the row the partial dot products belong to
 * @param partialDots partial dot-product vectors for {@code row}; the first one is fetched
 *     without a {@code hasNext()} check, so at least one is expected
 * @param ctx context the resulting similarity vector is written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
    protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
        throws IOException, InterruptedException {
      Iterator<VectorWritable> partials = partialDots.iterator();
      // The first partial vector doubles as the accumulator for all the remaining ones.
      Vector accumulated = partials.next().get();
      while (partials.hasNext()) {
        Vector partial = partials.next().get();
        for (Iterator<Vector.Element> entries = partial.iterateNonZero(); entries.hasNext(); ) {
          Vector.Element entry = entries.next();
          int column = entry.index();
          accumulated.setQuick(column, accumulated.getQuick(column) + entry.get());
        }
      }

      int rowIndex = row.get();
      double normOfRow = norms.getQuick(rowIndex);
      Vector result = accumulated.like();
      for (Iterator<Vector.Element> dotProducts = accumulated.iterateNonZero();
          dotProducts.hasNext(); ) {
        Vector.Element dotProduct = dotProducts.next();
        int otherRow = dotProduct.index();
        double value =
            similarity.similarity(
                dotProduct.get(), normOfRow, norms.getQuick(otherRow), numberOfColumns);
        // Only values that reach the threshold are kept; everything else stays unset.
        if (value >= treshold) {
          result.set(otherRow, value);
        }
      }
      if (excludeSelfSimilarity) {
        // Blank out the entry that pairs the row with itself.
        result.setQuick(rowIndex, 0);
      }
      ctx.write(row, new VectorWritable(result));
    }
 /**
  * Selects up to {@code recommendationsPerUser} entries of the incoming vector through a
  * {@code TopK} ordered by {@code BY_PREFERENCE_VALUE} and writes them as a single string of the
  * form {@code id DELIMETER value DELIMETER id DELIMETER value ...}.
  *
  * <p>Nothing is written when no item is retrieved.
  *
  * @param key key that is passed through unchanged to the output
  * @param value vector whose non-zero entries are the candidate items (index = item id, value =
  *     item value, narrowed to {@code float})
  * @param context context the formatted line is written to
  * @throws IOException if writing to the context fails
  * @throws InterruptedException if writing to the context is interrupted
  */
 @Override
 protected void map(IntWritable key, VectorWritable value, Context context)
     throws IOException, InterruptedException {
   TopK<RecommendedItem> topKItems =
       new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
   Iterator<Vector.Element> iter = value.get().iterateNonZero();
   while (iter.hasNext()) {
     Vector.Element e = iter.next();
     topKItems.offer(new GenericRecommendedItem(e.index(), (float) e.get()));
   }

   // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
   // The retrieved items are formatted directly; copying them into a second list of freshly
   // allocated items added nothing to the output.
   StringBuilder sb = new StringBuilder();
   for (RecommendedItem item : topKItems.retrieve()) {
     if (sb.length() > 0) {
       sb.append(DELIMETER);
     }
     sb.append(item.getItemID()).append(DELIMETER).append(item.getValue());
   }

   // Every formatted item contributes at least its id, so an empty buffer means "no items".
   if (sb.length() > 0) {
     outValue.set(sb.toString());
     context.write(key, outValue);
   }
 }
    /**
     * For every non-zero entry of a similarity row, writes a one-entry vector keyed by the entry's
     * index (holding the value at position {@code row}), and finally writes, keyed by {@code row},
     * a vector built from the elements that the {@code TopElementsQueue} of capacity
     * {@code maxSimilaritiesPerRow} returns.
     *
     * @param row index of the similarity row being processed
     * @param similaritiesWritable the similarity values of that row
     * @param ctx context all vectors are written to
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
        throws IOException, InterruptedException {
      Vector similarities = similaritiesWritable.get();
      int cardinality = similarities.size();
      int rowIndex = row.get();

      // A single scratch vector is allocated once and reused for every emitted entry: it is
      // filled at position rowIndex, written, and cleared again inside the loop.
      Vector scratch = new RandomAccessSparseVector(cardinality, 1);
      TopElementsQueue queue = new TopElementsQueue(maxSimilaritiesPerRow);

      for (Iterator<Vector.Element> entries = similarities.iterateNonZero();
          entries.hasNext(); ) {
        Vector.Element entry = entries.next();
        double value = entry.get();

        // Overwrite the queue's current top element when the candidate is larger, then tell the
        // queue that its top element changed.
        MutableElement head = queue.top();
        if (value > head.get()) {
          head.setIndex(entry.index());
          head.set(value);
          queue.updateTop();
        }

        scratch.setQuick(rowIndex, value);
        ctx.write(new IntWritable(entry.index()), new VectorWritable(scratch));
        scratch.setQuick(rowIndex, 0.0);
      }

      Vector kept = new RandomAccessSparseVector(cardinality, maxSimilaritiesPerRow);
      for (Vector.Element element : queue.getTopElements()) {
        kept.setQuick(element.index(), element.get());
      }
      ctx.write(row, new VectorWritable(kept));
    }
    /**
     * Normalizes the incoming row via {@code similarity.normalize}, writes one single-entry vector
     * per non-zero element (keyed by the element's index, holding the value at position
     * {@code row}), and records per-row statistics: the norm always, and the number of non-zero
     * entries plus the maximum value only when a threshold is configured.
     *
     * <p>The running maximum starts at {@link Double#NEGATIVE_INFINITY}. It previously started at
     * {@link Double#MIN_VALUE}, which is the smallest <em>positive</em> double, so a row made up
     * only of negative values recorded that tiny positive number instead of its real maximum. A
     * row without any non-zero entry now records {@code Double.NEGATIVE_INFINITY}.
     *
     * @param row index of the row being processed
     * @param vectorWritable the row's values
     * @param ctx context the single-entry vectors are written to
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
        throws IOException, InterruptedException {

      Vector rowVector = similarity.normalize(vectorWritable.get());

      int numNonZeroEntries = 0;
      // NEGATIVE_INFINITY is the identity for max(); Double.MIN_VALUE is > 0 and would win
      // against every negative element.
      double maxValue = Double.NEGATIVE_INFINITY;

      Iterator<Vector.Element> nonZeroElements = rowVector.iterateNonZero();
      while (nonZeroElements.hasNext()) {
        Vector.Element element = nonZeroElements.next();
        // A fresh vector per element: each one is handed to its own VectorWritable.
        RandomAccessSparseVector partialColumnVector =
            new RandomAccessSparseVector(Integer.MAX_VALUE);
        partialColumnVector.setQuick(row.get(), element.get());
        ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

        numNonZeroEntries++;
        if (maxValue < element.get()) {
          maxValue = element.get();
        }
      }

      // The count and maximum are only recorded when a threshold is in use.
      if (threshold != NO_THRESHOLD) {
        nonZeroEntries.setQuick(row.get(), numNonZeroEntries);
        maxValues.setQuick(row.get(), maxValue);
      }
      norms.setQuick(row.get(), similarity.norm(rowVector));

      ctx.getCounter(Counters.ROWS).increment(1);
    }
    /**
     * Computes the Y row for the incoming vector via {@code omega.computeYRow} and adds the outer
     * product of that row with itself to {@code mYtY}. Only cells with column index greater than
     * or equal to the row index (the upper triangle, diagonal included) are updated.
     *
     * @param key ignored
     * @param value the input row handed to {@code omega.computeYRow}
     * @param context unused by this method
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void map(Writable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
      omega.computeYRow(value.get(), yRow);

      if (!yRow.isDense()) {
        /*
         * Sparse case: walk the non-zero entries twice (nested). The downside, besides creating
         * short-lived iterator objects, is roughly twice the necessary iterations when the row
         * is actually fairly dense, because pairs with j < i are visited and then skipped.
         */
        Iterator<Vector.Element> outer = yRow.iterateNonZero();
        while (outer.hasNext()) {
          Vector.Element left = outer.next();
          int i = left.index();
          Iterator<Vector.Element> inner = yRow.iterateNonZero();
          while (inner.hasNext()) {
            Vector.Element right = inner.next();
            int j = right.index();
            if (j >= i) {
              mYtY.setQuick(i, j, mYtY.getQuick(i, j) + left.get() * right.get());
            }
          }
        }
        return;
      }

      // Dense case: index-based walk over the first kp positions.
      for (int i = 0; i < kp; i++) {
        double yi = yRow.getQuick(i);
        if (yi != 0.0) {
          for (int j = i; j < kp; j++) {
            double yj = yRow.getQuick(j);
            // Zero factors are skipped so that no cell is touched unnecessarily.
            if (yj == 0.0) {
              continue;
            }
            mYtY.setQuick(i, j, mYtY.getQuick(i, j) + yi * yj);
          }
        }
      }
    }
Exemple #6
0
 /**
  * A version that computes the Y row into a {@code Vector}, intended for the case of extremely
  * sparse matrices.
  *
  * <p>The output vector is reset to all zeros first. A dense input row is then walked position by
  * position (zero values included); any other input row is walked through its non-zero iterator.
  * Each visited (index, value) pair is handed to {@code accumDots}.
  *
  * @param aRow the input row
  * @param yRowOut the vector that receives the result; it is overwritten
  */
 public void computeYRow(Vector aRow, Vector yRowOut) {
   yRowOut.assign(0.0);

   if (!aRow.isDense()) {
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRowOut);
     }
     return;
   }

   int length = aRow.size();
   for (int column = 0; column < length; column++) {
     accumDots(column, aRow.getQuick(column), yRowOut);
   }
 }
Exemple #7
0
 /**
  * Computes YRow = ARow * Omega into a plain array.
  *
  * <p>The array is zero-filled first. A dense input row is then walked position by position
  * (zero values included); any other input row is walked through its non-zero iterator. Each
  * visited (index, value) pair is handed to {@code accumDots}.
  *
  * @param aRow row of matrix A (size n)
  * @param yRow row of matrix Y (result); must be pre-allocated to size (k+p). The length is not
  *     checked here.
  */
 @Deprecated
 public void computeYRow(Vector aRow, double[] yRow) {
   // Expected but not enforced: yRow.length == kp
   Arrays.fill(yRow, 0.0);

   if (!aRow.isDense()) {
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRow);
     }
     return;
   }

   int length = aRow.size();
   for (int column = 0; column < length; column++) {
     accumDots(column, aRow.getQuick(column), yRow);
   }
 }
Exemple #8
0
  /**
   * Formats the highest-weighted non-zero entries of a vector, one per line.
   *
   * <p>The entries are sorted by weight in descending order and the first {@code numTerms} of
   * them are looked up in the dictionary. An entry whose dictionary slot is {@code null} is
   * logged as an error and left out; it still counts towards {@code numTerms}. Every remaining
   * entry becomes a line consisting of a newline and two tabs, the term right-padded to 40
   * characters, {@code "=>"}, and the weight left-padded to 20 characters.
   *
   * @param vector the vector whose non-zero entries are ranked
   * @param dictionary term strings indexed by vector index
   * @param numTerms maximum number of ranked entries to consider
   * @return the concatenated lines, or an empty string when nothing qualifies
   */
  private static String getTopFeatures(Vector vector, String[] dictionary, int numTerms) {
    List<TermIndexWeight> ranked = new ArrayList<TermIndexWeight>();
    for (Iterator<Vector.Element> entries = vector.iterateNonZero(); entries.hasNext(); ) {
      Vector.Element entry = entries.next();
      ranked.add(new TermIndexWeight(entry.index(), entry.get()));
    }

    // Heaviest entries first.
    Comparator<TermIndexWeight> byWeightDescending =
        new Comparator<TermIndexWeight>() {
          @Override
          public int compare(TermIndexWeight one, TermIndexWeight two) {
            return Double.compare(two.weight, one.weight);
          }
        };
    Collections.sort(ranked, byWeightDescending);

    int limit = Math.min(ranked.size(), numTerms);
    StringBuilder out = new StringBuilder(100);
    for (int rank = 0; rank < limit; rank++) {
      TermIndexWeight candidate = ranked.get(rank);
      int index = candidate.index;
      String term = dictionary[index];
      if (term == null) {
        log.error("Dictionary entry missing for {}", index);
        continue;
      }
      out.append("\n\t\t")
          .append(StringUtils.rightPad(term, 40))
          .append("=>")
          .append(StringUtils.leftPad(Double.toString(candidate.weight), 20));
    }
    return out.toString();
  }
Exemple #9
0
 /**
  * Returns a human-readable formatted string representation of the vector, not intended to be
  * complete nor usable as an input/output representation.
  *
  * <p>A {@code NamedVector} is prefixed with {@code "<name> = "}. The elements follow in square
  * brackets, separated by {@code ", "} and formatted with three decimals:
  *
  * <ul>
  *   <li>sparse notation ({@code label:value}, zeros omitted) is used when the vector has fewer
  *       non-zero elements than its size, or when {@code bindings} is given; the label is the
  *       binding for that index, or the index itself when there is no binding for it;
  *   <li>otherwise every element is printed as a bare value.
  * </ul>
  *
  * <p>A vector without any printed element yields {@code "[]"}, also when it is named.
  *
  * @param v the vector to format
  * @param bindings optional labels indexed by vector position; may be {@code null}, may contain
  *     {@code null} entries and may be shorter than the vector
  * @return the formatted string
  */
 public static String formatVector(Vector v, String[] bindings) {
   StringBuilder buf = new StringBuilder();
   if (v instanceof NamedVector) {
     buf.append(((NamedVector) v).getName()).append(" = ");
   }

   int nzero = 0;
   Iterator<Vector.Element> iterateNonZero = v.iterateNonZero();
   while (iterateNonZero.hasNext()) {
     iterateNonZero.next();
     nzero++;
   }

   // if vector is sparse or if we have bindings, use sparse notation
   boolean sparseNotation = nzero < v.size() || bindings != null;

   buf.append('[');
   // Separators are written *between* elements. Trimming a trailing ", " after the fact cut two
   // characters off the name prefix when a named vector had nothing to print ("name =]").
   boolean first = true;
   for (int i = 0; i < v.size(); i++) {
     double elem = v.get(i);
     if (sparseNotation && elem == 0.0) {
       continue;
     }
     if (!first) {
       buf.append(", ");
     }
     first = false;
     if (sparseNotation) {
       // Fall back to the numeric index when no label exists for this position, including the
       // case of a bindings array shorter than the vector.
       String label = (bindings != null && i < bindings.length) ? bindings[i] : null;
       if (label != null) {
         buf.append(label);
       } else {
         buf.append(i);
       }
       buf.append(':');
     }
     buf.append(String.format(Locale.ENGLISH, "%.3f", elem));
   }
   buf.append(']');
   return buf.toString();
 }