/**
 * Adds up all partial dot-product vectors received for {@code row}, turns every accumulated
 * dot product into a similarity value and writes the resulting similarity row.
 *
 * <p>Only similarity values that are greater than or equal to {@code treshold} are stored. If
 * {@code excludeSelfSimilarity} is set, the entry at the row's own index is set to zero before
 * the row is written.
 *
 * @param row index of the row whose similarities are computed
 * @param partialDots partial dot-product vectors for the row; the first vector serves as the
 *     accumulator and is modified in place
 * @param ctx context the similarity row is written to
 */
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
    throws IOException, InterruptedException {
  int rowIndex = row.get();

  // Fold every remaining partial vector into the first one.
  Iterator<VectorWritable> partials = partialDots.iterator();
  Vector dots = partials.next().get();
  while (partials.hasNext()) {
    Vector partial = partials.next().get();
    for (Iterator<Vector.Element> entries = partial.iterateNonZero(); entries.hasNext(); ) {
      Vector.Element entry = entries.next();
      int column = entry.index();
      dots.setQuick(column, dots.getQuick(column) + entry.get());
    }
  }

  // Convert the summed dot products into similarity values, dropping those below the threshold.
  Vector similarities = dots.like();
  double normA = norms.getQuick(rowIndex);
  for (Iterator<Vector.Element> entries = dots.iterateNonZero(); entries.hasNext(); ) {
    Vector.Element b = entries.next();
    int column = b.index();
    double similarityValue =
        similarity.similarity(b.get(), normA, norms.getQuick(column), numberOfColumns);
    if (similarityValue >= treshold) {
      similarities.set(column, similarityValue);
    }
  }

  if (excludeSelfSimilarity) {
    similarities.setQuick(rowIndex, 0);
  }
  ctx.write(row, new VectorWritable(similarities));
}
/**
 * Offers every non-zero entry of the incoming vector to a {@code TopK} bounded by
 * {@code recommendationsPerUser} and ordered by {@code BY_PREFERENCE_VALUE}, then writes the
 * retained items as one delimited line of the form
 * {@code itemID DELIMETER value DELIMETER itemID DELIMETER value ...}.
 *
 * <p>Nothing is written when no item is retained.
 *
 * @param key key the formatted line is written under
 * @param value vector whose non-zero entries are the candidate items (index = item ID,
 *     value = preference value narrowed to {@code float})
 * @param context context the formatted line is written to
 */
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  TopK<RecommendedItem> topKItems =
      new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
  Iterator<Vector.Element> iter = value.get().iterateNonZero();
  while (iter.hasNext()) {
    Vector.Element e = iter.next();
    topKItems.offer(new GenericRecommendedItem(e.index(), (float) e.get()));
  }

  // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
  // The retrieved items are formatted directly; copying them into a second list added nothing.
  StringBuilder sb = new StringBuilder();
  for (RecommendedItem item : topKItems.retrieve()) {
    if (sb.length() > 0) {
      sb.append(DELIMETER);
    }
    sb.append(item.getItemID()).append(DELIMETER).append(item.getValue());
  }

  // An empty buffer means no item was retained; emit nothing in that case.
  if (sb.length() > 0) {
    outValue.set(sb.toString());
    context.write(key, outValue);
  }
}
@Override protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx) throws IOException, InterruptedException { Vector similarities = similaritiesWritable.get(); // For performance, the creation of transposedPartial is moved out of the while loop and it is // reused inside Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1); TopElementsQueue topKQueue = new TopElementsQueue(maxSimilaritiesPerRow); Iterator<Vector.Element> nonZeroElements = similarities.iterateNonZero(); while (nonZeroElements.hasNext()) { Vector.Element nonZeroElement = nonZeroElements.next(); MutableElement top = topKQueue.top(); double candidateValue = nonZeroElement.get(); if (candidateValue > top.get()) { top.setIndex(nonZeroElement.index()); top.set(candidateValue); topKQueue.updateTop(); } transposedPartial.setQuick(row.get(), candidateValue); ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial)); transposedPartial.setQuick(row.get(), 0.0); } Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow); for (Vector.Element topKSimilarity : topKQueue.getTopElements()) { topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get()); } ctx.write(row, new VectorWritable(topKSimilarities)); }
/**
 * Normalizes the incoming row with {@code similarity.normalize}, writes every non-zero entry as
 * a single-entry column vector keyed by the entry's index, and records per-row statistics:
 * the row's norm always, and the number of non-zero entries and the maximum value when
 * {@code threshold} differs from {@code NO_THRESHOLD}. The {@code ROWS} counter is incremented
 * once per call.
 *
 * @param row index of the row
 * @param vectorWritable the row's values
 * @param ctx context the single-entry column vectors are written to
 */
@Override
protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
    throws IOException, InterruptedException {
  Vector rowVector = similarity.normalize(vectorWritable.get());
  int rowIndex = row.get();

  int numNonZeroEntries = 0;
  // Double.MIN_VALUE is the smallest POSITIVE double, so seeding the maximum with it made every
  // row whose entries are all negative report ~4.9e-324 instead of its real maximum.
  double maxValue = Double.NEGATIVE_INFINITY;

  Iterator<Vector.Element> nonZeroElements = rowVector.iterateNonZero();
  while (nonZeroElements.hasNext()) {
    Vector.Element element = nonZeroElements.next();
    double value = element.get();

    RandomAccessSparseVector partialColumnVector = new RandomAccessSparseVector(Integer.MAX_VALUE);
    partialColumnVector.setQuick(rowIndex, value);
    ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

    numNonZeroEntries++;
    if (maxValue < value) {
      maxValue = value;
    }
  }

  if (maxValue == Double.NEGATIVE_INFINITY) {
    // No maximum was found (e.g. the row has no non-zero entries). Store the same sentinel the
    // previous implementation stored so consumers of maxValues see an unchanged value here.
    maxValue = Double.MIN_VALUE;
  }

  if (threshold != NO_THRESHOLD) {
    nonZeroEntries.setQuick(rowIndex, numNonZeroEntries);
    maxValues.setQuick(rowIndex, maxValue);
  }
  norms.setQuick(rowIndex, similarity.norm(rowVector));

  ctx.getCounter(Counters.ROWS).increment(1);
}
@Override protected void map(Writable key, VectorWritable value, Context context) throws IOException, InterruptedException { omega.computeYRow(value.get(), yRow); // compute outer product update for YtY if (yRow.isDense()) { for (int i = 0; i < kp; i++) { double yi; if ((yi = yRow.getQuick(i)) == 0.0) { continue; // avoid densing up here unnecessarily } for (int j = i; j < kp; j++) { double yj; if ((yj = yRow.getQuick(j)) != 0.0) { mYtY.setQuick(i, j, mYtY.getQuick(i, j) + yi * yj); } } } } else { /* * the disadvantage of using sparse vector (aside from the fact that we * are creating some short-lived references) here is that we obviously * do two times more iterations then necessary if y row is pretty dense. */ for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI.hasNext(); ) { Vector.Element eli = iterI.next(); int i = eli.index(); for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ.hasNext(); ) { Vector.Element elj = iterJ.next(); int j = elj.index(); if (j < i) { continue; } mYtY.setQuick(i, j, mYtY.getQuick(i, j) + eli.get() * elj.get()); } } } }
/**
 * Variant of the Y-row computation that accumulates into a {@link Vector}, so the result can
 * be kept as a sparse vector in case of extremely sparse matrices.
 *
 * <p>{@code yRowOut} is first reset to zero; then {@code accumDots} is invoked once per entry
 * of {@code aRow}: for every position when {@code aRow} is dense, otherwise only for its
 * non-zero entries.
 *
 * @param aRow input row whose entries are fed to {@code accumDots}
 * @param yRowOut output vector; cleared to zero and then updated in place
 */
public void computeYRow(Vector aRow, Vector yRowOut) {
  yRowOut.assign(0.0);

  if (!aRow.isDense()) {
    Iterator<Element> nonZeros = aRow.iterateNonZero();
    while (nonZeros.hasNext()) {
      Element entry = nonZeros.next();
      accumDots(entry.index(), entry.get(), yRowOut);
    }
    return;
  }

  int columns = aRow.size();
  for (int column = 0; column < columns; column++) {
    accumDots(column, aRow.getQuick(column), yRowOut);
  }
}
/** * compute YRow=ARow*Omega. * * @param aRow row of matrix A (size n) * @param yRow row of matrix Y (result) must be pre-allocated to size of (k+p) */ @Deprecated public void computeYRow(Vector aRow, double[] yRow) { // assert yRow.length == kp; Arrays.fill(yRow, 0.0); if (aRow.isDense()) { int n = aRow.size(); for (int j = 0; j < n; j++) { accumDots(j, aRow.getQuick(j), yRow); } } else { for (Iterator<Element> iter = aRow.iterateNonZero(); iter.hasNext(); ) { Element el = iter.next(); accumDots(el.index(), el.get(), yRow); } } }
private static String getTopFeatures(Vector vector, String[] dictionary, int numTerms) { List<TermIndexWeight> vectorTerms = new ArrayList<TermIndexWeight>(); Iterator<Vector.Element> iter = vector.iterateNonZero(); while (iter.hasNext()) { Vector.Element elt = iter.next(); vectorTerms.add(new TermIndexWeight(elt.index(), elt.get())); } // Sort results in reverse order (ie weight in descending order) Collections.sort( vectorTerms, new Comparator<TermIndexWeight>() { @Override public int compare(TermIndexWeight one, TermIndexWeight two) { return Double.compare(two.weight, one.weight); } }); Collection<Pair<String, Double>> topTerms = new LinkedList<Pair<String, Double>>(); for (int i = 0; (i < vectorTerms.size()) && (i < numTerms); i++) { int index = vectorTerms.get(i).index; String dictTerm = dictionary[index]; if (dictTerm == null) { log.error("Dictionary entry missing for {}", index); continue; } topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight)); } StringBuilder sb = new StringBuilder(100); for (Pair<String, Double> item : topTerms) { String term = item.getFirst(); sb.append("\n\t\t"); sb.append(StringUtils.rightPad(term, 40)); sb.append("=>"); sb.append(StringUtils.leftPad(item.getSecond().toString(), 20)); } return sb.toString(); }
/** * Return a human-readable formatted string representation of the vector, not intended to be * complete nor usable as an input/output representation */ public static String formatVector(Vector v, String[] bindings) { StringBuilder buf = new StringBuilder(); if (v instanceof NamedVector) { buf.append(((NamedVector) v).getName()).append(" = "); } int nzero = 0; Iterator<Vector.Element> iterateNonZero = v.iterateNonZero(); while (iterateNonZero.hasNext()) { iterateNonZero.next(); nzero++; } // if vector is sparse or if we have bindings, use sparse notation if (nzero < v.size() || bindings != null) { buf.append('['); for (int i = 0; i < v.size(); i++) { double elem = v.get(i); if (elem == 0.0) { continue; } String label; if (bindings != null && (label = bindings[i]) != null) { buf.append(label).append(':'); } else { buf.append(i).append(':'); } buf.append(String.format(Locale.ENGLISH, "%.3f", elem)).append(", "); } } else { buf.append('['); for (int i = 0; i < v.size(); i++) { double elem = v.get(i); buf.append(String.format(Locale.ENGLISH, "%.3f", elem)).append(", "); } } if (buf.length() > 1) { buf.setLength(buf.length() - 2); } buf.append(']'); return buf.toString(); }