/**
 * Adds up all partial dot-product vectors received for {@code row}, turns every non-zero
 * dot product into a similarity value and writes the resulting similarity vector.
 *
 * <p>Only similarity values that are {@code >= treshold} are kept. When
 * {@code excludeSelfSimilarity} is set, the entry at the row's own index is reset to 0
 * before the vector is written.
 *
 * @param row index of the row being processed; also used as the output key
 * @param partialDots partial dot-product vectors for this row; the first one is used as the
 *     accumulator and is modified in place
 * @param ctx context the {@code (row, similarities)} pair is written to
 */
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
    throws IOException, InterruptedException {
  Iterator<VectorWritable> partials = partialDots.iterator();

  // the first partial vector doubles as the accumulator for all the others
  Vector dots = partials.next().get();
  while (partials.hasNext()) {
    Iterator<Vector.Element> addends = partials.next().get().iterateNonZero();
    while (addends.hasNext()) {
      Vector.Element addend = addends.next();
      int index = addend.index();
      dots.setQuick(index, dots.getQuick(index) + addend.get());
    }
  }

  Vector similarities = dots.like();
  int rowIndex = row.get();
  double normA = norms.getQuick(rowIndex);
  for (Iterator<Vector.Element> dotsWith = dots.iterateNonZero(); dotsWith.hasNext();) {
    Vector.Element b = dotsWith.next();
    double normB = norms.getQuick(b.index());
    double similarityValue = similarity.similarity(b.get(), normA, normB, numberOfColumns);
    if (similarityValue >= treshold) {
      similarities.set(b.index(), similarityValue);
    }
  }

  if (excludeSelfSimilarity) {
    similarities.setQuick(rowIndex, 0);
  }
  ctx.write(row, new VectorWritable(similarities));
}
// utility functions static void denseVectorPlusAbsDenseDiff( DenseVector denseVector, Vector sparseVector, DenseVector meanVector) { for (int i = 0; i < denseVector.size(); i++) { double denseV = denseVector.getQuick(i); double v = sparseVector.getQuick(i); double mean = meanVector.getQuick(i); denseVector.setQuick(i, denseV + Math.abs(v - mean)); } }
/**
 * Subtracts, entry by entry and in place, first {@code subtractor1} and then
 * {@code subtractor2} from {@code mainVector}.
 *
 * @param mainVector vector that is updated in place; its size determines how many entries
 *     are processed
 * @param subtractor1 first vector to subtract
 * @param subtractor2 second vector to subtract
 */
static void denseVectorSubtractSparseSubtractDense(
    DenseVector mainVector, Vector subtractor1, DenseVector subtractor2) {
  for (int col = 0, nCols = mainVector.size(); col < nCols; col++) {
    // evaluated left to right: (main - subtractor1) - subtractor2
    double reduced =
        mainVector.getQuick(col) - subtractor1.getQuick(col) - subtractor2.getQuick(col);
    mainVector.setQuick(col, reduced);
  }
}
/**
 * Fills {@code atx} with one value per chunk: the sum over the chunk's rows of the chunk
 * value at that row multiplied by the matching entry of the vector obtained from
 * {@code bx.value()}.
 *
 * <p>The vector entry used for local row {@code r} is the one at index
 * {@code (int) start + r}, where {@code start} is taken from the first chunk.
 * NOTE(review): presumably each chunk is one column of the same row range — confirm.
 *
 * @param chks chunks to process; the row count and start offset are read from
 *     {@code chks[0]}
 */
public void map(Chunk chks[]) {
  int rowCount = chks[0].len();
  Vector x = bx.value();
  long firstRow = chks[0].start();
  int colCount = chks.length;
  atx = new double[colCount];

  for (int row = 0; row < rowCount; row++) {
    // the start offset is narrowed to int before the local row index is added
    double xValue = x.getQuick((int) firstRow + row);
    for (int col = 0; col < colCount; col++) {
      atx[col] += chks[col].at0(row) * xValue;
    }
  }
}
@Override protected void map(Writable key, VectorWritable value, Context context) throws IOException, InterruptedException { omega.computeYRow(value.get(), yRow); // compute outer product update for YtY if (yRow.isDense()) { for (int i = 0; i < kp; i++) { double yi; if ((yi = yRow.getQuick(i)) == 0.0) { continue; // avoid densing up here unnecessarily } for (int j = i; j < kp; j++) { double yj; if ((yj = yRow.getQuick(j)) != 0.0) { mYtY.setQuick(i, j, mYtY.getQuick(i, j) + yi * yj); } } } } else { /* * the disadvantage of using sparse vector (aside from the fact that we * are creating some short-lived references) here is that we obviously * do two times more iterations then necessary if y row is pretty dense. */ for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI.hasNext(); ) { Vector.Element eli = iterI.next(); int i = eli.index(); for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ.hasNext(); ) { Vector.Element elj = iterJ.next(); int j = elj.index(); if (j < i) { continue; } mYtY.setQuick(i, j, mYtY.getQuick(i, j) + eli.get() * elj.get()); } } } }
/**
 * A version to compute yRow as a sparse vector in case of extremely sparse matrices.
 *
 * <p>{@code yRowOut} is first reset to all zeros; then every entry of {@code aRow} (all
 * entries when it is dense, only the non-zero ones otherwise) is passed to
 * {@code accumDots} together with its index.
 *
 * @param aRow input row whose entries are accumulated
 * @param yRowOut output vector; overwritten with the result
 */
public void computeYRow(Vector aRow, Vector yRowOut) {
  yRowOut.assign(0.0);

  if (!aRow.isDense()) {
    Iterator<Element> nonZeros = aRow.iterateNonZero();
    while (nonZeros.hasNext()) {
      Element entry = nonZeros.next();
      accumDots(entry.index(), entry.get(), yRowOut);
    }
    return;
  }

  for (int j = 0, n = aRow.size(); j < n; j++) {
    accumDots(j, aRow.getQuick(j), yRowOut);
  }
}
/** * compute YRow=ARow*Omega. * * @param aRow row of matrix A (size n) * @param yRow row of matrix Y (result) must be pre-allocated to size of (k+p) */ @Deprecated public void computeYRow(Vector aRow, double[] yRow) { // assert yRow.length == kp; Arrays.fill(yRow, 0.0); if (aRow.isDense()) { int n = aRow.size(); for (int j = 0; j < n; j++) { accumDots(j, aRow.getQuick(j), yRow); } } else { for (Iterator<Element> iter = aRow.iterateNonZero(); iter.hasNext(); ) { Element el = iter.next(); accumDots(el.index(), el.get(), yRow); } } }
/**
 * Adds to each of the first {@code kp} entries of {@code yRow} the product of
 * {@code aElement} and this object's value at {@code (aIndex, i)}.
 *
 * @param aIndex first index passed to {@code getQuick(aIndex, i)}
 * @param aElement factor applied to every value read via {@code getQuick(aIndex, i)}
 * @param yRow vector that is updated in place
 */
protected void accumDots(int aIndex, double aElement, Vector yRow) {
  int col = 0;
  while (col < kp) {
    double current = yRow.getQuick(col);
    yRow.setQuick(col, current + getQuick(aIndex, col) * aElement);
    col++;
  }
}
/**
 * Returns the value that {@code weightsPerFeature} holds at the given index.
 *
 * @param feature index of the feature to look up
 * @return the entry of {@code weightsPerFeature} at {@code feature}
 */
public double featureWeight(int feature) {
  double weight = weightsPerFeature.getQuick(feature);
  return weight;
}
/**
 * Returns the value that {@code weightsPerLabel} holds at the given index.
 *
 * @param label index of the label to look up
 * @return the entry of {@code weightsPerLabel} at {@code label}
 */
public double labelWeight(int label) {
  double weight = weightsPerLabel.getQuick(label);
  return weight;
}