Ejemplo n.º 1
0
  /**
   * Prepares (but does not submit) a job that runs {@code mapClass} over the matrix stored at
   * {@code matrixInputPath}, with {@code redClass} installed as the combiner and also handed to
   * {@code HadoopUtil.prepareJob} in the reducer position.
   *
   * <p>Before the job is prepared, {@code v} is written as a single
   * (NullWritable, VectorWritable) record into a new sequence file located at
   * {@code <outputVectorPathBase>/<INPUT_VECTOR>/<System.nanoTime()>}. The URI of that file is
   * set as the distributed-cache file list on {@code initialConf} and is also stored in the job
   * configuration under the {@code INPUT_VECTOR} key.
   *
   * @param initialConf configuration used to resolve the file system, to create the sequence
   *     file, and as the base configuration for the job; its distributed-cache file list is
   *     set by this call
   * @param v vector serialized for the job; {@code !v.isDense()} is stored under
   *     {@code IS_SPARSE_OUTPUT}
   * @param outputVectorDim value stored under {@code OUTPUT_VECTOR_DIMENSION}
   * @param matrixInputPath job input; qualified against its own file system before use
   * @param outputVectorPathBase base directory for both the serialized input vector and the job
   *     output ({@code OUTPUT_VECTOR_FILENAME} beneath it); qualified against the same file
   *     system as {@code matrixInputPath}
   * @param mapClass mapper implementation
   * @param redClass combiner/reducer implementation
   * @return the configured job
   * @throws IOException if the file system cannot be obtained, or writing/closing the vector
   *     file or preparing the job fails
   */
  public static Job createTimesSquaredJob(
      Configuration initialConf,
      Vector v,
      int outputVectorDim,
      Path matrixInputPath,
      Path outputVectorPathBase,
      Class<? extends TimesSquaredMapper> mapClass,
      Class<? extends VectorSummingReducer> redClass)
      throws IOException {

    // Resolve both paths against the file system that owns the matrix input.
    final FileSystem fileSystem = FileSystem.get(matrixInputPath.toUri(), initialConf);
    final Path qualifiedMatrixPath = fileSystem.makeQualified(matrixInputPath);
    final Path qualifiedOutputBase = fileSystem.makeQualified(outputVectorPathBase);

    // The nanoTime suffix gives each invocation its own vector file name.
    final long timestamp = System.nanoTime();
    final Path serializedVectorPath =
        new Path(qualifiedOutputBase, INPUT_VECTOR + '/' + timestamp);

    // Serialize the input vector as the only record of the sequence file.
    SequenceFile.Writer vectorWriter = null;
    try {
      vectorWriter =
          new SequenceFile.Writer(
              fileSystem,
              initialConf,
              serializedVectorPath,
              NullWritable.class,
              VectorWritable.class);
      vectorWriter.append(NullWritable.get(), new VectorWritable(v));
    } finally {
      // swallowIOException == false: a failure while closing is propagated to the caller.
      Closeables.close(vectorWriter, false);
    }

    // Register the vector file with the distributed cache before the job is built from
    // initialConf.
    final URI serializedVectorUri = serializedVectorPath.toUri();
    DistributedCache.setCacheFiles(new URI[] {serializedVectorUri}, initialConf);

    final Path jobOutputPath = new Path(qualifiedOutputBase, OUTPUT_VECTOR_FILENAME);
    final Job timesSquaredJob =
        HadoopUtil.prepareJob(
            qualifiedMatrixPath,
            jobOutputPath,
            SequenceFileInputFormat.class,
            mapClass,
            NullWritable.class,
            VectorWritable.class,
            redClass,
            NullWritable.class,
            VectorWritable.class,
            SequenceFileOutputFormat.class,
            initialConf);
    timesSquaredJob.setCombinerClass(redClass);
    timesSquaredJob.setJobName("TimesSquaredJob: " + qualifiedMatrixPath);

    // Job-level settings: vector location, output sparsity, and output dimension.
    final Configuration jobConf = timesSquaredJob.getConfiguration();
    jobConf.set(INPUT_VECTOR, serializedVectorUri.toString());
    jobConf.setBoolean(IS_SPARSE_OUTPUT, !v.isDense());
    jobConf.setInt(OUTPUT_VECTOR_DIMENSION, outputVectorDim);

    return timesSquaredJob;
  }
Ejemplo n.º 2
0
 /**
  * Computes a row of Y from a row of A, writing the result into a vector rather than an array.
  * Intended for extremely sparse matrices, where the Y row may itself be kept sparse.
  *
  * <p>{@code yRowOut} is first reset to all zeros. Then {@code accumDots} is invoked once per
  * element of {@code aRow}: for every index when {@code aRow} is dense, or only for the entries
  * returned by {@code iterateNonZero()} otherwise.
  *
  * @param aRow input row of A
  * @param yRowOut destination vector; its previous contents are overwritten
  */
 public void computeYRow(Vector aRow, Vector yRowOut) {
   yRowOut.assign(0.0);

   if (!aRow.isDense()) {
     // Sparse input: visit only the entries the vector reports as non-zero.
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRowOut);
     }
     return;
   }

   // Dense input: walk every position by index.
   final int size = aRow.size();
   for (int col = 0; col < size; col++) {
     accumDots(col, aRow.getQuick(col), yRowOut);
   }
 }
Ejemplo n.º 3
0
 /**
  * Computes YRow = ARow * Omega into a caller-supplied array.
  *
  * <p>{@code yRow} is zero-filled first. Then {@code accumDots} is invoked once per element of
  * {@code aRow}: for every index when {@code aRow} is dense, or only for the entries returned by
  * {@code iterateNonZero()} otherwise.
  *
  * @param aRow row of matrix A (size n)
  * @param yRow row of matrix Y (result); must be pre-allocated to size (k+p). The length is
  *     not verified here.
  */
 @Deprecated
 public void computeYRow(Vector aRow, double[] yRow) {
   Arrays.fill(yRow, 0.0);

   if (!aRow.isDense()) {
     // Sparse input: visit only the entries the vector reports as non-zero.
     Iterator<Element> nonZeros = aRow.iterateNonZero();
     while (nonZeros.hasNext()) {
       Element entry = nonZeros.next();
       accumDots(entry.index(), entry.get(), yRow);
     }
     return;
   }

   // Dense input: walk every position by index.
   final int size = aRow.size();
   for (int col = 0; col < size; col++) {
     accumDots(col, aRow.getQuick(col), yRow);
   }
 }
Ejemplo n.º 4
0
    /**
     * Computes the Y row for the incoming matrix row via {@code omega.computeYRow} and adds the
     * outer product of that row with itself to {@code mYtY}. Only cells with column index
     * greater than or equal to the row index (the upper triangle, diagonal included) are
     * updated.
     *
     * @param key record key; not used by this method
     * @param value the matrix row to process
     * @param context mapper context; not used by this method
     */
    @Override
    protected void map(Writable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
      omega.computeYRow(value.get(), yRow);

      if (!yRow.isDense()) {
        /*
         * Sparse Y row: both loops walk the non-zero entries and pairs below the diagonal are
         * discarded. Apart from creating some short-lived references, the drawback is that
         * this does about twice as many iterations as necessary when the row is in fact
         * fairly dense.
         */
        Iterator<Vector.Element> rowIter = yRow.iterateNonZero();
        while (rowIter.hasNext()) {
          Vector.Element rowEl = rowIter.next();
          int r = rowEl.index();
          Iterator<Vector.Element> colIter = yRow.iterateNonZero();
          while (colIter.hasNext()) {
            Vector.Element colEl = colIter.next();
            int c = colEl.index();
            if (c >= r) {
              mYtY.setQuick(r, c, mYtY.getQuick(r, c) + rowEl.get() * colEl.get());
            }
          }
        }
        return;
      }

      // Dense Y row: index-based walk over the upper triangle.
      for (int r = 0; r < kp; r++) {
        double rowVal = yRow.getQuick(r);
        if (rowVal != 0.0) { // skipping zeros avoids filling in mYtY unnecessarily
          for (int c = r; c < kp; c++) {
            double colVal = yRow.getQuick(c);
            if (colVal == 0.0) {
              continue;
            }
            mYtY.setQuick(r, c, mYtY.getQuick(r, c) + rowVal * colVal);
          }
        }
      }
    }