예제 #1
0
 /**
  * Maps every line of {@code RAW_CSV} through a {@link FeatureExtractorMapper} configured for
  * CSV input, then verifies that all output lands under a single key and that the vectors
  * written under key {@code "5"} format identically to the corresponding {@code RAW_DATA} rows.
  *
  * @throws Exception if building the context, setting up the mapper, or mapping a line fails
  */
 @Test
 public void testExtractorMapperCSV() throws Exception {
   FeatureExtractorMapper mapper = new FeatureExtractorMapper();
   Configuration conf = getConfiguration();
   conf.set("vector.implementation.class.name", "org.apache.mahout.math.RandomAccessSparseVector");
   // The first CSV line is handed to the mapper as the feature-name list.
   conf.set(FeatureExtractorKeySet.FEATURE_NAMES, RAW_CSV[0]);
   conf.set(FeatureExtractorKeySet.SELECTED_DEPENDENT, DEPENDENT);
   conf.set(FeatureExtractorKeySet.SELECTED_INDEPENDENT, INDEPENDENT);
   conf.set(FeatureExtractorKeySet.SELECTED_INTERACTION, INTERACTION);
   conf.set(FeatureExtractorKeySet.SEPARATOR, SEP_CSV);

   // Capture the mapper output in memory so it can be inspected below.
   DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();
   Mapper<LongWritable, Text, Text, VectorWritable>.Context context =
       DummyRecordWriter.build(mapper, conf, writer);
   mapper.setup(context);
   for (int i = 0; i < RAW_CSV.length; ++i) {
     mapper.map(new LongWritable(i), new Text(RAW_CSV[i]), context);
   }

   // Everything is expected to be written under exactly one key.
   assertEquals("Number of map results", 1, writer.getData().size());

   Text outputKey = new Text("5");
   for (int i = 0; i < writer.getValue(outputKey).size(); ++i) {
     // JUnit's argument order is (message, expected, actual): RAW_DATA is the reference value,
     // the mapper output is what is under test.
     assertEquals(
         "Features: ",
         getFormatedOutput(RAW_DATA[i]),
         getFormatedOutput(writer.getValue(outputKey).get(i)));
   }
 }
예제 #2
0
  /**
   * Feeds every line of {@code RAW} through an {@link AffinityMatrixInputMapper} and checks that
   * the captured output holds {@code RAW_DIMENSIONS} keys with {@code RAW_DIMENSIONS} entries
   * each.
   *
   * @throws Exception if building the context or mapping a line fails
   */
  @Test
  public void testAffinityMatrixInputMapper() throws Exception {
    AffinityMatrixInputMapper mapper = new AffinityMatrixInputMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // Wire the mapper to an in-memory writer so its output can be inspected afterwards.
    DummyRecordWriter<IntWritable, MatrixEntryWritable> output = new DummyRecordWriter<>();
    Mapper<LongWritable, Text, IntWritable, MatrixEntryWritable>.Context ctx =
        DummyRecordWriter.build(mapper, conf, output);

    // Push each raw input line through the mapper.
    for (String line : RAW) {
      mapper.map(new LongWritable(), new Text(line), ctx);
    }

    // One key per row, and every row fully populated.
    assertEquals("Number of map results", RAW_DIMENSIONS, output.getData().size());
    Set<IntWritable> rowKeys = output.getData().keySet();
    for (IntWritable rowKey : rowKeys) {
      List<MatrixEntryWritable> entries = output.getData().get(rowKey);
      assertEquals("Number of items in row", RAW_DIMENSIONS, entries.size());
    }
  }
예제 #3
0
  /**
   * Runs the mapper over {@code RAW}, reduces each mapped key with an
   * {@link AffinityMatrixInputReducer}, and checks that every reduced row is a single vector whose
   * elements all appear among the entries the mapper emitted for that row.
   *
   * @throws Exception if building a context, mapping, or reducing fails
   */
  @Test
  public void testAffinitymatrixInputReducer() throws Exception {
    AffinityMatrixInputMapper mapper = new AffinityMatrixInputMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // Map phase: capture the mapper output in memory.
    DummyRecordWriter<IntWritable, MatrixEntryWritable> mapOutput = new DummyRecordWriter<>();
    Mapper<LongWritable, Text, IntWritable, MatrixEntryWritable>.Context mapCtx =
        DummyRecordWriter.build(mapper, conf, mapOutput);
    for (String line : RAW) {
      mapper.map(new LongWritable(), new Text(line), mapCtx);
    }
    // Keep the mapped entries around; the reduced vectors are validated against them.
    Map<IntWritable, List<MatrixEntryWritable>> mappedEntries = mapOutput.getData();

    // Reduce phase: one reduce call per key produced by the mapper.
    AffinityMatrixInputReducer reducer = new AffinityMatrixInputReducer();
    DummyRecordWriter<IntWritable, VectorWritable> reduceOutput = new DummyRecordWriter<>();
    Reducer<IntWritable, MatrixEntryWritable, IntWritable, VectorWritable>.Context reduceCtx =
        DummyRecordWriter.build(
            reducer, conf, reduceOutput, IntWritable.class, MatrixEntryWritable.class);
    for (IntWritable mappedKey : mapOutput.getKeys()) {
      reducer.reduce(mappedKey, mapOutput.getValue(mappedKey), reduceCtx);
    }

    // Verification: one vector per row, each element traceable to a mapped entry.
    assertEquals("Number of reduce results", RAW_DIMENSIONS, reduceOutput.getData().size());
    for (IntWritable rowKey : reduceOutput.getKeys()) {
      List<VectorWritable> vectors = reduceOutput.getValue(rowKey);
      assertEquals("Should only be one vector", 1, vectors.size());

      Vector rowVector = vectors.get(0).get();
      for (Vector.Element element : rowVector.all()) {
        // Build the entry this element should correspond to and look it up in the mapper output.
        MatrixEntryWritable expectedEntry = new MatrixEntryWritable();
        expectedEntry.setRow(-1);
        expectedEntry.setCol(element.index());
        expectedEntry.setVal(element.get());
        boolean found = mappedEntries.get(rowKey).contains(expectedEntry);
        assertTrue("This entry was correctly placed in its row", found);
      }
    }
  }
예제 #4
0
  /**
   * Maps each row of {@code RAW} with a {@link MatrixDiagonalizeMapper}, performs a single
   * reduction with a {@link MatrixDiagonalizeReducer}, and checks that the one resulting vector
   * holds, at each index, the sum of the corresponding raw row (within 0.01).
   *
   * @throws Exception if building a context, mapping, or reducing fails
   */
  @Test
  public void testMatrixDiagonalizeReducer() throws Exception {
    MatrixDiagonalizeMapper mapper = new MatrixDiagonalizeMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // Map phase, captured in memory.
    DummyRecordWriter<NullWritable, IntDoublePairWritable> mapOutput = new DummyRecordWriter<>();
    Mapper<IntWritable, VectorWritable, NullWritable, IntDoublePairWritable>.Context mapCtx =
        DummyRecordWriter.build(mapper, conf, mapOutput);
    for (int rowIndex = 0; rowIndex < RAW_DIMENSIONS; rowIndex++) {
      RandomAccessSparseVector rowVector = new RandomAccessSparseVector(RAW_DIMENSIONS);
      rowVector.assign(RAW[rowIndex]);
      mapper.map(new IntWritable(rowIndex), new VectorWritable(rowVector), mapCtx);
    }

    // Reduce phase: all mapped values share the NullWritable key, so one call suffices.
    MatrixDiagonalizeReducer reducer = new MatrixDiagonalizeReducer();
    DummyRecordWriter<NullWritable, VectorWritable> reduceOutput = new DummyRecordWriter<>();
    Reducer<NullWritable, IntDoublePairWritable, NullWritable, VectorWritable>.Context reduceCtx =
        DummyRecordWriter.build(
            reducer, conf, reduceOutput, NullWritable.class, IntDoublePairWritable.class);
    reducer.reduce(NullWritable.get(), mapOutput.getValue(NullWritable.get()), reduceCtx);

    // Exactly one vector must come out of the reduction.
    List<VectorWritable> vectors = reduceOutput.getValue(NullWritable.get());
    assertEquals("Only a single resulting vector", 1, vectors.size());

    // Each component must equal the sum of the matching raw row.
    Vector diagonal = vectors.get(0).get();
    for (int rowIndex = 0; rowIndex < diagonal.size(); rowIndex++) {
      double expectedSum = rowSum(RAW[rowIndex]);
      assertEquals("Element sum is correct", expectedSum, diagonal.get(rowIndex), 0.01);
    }
  }
  /**
   * Exercises the mapper on two matrices at once: the affinity matrix and the cut matrix of
   * sensitivities. Both are symmetric, so for an off-diagonal position the two mirrored entries
   * of each matrix end up under the same key (four vertices in total), while a diagonal position
   * collects only two vertices, one per matrix.
   *
   * <p>The correct grouping of these quad or pair vertices is the only output of the mapper.
   *
   * @throws Exception if building the context or mapping a row fails
   */
  @Test
  public void testEigencutsAffinityCutsMapper() throws Exception {
    final int dims = this.affinity.length;

    EigencutsAffinityCutsMapper mapper = new EigencutsAffinityCutsMapper();
    Configuration conf = new Configuration();
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, dims);

    // In-memory writer that records everything the mapper emits.
    DummyRecordWriter<Text, VertexWritable> output = new DummyRecordWriter<Text, VertexWritable>();
    Mapper<IntWritable, VectorWritable, Text, VertexWritable>.Context ctx =
        DummyRecordWriter.build(mapper, conf, output);

    // Map the affinity row and the sensitivity row for every row index, in that order.
    for (int rowIndex = 0; rowIndex < dims; rowIndex++) {
      IntWritable rowKey = new IntWritable(rowIndex);
      VectorWritable affinityRow = new VectorWritable(new DenseVector(this.affinity[rowIndex]));
      VectorWritable sensitivityRow =
          new VectorWritable(new DenseVector(this.sensitivity[rowIndex]));
      mapper.map(rowKey, affinityRow, ctx);
      mapper.map(rowKey, sensitivityRow, ctx);
    }

    // For two 4x4 matrices there should be 10 unique keys. Walking all (i, j) pairs below
    // visits each off-diagonal key twice, which adds up to 56 entries inspected in total.
    assertEquals("Number of keys", 10, output.getKeys().size());
    for (int i = 0; i < dims; i++) {
      for (int j = 0; j < dims; j++) {
        // Keys are built as "<larger index>_<smaller index>".
        String keyName = Math.max(i, j) + "_" + Math.min(i, j);
        List<VertexWritable> vertices = output.getValue(new Text(keyName));

        boolean onDiagonal = (i == j);
        if (onDiagonal) {
          // Diagonal positions collect just two vertices, neither of them positive.
          assertEquals("Diagonal entry", 2, vertices.size());
          for (VertexWritable vertex : vertices) {
            assertFalse("Diagonal values are zero", vertex.getValue() > 0);
          }
          continue;
        }

        // Off-diagonal positions collect four vertices.
        assertEquals("Off-diagonal entry", 4, vertices.size());
        if (i + j == 3) { // all have values greater than 0
          for (VertexWritable vertex : vertices) {
            assertTrue("Off-diagonal non-zero entries", vertex.getValue() > 0);
          }
        }
      }
    }
  }
예제 #6
0
  /**
   * Maps each row of {@code RAW} with a {@link MatrixDiagonalizeMapper} and checks that
   * {@code RAW_DIMENSIONS} values were written under the {@link NullWritable} key.
   *
   * @throws Exception if building the context or mapping a row fails
   */
  @Test
  public void testMatrixDiagonalizeMapper() throws Exception {
    MatrixDiagonalizeMapper mapper = new MatrixDiagonalizeMapper();
    Configuration conf = getConfiguration();
    conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);

    // In-memory writer that records the mapper output.
    DummyRecordWriter<NullWritable, IntDoublePairWritable> output = new DummyRecordWriter<>();
    Mapper<IntWritable, VectorWritable, NullWritable, IntDoublePairWritable>.Context ctx =
        DummyRecordWriter.build(mapper, conf, output);

    // Wrap each raw row in a sparse vector and map it under its row index.
    for (int rowIndex = 0; rowIndex < RAW_DIMENSIONS; rowIndex++) {
      RandomAccessSparseVector rowVector = new RandomAccessSparseVector(RAW_DIMENSIONS);
      rowVector.assign(RAW[rowIndex]);
      mapper.map(new IntWritable(rowIndex), new VectorWritable(rowVector), ctx);
    }

    // One emitted value per input row is expected.
    int emitted = output.getValue(NullWritable.get()).size();
    assertEquals("Number of map results", RAW_DIMENSIONS, emitted);
  }
  /**
   * Checks that the reducer reassembles the rows of the affinity matrix. An element that is NOT
   * on the diagonal keeps its value only if it was not flagged for cutting; a flagged element is
   * set to 0 at its original coordinates.
   *
   * <p>Conversely, the values of the elements that were "cut" are added to the diagonal entry of
   * their row, so the diagonal is expected to hold the sum of the row's cuts.
   *
   * @throws Exception if building a context, combining, or reducing fails
   */
  @Test
  public void testEigencutsAffinityCutsReducer() throws Exception {
    Configuration conf = new Configuration();
    Path affinityPath = new Path("affinity");
    Path sensitivityPath = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinityPath.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // The vertex types are distinguished by working path, which cannot be set manually on the
    // Context, so the mapper output is built by hand instead of running the mapper.
    Map<Text, List<VertexWritable>> mapped =
        buildMapData(affinityPath, sensitivityPath, this.sensitivity);

    // Combine phase.
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> combineOutput =
        new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context combineCtx =
        DummyRecordWriter.build(combiner, conf, combineOutput, Text.class, VertexWritable.class);
    for (Map.Entry<Text, List<VertexWritable>> group : mapped.entrySet()) {
      combiner.reduce(group.getKey(), group.getValue(), combineCtx);
    }

    // Reduce phase, fed with the combiner output.
    EigencutsAffinityCutsReducer reducer = new EigencutsAffinityCutsReducer();
    DummyRecordWriter<IntWritable, VectorWritable> reduceOutput =
        new DummyRecordWriter<IntWritable, VectorWritable>();
    Reducer<Text, VertexWritable, IntWritable, VectorWritable>.Context reduceCtx =
        DummyRecordWriter.build(reducer, conf, reduceOutput, Text.class, VertexWritable.class);
    for (Text combinedKey : combineOutput.getKeys()) {
      reducer.reduce(combinedKey, combineOutput.getValue(combinedKey), reduceCtx);
    }

    // Verify the reassembled affinity matrix row by row.
    for (IntWritable rowKey : reduceOutput.getKeys()) {
      List<VectorWritable> vectors = reduceOutput.getValue(rowKey);
      assertEquals("Only one vector with a given row number", 1, vectors.size());

      int rowIndex = rowKey.get();
      Vector rowVector = vectors.get(0).get();
      for (Vector.Element element : rowVector.all()) {
        int colIndex = element.index();
        if (rowIndex == colIndex) {
          // Diagonal: must equal the sum of the cuts made in this row.
          assertEquals(
              "Correct diagonal sum of cuts",
              sumOfRowCuts(rowIndex, this.sensitivity),
              element.get(),
              EPSILON);
        } else if (this.sensitivity[rowIndex][colIndex] == 0.0) {
          // Off-diagonal and not flagged for cutting: the original affinity is preserved.
          assertEquals(
              "Preserved element", this.affinity[rowIndex][colIndex], element.get(), EPSILON);
        } else {
          // Off-diagonal and flagged for cutting: the value must now be 0.
          assertEquals("Cut element", 0.0, element.get(), EPSILON);
        }
      }
    }
  }
  /**
   * Tests the combiner, by far the trickiest step. The easy case is a key with only two vertices,
   * which indicates a position on the diagonal of the two matrices: the combiner simply exits,
   * since the algorithm does not operate on the diagonal (isolating a data point from itself
   * makes no sense).
   *
   * <p>With four vertices, the two belonging to the affinity matrix must first be separated from
   * the two that are sensitivities. In theory each pair holds exactly the same value (symmetry).
   * If the sensitivity is below a certain threshold, the two affinity values are set to 0, but
   * not before they are added to the diagonal, so that the overall sum of the affinity row is
   * maintained.
   *
   * @throws Exception if building the context or combining fails
   */
  @Test
  public void testEigencutsAffinityCutsCombiner() throws Exception {
    Configuration conf = new Configuration();
    Path affinityPath = new Path("affinity");
    Path sensitivityPath = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinityPath.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // The vertex types are distinguished by working path, which cannot be set manually on the
    // Context, so the mapper output is built by hand instead of running the mapper.
    Map<Text, List<VertexWritable>> mapped =
        buildMapData(affinityPath, sensitivityPath, this.sensitivity);

    // Run the combiner over every hand-built group.
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> combineOutput =
        new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context combineCtx =
        DummyRecordWriter.build(combiner, conf, combineOutput, Text.class, VertexWritable.class);
    for (Map.Entry<Text, List<VertexWritable>> group : mapped.entrySet()) {
      combiner.reduce(group.getKey(), group.getValue(), combineCtx);
    }

    // The cut counter is expected to read 4 for this fixture.
    assertEquals(
        "Number of cuts detected",
        4,
        combineCtx.getCounter(EigencutsAffinityCutsJob.CUTSCOUNTER.NUM_CUTS).getValue());

    // Walk all results and check they match the affinity matrix, with every cut appearing
    // where it should.
    Map<Text, List<VertexWritable>> combined = combineOutput.getData();
    for (Map.Entry<Text, List<VertexWritable>> rowEntry : combined.entrySet()) {
      // Output keys are row numbers rendered as text.
      int rowIndex = Integer.parseInt(rowEntry.getKey().toString());
      double expectedDiagonal = sumOfRowCuts(rowIndex, this.sensitivity);
      double observedDiagonal = 0.0;

      for (VertexWritable vertex : rowEntry.getValue()) {
        int colIndex = vertex.getCol();
        if (rowIndex == colIndex) {
          // Diagonal contribution: accumulate and compare once the row is done.
          observedDiagonal += vertex.getValue();
        } else if (this.sensitivity[rowIndex][colIndex] == 0.0) {
          // Not cut: the affinity must be unchanged.
          assertEquals(
              "Preserved affinity value",
              this.affinity[rowIndex][colIndex],
              vertex.getValue(),
              EPSILON);
        } else {
          // Cut: the affinity must have been zeroed.
          assertEquals("Cut affinity value", 0.0, vertex.getValue(), EPSILON);
        }
      }

      // The diagonal must carry the sum of everything that was cut from this row.
      assertEquals("Diagonal sum from cuts", expectedDiagonal, observedDiagonal, EPSILON);
    }
  }