Example #1
 @Before
 public void generate() {
   RandomUtils.useTestSeed();
   uq = new UpperQuantile(101);
   data = new double[1001];
   Random gen = RandomUtils.getRandom();
   for (int i = 0; i < 1001; i++) {
     double x = gen.nextDouble();
     data[i] = x;
     uq.add(x);
   }
   Arrays.sort(data);
 }
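A minimal check this fixture appears to support (a sketch: it assumes UpperQuantile exposes quantile(double), as Example #2 below does, and the 0.01 tolerance is an illustrative guess):

 @Test
 public void testQuantileMatchesSortedData() {
   for (int i = 991; i <= 999; i++) {
     double q = i / 1000.0;
     // exact empirical quantile from the sorted reference array
     double expected = data[(int) (q * (data.length - 1))];
     assertEquals(expected, uq.quantile(q), 0.01);
   }
 }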
Example #2
  @Test
  public void testSpeed() {
    long total = 0;
    UpperQuantile data = new UpperQuantile(5000);
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 10000; i++) {
      data.add(gen.nextDouble());
    }
    data.clear();
    int n = 100000;
    for (int i = 0; i < n; i++) {
      double x = gen.nextDouble();
      long t0 = System.nanoTime();
      data.add(x);
      long t1 = System.nanoTime();
      total += t1 - t0;
    }
    // time per insert should be well under a microsecond (typically ~300 ns);
    // the assertion below uses a loose 100 µs bound to avoid spurious failures
    log.debug("t = {} us", total / 1e9 / n / 1e-6);
    Assert.assertTrue(total / 1e9 / n < 100e-6);

    total = 0;
    for (int i = 0; i < 10; i++) {
      double q = gen.nextDouble() * 0.01 + 0.99;
      long t0 = System.nanoTime();
      double r = data.quantile(q);
      long t1 = System.nanoTime();
      Assert.assertEquals(String.format("q=%.3f r=%.3f i=%d", q, r, i), q, r, 0.01);
      total += t1 - t0;
    }
    log.debug("t = {} us", total / 1e9 / 10 / 1e-6);
  }
Example #3
/**
 * Dummy FitnessEvaluator that stores the evaluations it calculates. Uses static storage so that
 * evaluations survive the evaluator duplication that occurs when it is passed as a Job parameter.
 */
public final class DummyEvaluator implements FitnessEvaluator<DummyCandidate> {

  private final Random rng = RandomUtils.getRandom();

  private static final Map<Integer, Double> evaluations = Maps.newHashMap();

  public static double getFitness(Integer key) {
    if (!evaluations.containsKey(key)) {
      throw new IllegalArgumentException("Fitness not found");
    }
    return evaluations.get(key);
  }

  public static void clearEvaluations() {
    evaluations.clear();
  }

  @Override
  public double getFitness(DummyCandidate candidate, List<? extends DummyCandidate> population) {
    if (evaluations.containsKey(candidate.getIndex())) {
      throw new IllegalArgumentException("Duplicate Fitness");
    }

    double fitness = rng.nextDouble();
    evaluations.put(candidate.getIndex(), fitness);

    return fitness;
  }

  @Override
  public boolean isNatural() {
    return false;
  }
}
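A usage sketch of the static round-trip above (hypothetical: the DummyCandidate constructor is a guess, since only getIndex() appears in this listing):

  DummyEvaluator evaluator = new DummyEvaluator();
  DummyCandidate candidate = new DummyCandidate(0); // hypothetical constructor
  double fitness = evaluator.getFitness(candidate, Collections.<DummyCandidate>emptyList());
  // the value is retrievable statically, even from a duplicated evaluator instance
  assertEquals(fitness, DummyEvaluator.getFitness(0), 0.0);
  DummyEvaluator.clearEvaluations();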
Example #4
  @Test
  public void testRecordReader() throws Exception {
    int n = 1;
    int maxNumSplits = 100;
    int maxNbTrees = 1000;

    Random rng = RandomUtils.getRandom();

    for (int nloop = 0; nloop < n; nloop++) {
      int numSplits = rng.nextInt(maxNumSplits) + 1;
      int nbTrees = rng.nextInt(maxNbTrees) + 1;

      Configuration conf = getConfiguration();
      Builder.setNbTrees(conf, nbTrees);

      InMemInputFormat inputFormat = new InMemInputFormat();
      List<InputSplit> splits = inputFormat.getSplits(conf, numSplits);

      for (int index = 0; index < numSplits; index++) {
        InMemInputSplit split = (InMemInputSplit) splits.get(index);
        InMemRecordReader reader = new InMemRecordReader(split);

        reader.initialize(split, null);

        for (int tree = 0; tree < split.getNbTrees(); tree++) {
          // nextKeyValue() should return true while trees remain
          assertTrue(reader.nextKeyValue());
          assertEquals(split.getFirstId() + tree, reader.getCurrentKey().get());
        }
      }
    }
  }
Example #5
  private class VectIterator implements Iterator<Vector> {
    private int count;
    private final Random random = RandomUtils.getRandom();

    @Override
    public boolean hasNext() {
      return count < numItems;
    }

    @Override
    public Vector next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      Vector result =
          type == VectorType.SPARSE
              ? new RandomAccessSparseVector(numItems)
              : new DenseVector(numItems);
      result.assign(
          new UnaryFunction() {
            @Override
            public double apply(double arg1) {
              return random.nextDouble();
            }
          });
      count++;
      return result;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }
Example #6
 private static Vector randomVector(int size, double entryMean) {
   Vector v = new DenseVector(size);
   Random r = RandomUtils.getRandom();
   for (int i = 0; i < size; ++i) {
     // note: despite the parameter name, entries have mean 0 and standard
     // deviation entryMean, since nextGaussian() is standard normal
     v.setQuick(i, r.nextGaussian() * entryMean);
   }
   return v;
 }
Example #7
  public void testStep0Mapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);
    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Step0Mapper mapper = new Step0Mapper();
      mapper.configure(p);

      Long firstKey = null;
      int size = 0;

      while (reader.next(key, value)) {
        if (firstKey == null) {
          firstKey = key.get();
        }

        mapper.map(key, value, collector, reporter);

        size++;
      }

      mapper.close();

      // validate the mapper's output
      assertEquals(p, collector.keys[p]);
      assertEquals(firstKey.longValue(), collector.values[p].getFirstId());
      assertEquals(size, collector.values[p].getSize());
    }
  }
Example #8
  public void testHashLong() {
    List<Long> original = Lists.newArrayList();

    for (int k = 0; k < 10; k++) {
      Random gen = RandomUtils.getRandom();
      for (int i = 0; i < 10000; i++) {
        long x = gen.nextLong();
        original.add(x);
      }

      checkCounts(original);
    }
  }
Example #9
  public void testHashDouble() {
    List<Double> original = Lists.newArrayList();

    for (int k = 0; k < 10; k++) {
      Random gen = RandomUtils.getRandom();
      for (int i = 0; i < 10000; i++) {
        double x = gen.nextDouble();
        original.add(x);
      }

      checkCounts(original);
    }
  }
Example #10
  public void testHashFloat() {
    Multiset<Integer> violations = HashMultiset.create();
    for (int k = 0; k < 1000; k++) {
      List<Float> original = Lists.newArrayList();

      Random gen = RandomUtils.getRandom();
      for (int i = 0; i < 10000; i++) {
        float x = (float) gen.nextDouble();
        original.add(x);
      }

      violations.add(checkCounts(original) <= 12 ? 0 : 1);
    }
    // a float widened to a double leaves the low 29 mantissa bits zero, so the
    // hashes for floats don't really have 32 bits of entropy and the test only
    // succeeds at better than about a 99% rate.
    assertTrue(violations.count(0) >= 985);
  }
Example #11
  @Test
  public void testProcessOutput() throws Exception {
    Configuration conf = getConfiguration();
    conf.setInt("mapred.map.tasks", NUM_MAPS);

    Random rng = RandomUtils.getRandom();

    // prepare the output
    TreeID[] keys = new TreeID[NUM_TREES];
    MapredOutput[] values = new MapredOutput[NUM_TREES];
    int[] firstIds = new int[NUM_MAPS];
    randomKeyValues(rng, keys, values, firstIds);

    // store the output in a sequence file
    Path base = getTestTempDirPath("testdata");
    FileSystem fs = base.getFileSystem(conf);

    Path outputFile = new Path(base, "PartialBuilderTest.seq");
    Writer writer =
        SequenceFile.createWriter(fs, conf, outputFile, TreeID.class, MapredOutput.class);

    try {
      for (int index = 0; index < NUM_TREES; index++) {
        writer.append(keys[index], values[index]);
      }
    } finally {
      Closeables.close(writer, false);
    }

    // load the output and make sure it's valid
    TreeID[] newKeys = new TreeID[NUM_TREES];
    Node[] newTrees = new Node[NUM_TREES];

    PartialBuilder.processOutput(new Job(conf), base, newKeys, newTrees);

    // check the forest
    for (int tree = 0; tree < NUM_TREES; tree++) {
      assertEquals(values[tree].getTree(), newTrees[tree]);
    }

    assertTrue("keys not equal", Arrays.deepEquals(keys, newKeys));
  }
Example #12
  @Test
  public void testSplits() throws Exception {
    int n = 1;
    int maxNumSplits = 100;
    int maxNbTrees = 1000;

    Random rng = RandomUtils.getRandom();

    for (int nloop = 0; nloop < n; nloop++) {
      int numSplits = rng.nextInt(maxNumSplits) + 1;
      int nbTrees = rng.nextInt(maxNbTrees) + 1;

      Configuration conf = getConfiguration();
      Builder.setNbTrees(conf, nbTrees);

      InMemInputFormat inputFormat = new InMemInputFormat();
      List<InputSplit> splits = inputFormat.getSplits(conf, numSplits);

      assertEquals(numSplits, splits.size());

      int nbTreesPerSplit = nbTrees / numSplits;
      int totalTrees = 0;
      int expectedId = 0;

      for (int index = 0; index < numSplits; index++) {
        assertTrue(splits.get(index) instanceof InMemInputSplit);

        InMemInputSplit split = (InMemInputSplit) splits.get(index);

        assertEquals(expectedId, split.getFirstId());

        if (index < numSplits - 1) {
          assertEquals(nbTreesPerSplit, split.getNbTrees());
        } else {
          assertEquals(nbTrees - totalTrees, split.getNbTrees());
        }

        totalTrees += split.getNbTrees();
        expectedId += split.getNbTrees();
      }
    }
  }
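Stated directly, the allocation rule these assertions encode (a restatement of the test's expectations, not new behavior): each split gets nbTrees / numSplits trees, and the last split absorbs the remainder.

  // e.g. nbTrees = 10, numSplits = 3  ->  split sizes 3, 3, 4
  int nbTreesPerSplit = nbTrees / numSplits;                       // 3
  int lastSplitSize = nbTrees - nbTreesPerSplit * (numSplits - 1); // 4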
Example #13
  @Test
  public void testTreeID() {
    Random rng = RandomUtils.getRandom();

    for (int nloop = 0; nloop < 1000000; nloop++) {
      int partition = rng.nextInt() & Integer.MAX_VALUE; // mask keeps it non-negative (Math.abs(Integer.MIN_VALUE) is negative)
      int treeId = rng.nextInt(TreeID.MAX_TREEID);

      TreeID t1 = new TreeID(partition, treeId);

      assertEquals(partition, t1.partition());
      assertEquals(treeId, t1.treeId());

      TreeID t2 = new TreeID();
      t2.set(partition, treeId);

      assertEquals(partition, t2.partition());
      assertEquals(treeId, t2.treeId());
    }
  }
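A minimal packing that satisfies the round-trip assertions above (a sketch only; Mahout's actual TreeID is a Writable backed by a long, and its MAX_TREEID value is not shown here):

  public class SimpleTreeID {
    public static final int MAX_TREEID = 100000; // illustrative bound only

    private long id;

    public void set(int partition, int treeId) {
      // long arithmetic keeps partition * MAX_TREEID from overflowing int
      id = (long) partition * MAX_TREEID + treeId;
    }

    public int partition() {
      return (int) (id / MAX_TREEID);
    }

    public int treeId() {
      return (int) (id % MAX_TREEID);
    }
  }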
Example #14
 @Test
 public void testTopItemsRandom() throws Exception {
   long[] ids = new long[100];
   for (int i = 0; i < 100; i++) {
     ids[i] = i;
   }
   LongPrimitiveIterator possibleItemIds = new LongPrimitiveArrayIterator(ids);
   final Random random = RandomUtils.getRandom();
   TopItems.Estimator<Long> estimator =
       new TopItems.Estimator<Long>() {
         @Override
         public double estimate(Long thing) {
           return random.nextDouble();
         }
       };
   List<RecommendedItem> topItems = TopItems.getTopItems(10, possibleItemIds, null, estimator);
   assertEquals(10, topItems.size());
   double last = 2.0;
   for (RecommendedItem topItem : topItems) {
     assertTrue(topItem.getValue() <= last);
     last = topItem.getValue(); // track the value, not the item ID, so ordering is actually checked
   }
 }
Example #15
  private void initializeModel() {
    TopicModel topicModel =
        new TopicModel(
            numTopics,
            numTerms,
            eta,
            alpha,
            RandomUtils.getRandom(),
            terms,
            numUpdatingThreads,
            initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
    topicModel.setConf(getConf());

    TopicModel updatedModel =
        initialModelCorpusFraction == 0
            ? new TopicModel(numTopics, numTerms, eta, alpha, null, terms, numUpdatingThreads, 1)
            : topicModel;
    updatedModel.setConf(getConf());
    docTopicCounts = new DenseMatrix(numDocuments, numTopics);
    docTopicCounts.assign(1.0 / numTopics);
    modelTrainer =
        new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
  }
Example #16
 @Override
 public int hashCode() {
   return feature.hashCode() ^ RandomUtils.hashDouble(value) ^ maxIndex ^ categories.hashCode();
 }
Example #17
 @Override
 public int hashCode() {
   return RandomUtils.hashDouble(weight) ^ index;
 }
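For reference, RandomUtils.hashDouble presumably reduces the double to an int by folding its bit pattern, the same way Double.hashCode does (a sketch, not Mahout's verbatim source):

 static int hashDouble(double value) {
   long bits = Double.doubleToLongBits(value); // canonical 64-bit pattern
   return (int) (bits ^ (bits >>> 32));        // fold high and low words
 }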
Example #18
File: Uniform.java  Project: guitao/tyful
public class Uniform extends AbstractContinousDistribution {

  private double min;
  private double max;

  // The uniform random number generator shared by all <b>static</b> methods.
  protected static final Uniform shared = new Uniform(RandomUtils.getRandom());

  /**
   * Constructs a uniform distribution with the given minimum and maximum, using a {@link
   * org.apache.mahout.math.jet.random.engine.MersenneTwister} seeded with the given seed.
   */
  public Uniform(double min, double max, int seed) {
    this(min, max, RandomUtils.getRandom(seed));
  }

  /** Constructs a uniform distribution with the given minimum and maximum. */
  public Uniform(double min, double max, Random randomGenerator) {
    setRandomGenerator(randomGenerator);
    setState(min, max);
  }

  /** Constructs a uniform distribution with <tt>min=0.0</tt> and <tt>max=1.0</tt>. */
  public Uniform(Random randomGenerator) {
    this(0, 1, randomGenerator);
  }

  /** Returns the cumulative distribution function (assuming a continuous uniform distribution). */
  @Override
  public double cdf(double x) {
    if (x <= min) {
      return 0.0;
    }
    if (x >= max) {
      return 1.0;
    }
    return (x - min) / (max - min);
  }

  /** Returns a uniformly distributed random <tt>boolean</tt>. */
  public boolean nextBoolean() {
    return randomGenerator.nextDouble() > 0.5;
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(min,max)</tt>
   * (excluding <tt>min</tt> and <tt>max</tt>).
   */
  @Override
  public double nextDouble() {
    return min + (max - min) * randomGenerator.nextDouble();
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public double nextDoubleFromTo(double from, double to) {
    return from + (to - from) * randomGenerator.nextDouble();
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public float nextFloatFromTo(float from, float to) {
    return (float) nextDoubleFromTo(from, to);
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public int nextIntFromTo(int from, int to) {
    // long arithmetic so that (1 + to - from) cannot overflow the int range
    return (int)
        ((long) from + (long) ((1L + (long) to - (long) from) * randomGenerator.nextDouble()));
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public long nextLongFromTo(long from, long to) {
    /* Doing this correctly turns out to be trickier than expected: it must avoid
       overflow and underflow and handle cases like from=-1, to=1 properly.
       The naive
         return (long) (Doubles.randomFromTo(from, to));
       would NOT work, because casting from double to long is asymmetric around
       zero ((long) -0.7 == 0 and (long) 0.7 == 0); rounding avoids that, and
       overflow/underflow must still be checked explicitly.
    */

    // first the most likely and also the fastest case.
    if (from >= 0 && to < Long.MAX_VALUE) {
      return from + (long) (nextDoubleFromTo(0.0, to - from + 1));
    }

    // would we get a numeric overflow?
    // if not, we can still handle the case rather efficient.
    double diff = ((double) to) - (double) from + 1.0;
    if (diff <= Long.MAX_VALUE) {
      return from + (long) (nextDoubleFromTo(0.0, diff));
    }

    // now the pathologic boundary cases.
    // they are handled rather slow.
    long random;
    if (from == Long.MIN_VALUE) {
      if (to == Long.MAX_VALUE) {
        // return Math.round(nextDoubleFromTo(from,to));
        int i1 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE);
        int i2 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE);
        return ((i1 & 0xFFFFFFFFL) << 32) | (i2 & 0xFFFFFFFFL);
      }
      random = Math.round(nextDoubleFromTo(from, to + 1));
      if (random > to) {
        random = from;
      }
    } else {
      random = Math.round(nextDoubleFromTo(from - 1, to));
      if (random < from) {
        random = to;
      }
    }
    return random;
  }

  /** Returns the probability density function (assuming a continuous uniform distribution). */
  @Override
  public double pdf(double x) {
    if (x <= min || x >= max) {
      return 0.0;
    }
    return 1.0 / (max - min);
  }

  /** Sets the internal state. */
  public void setState(double min, double max) {
    if (max < min) {
      setState(max, min);
      return;
    }
    this.min = min;
    this.max = max;
  }

  /** Returns a uniformly distributed random <tt>boolean</tt>. */
  public static boolean staticNextBoolean() {
    synchronized (shared) {
      return shared.nextBoolean();
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(0,1)</tt> (excluding
   * <tt>0</tt> and <tt>1</tt>).
   */
  public static double staticNextDouble() {
    synchronized (shared) {
      return shared.nextDouble();
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public static double staticNextDoubleFromTo(double from, double to) {
    synchronized (shared) {
      return shared.nextDoubleFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public static float staticNextFloatFromTo(float from, float to) {
    synchronized (shared) {
      return shared.nextFloatFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public static int staticNextIntFromTo(int from, int to) {
    synchronized (shared) {
      return shared.nextIntFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from &lt;= to</tt>.
   */
  public static long staticNextLongFromTo(long from, long to) {
    synchronized (shared) {
      return shared.nextLongFromTo(from, to);
    }
  }

  /** Returns a String representation of the receiver. */
  @Override
  public String toString() {
    return this.getClass().getName() + '(' + min + ',' + max + ')';
  }
}
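Usage sketch for the class above (all calls shown are defined in this listing; seeded construction makes the draws reproducible):

  Uniform u = new Uniform(0.0, 10.0, 42);  // MersenneTwister seeded with 42
  double x = u.nextDouble();               // uniform in the open interval (0, 10)
  int roll = u.nextIntFromTo(1, 6);        // uniform in the closed interval [1, 6]
  double p = u.cdf(5.0);                   // 0.5 when min=0, max=10
  boolean b = Uniform.staticNextBoolean(); // synchronized shared-instance variant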
Example #19
File: Uniform.java  Project: guitao/tyful
 /**
  * Constructs a uniform distribution with the given minimum and maximum, using a {@link
  * org.apache.mahout.math.jet.random.engine.MersenneTwister} seeded with the given seed.
  */
 public Uniform(double min, double max, int seed) {
   this(min, max, RandomUtils.getRandom(seed));
 }
Example #20
  public void testProcessOutput() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);

    // each instance label is its index in the dataset
    int labelId = Utils.findLabel(descriptor);
    for (int index = 0; index < numInstances; index++) {
      source[index][labelId] = index;
    }

    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];

    int[] expectedIds = new int[numMaps];

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Long firstKey = null;
      int size = 0;

      while (reader.next(key, value)) {
        if (firstKey == null) {
          firstKey = key.get();
          expectedIds[p] = converter.convert(0, value.toString()).label;
        }

        size++;
      }

      keys[p] = p;
      values[p] = new Step0Output(firstKey, size);
    }

    Step0Output[] partitions = Step0Job.processOutput(keys, values);

    int[] actualIds = Step0Output.extractFirstIds(partitions);

    assertTrue(
        "Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
        Arrays.equals(expectedIds, actualIds));
  }
Example #21
 protected AbstractDifferenceRecommenderEvaluatorCrossDomain() {
   random = RandomUtils.getRandom();
   maxPreference = Float.NaN; // NaN marks the preference bounds as unset
   minPreference = Float.NaN;
 }
Example #22
  @Test
  public void testSSVDSolver() throws Exception {

    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = new LinkedList<Closeable>();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w =
        SequenceFile.createWriter(
            FileSystem.getLocal(conf),
            conf,
            aLocPath,
            IntWritable.class,
            VectorWritable.class,
            CompressionType.BLOCK,
            new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    Vector dv;
    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    double muAmplitude = 50.0;
    int m = 1000;
    for (int i = 0; i < m; i++) {
      dv = new SequentialAccessSparseVector(n);
      for (int j = 0; j < n / 5; j++) {
        dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
      }
      roww.set(i);
      vw.set(dv);
      w.append(roww, vw);
    }
    closeables.remove(w);
    w.close();

    FileSystem fs = FileSystem.get(conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    int ablockRows = 251;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] {aPath}, svdOutPath, ablockRows, k, p, 3);
    // ssvd.setcUHalfSigma(true);
    // ssvd.setcVHalfSigma(true);
    ssvd.setOverwrite(true);
    ssvd.run();

    double[] stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf);

    // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new
    // Array2DRowRealMatrix(a));
    SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));

    a = null;

    double[] svalues2 = svd2.getSingularValues();
    dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
      Assert.assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
    }

    double[][] q =
        SSVDSolver.loadDistributedRowMatrix(
            fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(q), false, s_epsilon);

    double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "U/[^_]*"), conf);

    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon);
    double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "V/[^_]*"), conf);

    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
  }