@Before
public void generate() {
  RandomUtils.useTestSeed();
  uq = new UpperQuantile(101);
  data = new double[1001];
  Random gen = RandomUtils.getRandom();
  for (int i = 0; i < 1001; i++) {
    double x = gen.nextDouble();
    data[i] = x;
    uq.add(x);
  }
  Arrays.sort(data);
}
@Test
public void testSpeed() {
  long total = 0;
  UpperQuantile data = new UpperQuantile(5000);
  Random gen = RandomUtils.getRandom();
  for (int i = 0; i < 10000; i++) {
    data.add(gen.nextDouble());
  }
  data.clear();
  int n = 100000;
  for (int i = 0; i < n; i++) {
    double x = gen.nextDouble();
    long t0 = System.nanoTime();
    data.add(x);
    long t1 = System.nanoTime();
    total += t1 - t0;
  }
  // time per insert should be well under the 100 micro-second bound asserted below;
  // typically this actually comes out around 300 ns
  log.debug("t = {} us", total / 1e9 / n / 1e-6);
  Assert.assertTrue(total / 1e9 / n < 100e-6);

  total = 0;
  for (int i = 0; i < 10; i++) {
    double q = gen.nextDouble() * 0.01 + 0.99;
    long t0 = System.nanoTime();
    double r = data.quantile(q);
    long t1 = System.nanoTime();
    Assert.assertEquals(String.format("q=%.3f r=%.3f i=%d", q, r, i), q, r, 0.01);
    total += t1 - t0;
  }
  log.debug("t = {} us", total / 1e9 / 10 / 1e-6);
}
/**
 * Dummy FitnessEvaluator that stores the evaluations it calculates. Uses static storage so the
 * evaluations survive the evaluator being duplicated when it is passed as a Job parameter.
 */
public final class DummyEvaluator implements FitnessEvaluator<DummyCandidate> {

  private final Random rng = RandomUtils.getRandom();

  private static final Map<Integer, Double> evaluations = Maps.newHashMap();

  public static double getFitness(Integer key) {
    if (!evaluations.containsKey(key)) {
      throw new IllegalArgumentException("Fitness not found");
    }
    return evaluations.get(key);
  }

  public static void clearEvaluations() {
    evaluations.clear();
  }

  @Override
  public double getFitness(DummyCandidate candidate, List<? extends DummyCandidate> population) {
    if (evaluations.containsKey(candidate.getIndex())) {
      throw new IllegalArgumentException("Duplicate Fitness");
    }
    double fitness = rng.nextDouble();
    evaluations.put(candidate.getIndex(), fitness);
    return fitness;
  }

  @Override
  public boolean isNatural() {
    return false;
  }
}
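// A minimal usage sketch (not part of the original source): the static map lets test code read
// back the fitness that a duplicated evaluator instance recorded for a candidate.
// DummyCandidate.generatePopulation() is an assumed helper here; only getIndex() and the
// DummyEvaluator methods above are taken from the snippet itself.
DummyEvaluator evaluator = new DummyEvaluator();
DummyEvaluator.clearEvaluations();
List<DummyCandidate> population = DummyCandidate.generatePopulation(10); // assumed helper
for (DummyCandidate candidate : population) {
  evaluator.getFitness(candidate, population);
}
// later, e.g. after the Job has run, the evaluation can be looked up by candidate index
double recorded = DummyEvaluator.getFitness(population.get(0).getIndex());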
@Test
public void testRecordReader() throws Exception {
  int n = 1;
  int maxNumSplits = 100;
  int maxNbTrees = 1000;

  Random rng = RandomUtils.getRandom();

  for (int nloop = 0; nloop < n; nloop++) {
    int numSplits = rng.nextInt(maxNumSplits) + 1;
    int nbTrees = rng.nextInt(maxNbTrees) + 1;

    Configuration conf = getConfiguration();
    Builder.setNbTrees(conf, nbTrees);

    InMemInputFormat inputFormat = new InMemInputFormat();
    List<InputSplit> splits = inputFormat.getSplits(conf, numSplits);

    for (int index = 0; index < numSplits; index++) {
      InMemInputSplit split = (InMemInputSplit) splits.get(index);
      InMemRecordReader reader = new InMemRecordReader(split);
      reader.initialize(split, null);

      for (int tree = 0; tree < split.getNbTrees(); tree++) {
        // nextKeyValue() should return true as long as there is a tree left to read
        assertEquals(tree < split.getNbTrees(), reader.nextKeyValue());
        assertEquals(split.getFirstId() + tree, reader.getCurrentKey().get());
      }
    }
  }
}
private class VectIterator implements Iterator<Vector> {

  private int count;
  private final Random random = RandomUtils.getRandom();

  @Override
  public boolean hasNext() {
    return count < numItems;
  }

  @Override
  public Vector next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    Vector result =
        type == VectorType.SPARSE ? new RandomAccessSparseVector(numItems) : new DenseVector(numItems);
    result.assign(new UnaryFunction() {
      @Override
      public double apply(double arg1) {
        return random.nextDouble();
      }
    });
    count++;
    return result;
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}
private static Vector randomVector(int size, double entryMean) {
  Vector v = new DenseVector(size);
  Random r = RandomUtils.getRandom();
  for (int i = 0; i < size; ++i) {
    v.setQuick(i, r.nextGaussian() * entryMean);
  }
  return v;
}
public void testStep0Mapper() throws Exception {
  Random rng = RandomUtils.getRandom();

  // create a dataset large enough to be split up
  String descriptor = Utils.randomDescriptor(rng, numAttributes);
  double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);
  String[] sData = Utils.double2String(source);

  // write the data to a file
  Path dataPath = Utils.writeDataToTestFile(sData);

  JobConf job = new JobConf();
  job.setNumMapTasks(numMaps);

  FileInputFormat.setInputPaths(job, dataPath);

  // retrieve the splits
  TextInputFormat input = (TextInputFormat) job.getInputFormat();
  InputSplit[] splits = input.getSplits(job, numMaps);

  InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
  Builder.sortSplits(sorted);

  Step0OutputCollector collector = new Step0OutputCollector(numMaps);
  Reporter reporter = Reporter.NULL;

  for (int p = 0; p < numMaps; p++) {
    InputSplit split = sorted[p];
    RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

    LongWritable key = reader.createKey();
    Text value = reader.createValue();

    Step0Mapper mapper = new Step0Mapper();
    mapper.configure(p);

    Long firstKey = null;
    int size = 0;

    while (reader.next(key, value)) {
      if (firstKey == null) {
        firstKey = key.get();
      }
      mapper.map(key, value, collector, reporter);
      size++;
    }

    mapper.close();

    // validate the mapper's output
    assertEquals(p, collector.keys[p]);
    assertEquals(firstKey.longValue(), collector.values[p].getFirstId());
    assertEquals(size, collector.values[p].getSize());
  }
}
public void testHashLong() {
  List<Long> original = Lists.newArrayList();
  for (int k = 0; k < 10; k++) {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 10000; i++) {
      long x = gen.nextLong();
      original.add(x);
    }
    checkCounts(original);
  }
}
public void testHashDouble() {
  List<Double> original = Lists.newArrayList();
  for (int k = 0; k < 10; k++) {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 10000; i++) {
      double x = gen.nextDouble();
      original.add(x);
    }
    checkCounts(original);
  }
}
public void testHashFloat() {
  Multiset<Integer> violations = HashMultiset.create();
  for (int k = 0; k < 1000; k++) {
    List<Float> original = Lists.newArrayList();
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 10000; i++) {
      float x = (float) gen.nextDouble();
      original.add(x);
    }
    violations.add(checkCounts(original) <= 12 ? 0 : 1);
  }
  // the hashes for floats don't really have 32 bits of entropy, so the test
  // only succeeds at better than about a 99% rate
  assertTrue(violations.count(0) >= 985);
}
@Test
public void testProcessOutput() throws Exception {
  Configuration conf = getConfiguration();
  conf.setInt("mapred.map.tasks", NUM_MAPS);

  Random rng = RandomUtils.getRandom();

  // prepare the output
  TreeID[] keys = new TreeID[NUM_TREES];
  MapredOutput[] values = new MapredOutput[NUM_TREES];
  int[] firstIds = new int[NUM_MAPS];
  randomKeyValues(rng, keys, values, firstIds);

  // store the output in a sequence file
  Path base = getTestTempDirPath("testdata");
  FileSystem fs = base.getFileSystem(conf);
  Path outputFile = new Path(base, "PartialBuilderTest.seq");

  Writer writer = SequenceFile.createWriter(fs, conf, outputFile, TreeID.class, MapredOutput.class);
  try {
    for (int index = 0; index < NUM_TREES; index++) {
      writer.append(keys[index], values[index]);
    }
  } finally {
    Closeables.close(writer, false);
  }

  // load the output and make sure it's valid
  TreeID[] newKeys = new TreeID[NUM_TREES];
  Node[] newTrees = new Node[NUM_TREES];
  PartialBuilder.processOutput(new Job(conf), base, newKeys, newTrees);

  // check the forest
  for (int tree = 0; tree < NUM_TREES; tree++) {
    assertEquals(values[tree].getTree(), newTrees[tree]);
  }

  assertTrue("keys not equal", Arrays.deepEquals(keys, newKeys));
}
@Test
public void testSplits() throws Exception {
  int n = 1;
  int maxNumSplits = 100;
  int maxNbTrees = 1000;

  Random rng = RandomUtils.getRandom();

  for (int nloop = 0; nloop < n; nloop++) {
    int numSplits = rng.nextInt(maxNumSplits) + 1;
    int nbTrees = rng.nextInt(maxNbTrees) + 1;

    Configuration conf = getConfiguration();
    Builder.setNbTrees(conf, nbTrees);

    InMemInputFormat inputFormat = new InMemInputFormat();
    List<InputSplit> splits = inputFormat.getSplits(conf, numSplits);

    assertEquals(numSplits, splits.size());

    int nbTreesPerSplit = nbTrees / numSplits;
    int totalTrees = 0;
    int expectedId = 0;

    for (int index = 0; index < numSplits; index++) {
      assertTrue(splits.get(index) instanceof InMemInputSplit);

      InMemInputSplit split = (InMemInputSplit) splits.get(index);

      assertEquals(expectedId, split.getFirstId());

      if (index < numSplits - 1) {
        assertEquals(nbTreesPerSplit, split.getNbTrees());
      } else {
        assertEquals(nbTrees - totalTrees, split.getNbTrees());
      }

      totalTrees += split.getNbTrees();
      expectedId += split.getNbTrees();
    }
  }
}
@Test
public void testTreeID() {
  Random rng = RandomUtils.getRandom();

  for (int nloop = 0; nloop < 1000000; nloop++) {
    int partition = Math.abs(rng.nextInt());
    int treeId = rng.nextInt(TreeID.MAX_TREEID);

    TreeID t1 = new TreeID(partition, treeId);
    assertEquals(partition, t1.partition());
    assertEquals(treeId, t1.treeId());

    TreeID t2 = new TreeID();
    t2.set(partition, treeId);
    assertEquals(partition, t2.partition());
    assertEquals(treeId, t2.treeId());
  }
}
@Test
public void testTopItemsRandom() throws Exception {
  long[] ids = new long[100];
  for (int i = 0; i < 100; i++) {
    ids[i] = i;
  }
  LongPrimitiveIterator possibleItemIds = new LongPrimitiveArrayIterator(ids);
  final Random random = RandomUtils.getRandom();
  TopItems.Estimator<Long> estimator = new TopItems.Estimator<Long>() {
    @Override
    public double estimate(Long thing) {
      return random.nextDouble();
    }
  };
  List<RecommendedItem> topItems = TopItems.getTopItems(10, possibleItemIds, null, estimator);
  assertEquals(10, topItems.size());
  double last = 2.0;
  for (RecommendedItem topItem : topItems) {
    // items must come back in descending order of estimated value; tracking the previous
    // value (rather than the item ID) is what makes this ordering check meaningful
    assertTrue(topItem.getValue() <= last);
    last = topItem.getValue();
  }
}
private void initializeModel() {
  TopicModel topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
      numUpdatingThreads, initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
  topicModel.setConf(getConf());

  TopicModel updatedModel = initialModelCorpusFraction == 0
      ? new TopicModel(numTopics, numTerms, eta, alpha, null, terms, numUpdatingThreads, 1)
      : topicModel;
  updatedModel.setConf(getConf());

  docTopicCounts = new DenseMatrix(numDocuments, numTopics);
  docTopicCounts.assign(1.0 / numTopics);

  modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
}
@Override
public int hashCode() {
  return feature.hashCode() ^ RandomUtils.hashDouble(value) ^ maxIndex ^ categories.hashCode();
}

@Override
public int hashCode() {
  return RandomUtils.hashDouble(weight) ^ index;
}
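// A minimal sketch of what a helper like RandomUtils.hashDouble has to accomplish: fold the
// 64-bit IEEE representation down to 32 bits so equal doubles hash equally and both halves of
// the bit pattern influence the result. This is an illustrative assumption, not the actual
// Mahout implementation.
static int hashDoubleSketch(double value) {
  long bits = Double.doubleToLongBits(value);
  // XOR the high and low 32-bit words together
  return (int) (bits ^ (bits >>> 32));
}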
public class Uniform extends AbstractContinousDistribution {

  private double min;
  private double max;

  // The uniform random number generator shared by all <b>static</b> methods.
  protected static final Uniform shared = new Uniform(RandomUtils.getRandom());

  /**
   * Constructs a uniform distribution with the given minimum and maximum, using a
   * {@link org.apache.mahout.math.jet.random.engine.MersenneTwister} seeded with the given seed.
   */
  public Uniform(double min, double max, int seed) {
    this(min, max, RandomUtils.getRandom(seed));
  }

  /** Constructs a uniform distribution with the given minimum and maximum. */
  public Uniform(double min, double max, Random randomGenerator) {
    setRandomGenerator(randomGenerator);
    setState(min, max);
  }

  /** Constructs a uniform distribution with <tt>min=0.0</tt> and <tt>max=1.0</tt>. */
  public Uniform(Random randomGenerator) {
    this(0, 1, randomGenerator);
  }

  /** Returns the cumulative distribution function (assuming a continuous uniform distribution). */
  @Override
  public double cdf(double x) {
    if (x <= min) {
      return 0.0;
    }
    if (x >= max) {
      return 1.0;
    }
    return (x - min) / (max - min);
  }

  /** Returns a uniformly distributed random <tt>boolean</tt>. */
  public boolean nextBoolean() {
    return randomGenerator.nextDouble() > 0.5;
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(min,max)</tt>
   * (excluding <tt>min</tt> and <tt>max</tt>).
   */
  @Override
  public double nextDouble() {
    return min + (max - min) * randomGenerator.nextDouble();
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public double nextDoubleFromTo(double from, double to) {
    return from + (to - from) * randomGenerator.nextDouble();
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public float nextFloatFromTo(float from, float to) {
    return (float) nextDoubleFromTo(from, to);
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public int nextIntFromTo(int from, int to) {
    return (int) ((long) from + (long) ((1L + (long) to - (long) from) * randomGenerator.nextDouble()));
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public long nextLongFromTo(long from, long to) {
    /*
     * Doing this turns out to be trickier than expected. The code below avoids overflows and
     * underflows, and treats cases like from=-1, to=1 correctly. The following would NOT solve
     * the problem: return (long) (Doubles.randomFromTo(from, to));
     *
     * Rounding avoids the asymmetric behaviour of casts from double to long:
     * (long) -0.7 == 0, (long) 0.7 == 0.
     * Checking for overflows and underflows is also necessary.
     */

    // first the most likely and also the fastest case
    if (from >= 0 && to < Long.MAX_VALUE) {
      return from + (long) nextDoubleFromTo(0.0, to - from + 1);
    }

    // would we get a numeric overflow?
    // if not, we can still handle the case rather efficiently
    double diff = (double) to - (double) from + 1.0;
    if (diff <= Long.MAX_VALUE) {
      return from + (long) nextDoubleFromTo(0.0, diff);
    }

    // now the pathological boundary cases; these are handled rather slowly
    long random;
    if (from == Long.MIN_VALUE) {
      if (to == Long.MAX_VALUE) {
        //return Math.round(nextDoubleFromTo(from, to));
        int i1 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE);
        int i2 = nextIntFromTo(Integer.MIN_VALUE, Integer.MAX_VALUE);
        return ((i1 & 0xFFFFFFFFL) << 32) | (i2 & 0xFFFFFFFFL);
      }
      random = Math.round(nextDoubleFromTo(from, to + 1));
      if (random > to) {
        random = from;
      }
    } else {
      random = Math.round(nextDoubleFromTo(from - 1, to));
      if (random < from) {
        random = to;
      }
    }
    return random;
  }

  /** Returns the probability density function (assuming a continuous uniform distribution). */
  @Override
  public double pdf(double x) {
    if (x <= min || x >= max) {
      return 0.0;
    }
    return 1.0 / (max - min);
  }

  /** Sets the internal state. */
  public void setState(double min, double max) {
    if (max < min) {
      setState(max, min);
      return;
    }
    this.min = min;
    this.max = max;
  }

  /** Returns a uniformly distributed random <tt>boolean</tt>. */
  public static boolean staticNextBoolean() {
    synchronized (shared) {
      return shared.nextBoolean();
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(0,1)</tt> (excluding
   * <tt>0</tt> and <tt>1</tt>).
   */
  public static double staticNextDouble() {
    synchronized (shared) {
      return shared.nextDouble();
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public static double staticNextDoubleFromTo(double from, double to) {
    synchronized (shared) {
      return shared.nextDoubleFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the open interval <tt>(from,to)</tt>
   * (excluding <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public static float staticNextFloatFromTo(float from, float to) {
    synchronized (shared) {
      return shared.nextFloatFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public static int staticNextIntFromTo(int from, int to) {
    synchronized (shared) {
      return shared.nextIntFromTo(from, to);
    }
  }

  /**
   * Returns a uniformly distributed random number in the closed interval <tt>[from,to]</tt>
   * (including <tt>from</tt> and <tt>to</tt>). Pre conditions: <tt>from <= to</tt>.
   */
  public static long staticNextLongFromTo(long from, long to) {
    synchronized (shared) {
      return shared.nextLongFromTo(from, to);
    }
  }

  /** Returns a String representation of the receiver. */
  @Override
  public String toString() {
    return this.getClass().getName() + '(' + min + ',' + max + ')';
  }
}
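// A brief usage sketch (not from the original source): constructing a seeded Uniform and drawing
// values from it, plus one of the synchronized static helpers. The seed value and variable names
// are arbitrary; the methods called are the ones defined in the class above.
Uniform dist = new Uniform(0.0, 10.0, 42);
double d = dist.nextDouble();            // in the open interval (0, 10)
int roll = dist.nextIntFromTo(1, 6);     // in the closed interval [1, 6]
long id = Uniform.staticNextLongFromTo(0L, 1000L);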
public void testProcessOutput() throws Exception {
  Random rng = RandomUtils.getRandom();

  // create a dataset large enough to be split up
  String descriptor = Utils.randomDescriptor(rng, numAttributes);
  double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);

  // each instance label is its index in the dataset
  int labelId = Utils.findLabel(descriptor);
  for (int index = 0; index < numInstances; index++) {
    source[index][labelId] = index;
  }

  String[] sData = Utils.double2String(source);

  // write the data to a file
  Path dataPath = Utils.writeDataToTestFile(sData);

  // prepare a data converter
  Dataset dataset = DataLoader.generateDataset(descriptor, sData);
  DataConverter converter = new DataConverter(dataset);

  JobConf job = new JobConf();
  job.setNumMapTasks(numMaps);
  FileInputFormat.setInputPaths(job, dataPath);

  // retrieve the splits
  TextInputFormat input = (TextInputFormat) job.getInputFormat();
  InputSplit[] splits = input.getSplits(job, numMaps);

  InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
  Builder.sortSplits(sorted);

  Reporter reporter = Reporter.NULL;

  int[] keys = new int[numMaps];
  Step0Output[] values = new Step0Output[numMaps];
  int[] expectedIds = new int[numMaps];

  for (int p = 0; p < numMaps; p++) {
    InputSplit split = sorted[p];
    RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

    LongWritable key = reader.createKey();
    Text value = reader.createValue();

    Long firstKey = null;
    int size = 0;

    while (reader.next(key, value)) {
      if (firstKey == null) {
        firstKey = key.get();
        expectedIds[p] = converter.convert(0, value.toString()).label;
      }
      size++;
    }

    keys[p] = p;
    values[p] = new Step0Output(firstKey, size);
  }

  Step0Output[] partitions = Step0Job.processOutput(keys, values);
  int[] actualIds = Step0Output.extractFirstIds(partitions);

  assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
      Arrays.equals(expectedIds, actualIds));
}
protected AbstractDifferenceRecommenderEvaluatorCrossDomain() {
  random = RandomUtils.getRandom();
  maxPreference = Float.NaN;
  minPreference = Float.NaN;
}
@Test
public void testSSVDSolver() throws Exception {
  Configuration conf = new Configuration();
  conf.set("mapred.job.tracker", "local");
  conf.set("fs.default.name", "file:///");

  // conf.set("mapred.job.tracker", "localhost:11011");
  // conf.set("fs.default.name", "hdfs://localhost:11010/");

  Deque<Closeable> closeables = new LinkedList<Closeable>();
  Random rnd = RandomUtils.getRandom();

  File tmpDir = getTestTempDir("svdtmp");
  conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

  Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

  // create distributed row matrix-like struct
  SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
      IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
  closeables.addFirst(w);

  int n = 100;
  Vector dv;
  VectorWritable vw = new VectorWritable();
  IntWritable roww = new IntWritable();

  double muAmplitude = 50.0;
  int m = 1000;
  for (int i = 0; i < m; i++) {
    dv = new SequentialAccessSparseVector(n);
    for (int j = 0; j < n / 5; j++) {
      dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
    }
    roww.set(i);
    vw.set(dv);
    w.append(roww, vw);
  }
  closeables.remove(w);
  w.close();

  FileSystem fs = FileSystem.get(conf);

  Path tempDirPath = getTestTempDirPath("svd-proc");
  Path aPath = new Path(tempDirPath, "A/A.seq");
  fs.copyFromLocalFile(aLocPath, aPath);

  Path svdOutPath = new Path(tempDirPath, "SSVD-out");

  // make sure we wipe out previous test results, just a convenience
  fs.delete(svdOutPath, true);

  int ablockRows = 251;
  int p = 60;
  int k = 40;
  SSVDSolver ssvd = new SSVDSolver(conf, new Path[] {aPath}, svdOutPath, ablockRows, k, p, 3);
  // ssvd.setcUHalfSigma(true);
  // ssvd.setcVHalfSigma(true);
  ssvd.setOverwrite(true);
  ssvd.run();

  double[] stochasticSValues = ssvd.getSingularValues();
  System.out.println("--SSVD solver singular values:");
  dumpSv(stochasticSValues);
  System.out.println("--Colt SVD solver singular values:");

  // try to run the same thing without stochastic algo
  double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf);

  // SingularValueDecompositionImpl svd = new SingularValueDecompositionImpl(new Array2DRowRealMatrix(a));
  SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));

  a = null;

  double[] svalues2 = svd2.getSingularValues();
  dumpSv(svalues2);

  for (int i = 0; i < k + p; i++) {
    Assert.assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
  }

  double[][] q =
      SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);
  SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(q), false, s_epsilon);

  double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "U/[^_]*"), conf);
  SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon);

  double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "V/[^_]*"), conf);
  SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
}