@Test public void testConstant() { double tolerancePerc = 10.0; // 10% of correct value int nSamples = 500; int nFeatures = 3; int constant = 100; INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant); INDArray labelSet = Nd4j.zeros(nSamples, 1); DataSet sampleDataSet = new DataSet(featureSet, labelSet); NormalizerStandardize myNormalizer = new NormalizerStandardize(); myNormalizer.fit(sampleDataSet); // Checking if we gets nans assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0))); myNormalizer.transform(sampleDataSet); // Checking if we gets nans, because std dev is zero assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0))); // Checking to see if transformed values are close enough to zero assertEquals( Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0, constant * tolerancePerc / 100.0); myNormalizer.revert(sampleDataSet); // Checking if we gets nans, because std dev is zero assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0))); assertEquals( Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0, constant * tolerancePerc / 100.0); }
@Test public void testTransform() { /*Random dataset is generated such that AX + B where X is from a normal distribution with mean 0 and std 1 The mean of above will be B and std A Obtained mean and std dev are compared to theoretical Transformed values should be the same as X with the same seed. */ long randSeed = 7139183; int nFeatures = 2; int nSamples = 6400; int bsize = 8; int a = 2; int b = 10; INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc; double maxDeltaPerc, sampleMeanSEM; genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed); genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed); genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed); NormalizerStandardize myNormalizer = new NormalizerStandardize(); DataSetIterator normIterator = normData.getIter(bsize); DataSetIterator expectedIterator = expectedData.getIter(bsize); DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize); myNormalizer.fit(normIterator); double tolerancePerc = 5.0; // within 5% sampleMean = myNormalizer.getMean(); sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean)); assertTrue( sampleMeanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0) < tolerancePerc); // sanity check to see if it's within the theoretical standard error of mean sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0); assertTrue(sampleMeanSEM < 2.6); // 99% of the time it should be within this many SEMs tolerancePerc = 10.0; // within 10% sampleStd = myNormalizer.getStd(); sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd)); assertTrue( sampleStdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0) < tolerancePerc); normIterator.setPreProcessor(myNormalizer); while (normIterator.hasNext()) { INDArray before = beforeTransformIterator.next().getFeatures(); INDArray after = normIterator.next().getFeatures(); INDArray expected = expectedIterator.next().getFeatures(); delta = Transforms.abs(after.sub(expected)); deltaPerc = delta.div(before.sub(expected)); deltaPerc.muli(100); maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0, 0); // System.out.println("=== BEFORE ==="); // System.out.println(before); // System.out.println("=== AFTER ==="); // System.out.println(after); // System.out.println("=== SHOULD BE ==="); // System.out.println(expected); assertTrue(maxDeltaPerc < tolerancePerc); } }