@Test
public void testConstant() {
  double tolerancePerc = 10.0; // 10% of correct value
  int nSamples = 500;
  int nFeatures = 3;
  int constant = 100;

  INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
  INDArray labelSet = Nd4j.zeros(nSamples, 1);
  DataSet sampleDataSet = new DataSet(featureSet, labelSet);

  NormalizerStandardize myNormalizer = new NormalizerStandardize();
  myNormalizer.fit(sampleDataSet);
  // Checking that we don't get NaNs
  assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

  myNormalizer.transform(sampleDataSet);
  // Checking that we don't get NaNs, because the std dev is zero
  assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));

  // Checking to see if transformed values are close enough to zero
  assertEquals(
      Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0),
      0,
      constant * tolerancePerc / 100.0);

  myNormalizer.revert(sampleDataSet);
  // Checking that we don't get NaNs, because the std dev is zero
  assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
  assertEquals(
      Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0),
      0,
      constant * tolerancePerc / 100.0);
}
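// Illustrative sketch (not part of the test above): the standardization exercised here is
// (x - mean) / std per column; a guard such as the one below is one way to avoid NaNs when a
// column is constant (std == 0). Helper name and epsilon handling are hypothetical.
static double[] standardizeColumn(double[] column, double epsilon) {
  double mean = 0;
  for (double v : column) mean += v;
  mean /= column.length;

  double var = 0;
  for (double v : column) var += (v - mean) * (v - mean);
  double std = Math.sqrt(var / column.length);

  // For a constant column std is 0; fall back to epsilon so the division stays finite
  double safeStd = Math.max(std, epsilon);

  double[] out = new double[column.length];
  for (int i = 0; i < column.length; i++) out[i] = (column[i] - mean) / safeStd;
  return out;
}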
public double similarity(String word, int k1, String word2, int k2) {
  if (k1 > K || k2 > K) return -1;
  if (word.equals(word2) && k1 == k2) return 1.0;

  // Fetch the raw vectors first so the null check happens before unitVec is applied
  INDArray vector = getWordVectorMatrix(word, k1);
  INDArray vector2 = getWordVectorMatrix(word2, k2);
  if (vector == null || vector2 == null) return -1;

  return Nd4j.getBlasWrapper().dot(Transforms.unitVec(vector), Transforms.unitVec(vector2));
}
protected void getAdadeltaGradient(INDArray gradient) {
  // Accumulate a decaying average of squared gradients
  adadeltaRMSGradient =
      adadeltaRMSGradient
          .mul(adadeltaMomentum)
          .add(gradient.mul(gradient).mul(1 - adadeltaMomentum));
  // Scale the gradient by RMS(update) / RMS(gradient)
  gradient.muli(
      Transforms.sqrt(adadeltaRMSUpdate.add(adadeltaEps))
          .div(Transforms.sqrt(adadeltaRMSGradient.add(adadeltaEps))));
  // Accumulate a decaying average of squared updates; the result must be assigned back,
  // otherwise the accumulator is never updated
  adadeltaRMSUpdate =
      adadeltaRMSUpdate
          .mul(adadeltaMomentum)
          .add(gradient.mul(gradient).mul(1 - adadeltaMomentum));
}
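// Illustrative sketch of the AdaDelta rule the method above applies, written with plain doubles
// for a single parameter. Names are hypothetical; rho plays the role of adadeltaMomentum, and the
// one-element arrays stand in for the two running accumulators.
static double adadeltaStep(double grad, double[] rmsGrad, double[] rmsUpdate, double rho, double eps) {
  // E[g^2] = rho * E[g^2] + (1 - rho) * g^2
  rmsGrad[0] = rho * rmsGrad[0] + (1 - rho) * grad * grad;
  // delta = g * RMS(update) / RMS(gradient)
  double delta = grad * Math.sqrt(rmsUpdate[0] + eps) / Math.sqrt(rmsGrad[0] + eps);
  // E[dx^2] = rho * E[dx^2] + (1 - rho) * delta^2
  rmsUpdate[0] = rho * rmsUpdate[0] + (1 - rho) * delta * delta;
  return delta;
}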
@Override
public double calcL1() {
  if (!conf.isUseRegularization() || conf.getL1() <= 0.0) return 0.0;
  double l1 =
      Transforms.abs(getParam(GRUParamInitializer.RECURRENT_WEIGHT_KEY))
              .sum(Integer.MAX_VALUE)
              .getDouble(0)
          + Transforms.abs(getParam(GRUParamInitializer.INPUT_WEIGHT_KEY))
              .sum(Integer.MAX_VALUE)
              .getDouble(0);
  return conf.getL1() * l1;
}
@Test
public void testBruteForce() {
  /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
     The obtained values are compared to the theoretical mean and std dev.
  */
  double tolerancePerc = 0.01; // 0.01% of correct value
  int nSamples = 5120;
  int x = 1, y = 2, z = 3;

  INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
  INDArray featureY = featureX.mul(y);
  INDArray featureZ = featureX.mul(z);
  INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
  INDArray labelSet = Nd4j.zeros(nSamples, 1);
  DataSet sampleDataSet = new DataSet(featureSet, labelSet);

  double meanNaturalNums = (nSamples + 1) / 2.0;
  INDArray theoreticalMean =
      Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
  double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
  INDArray theoreticalStd =
      Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});

  NormalizerStandardize myNormalizer = new NormalizerStandardize();
  myNormalizer.fit(sampleDataSet);

  INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
  INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
  double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
  assertTrue(maxMeanDeltaPerc < tolerancePerc);

  // Compare the fitted std dev to the theoretical std dev (not the mean)
  INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
  INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
  double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
  assertTrue(maxStdDeltaPerc < tolerancePerc);

  // SAME TEST WITH THE ITERATOR
  int bSize = 10;
  tolerancePerc = 1; // 1% of correct value
  DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
  myNormalizer.fit(sampleIter);

  meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
  meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
  maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
  assertTrue(maxMeanDeltaPerc < tolerancePerc);

  stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
  stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
  maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
  assertTrue(maxStdDeltaPerc < tolerancePerc);
}
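// Illustrative check (hypothetical helper, not part of the test) of the closed forms used above
// for the values 1..n: mean = (n + 1) / 2 and population std dev = sqrt((n^2 - 1) / 12).
static void checkNaturalNumberMoments(int n) {
  double mean = 0, var = 0;
  for (int i = 1; i <= n; i++) mean += i;
  mean /= n;
  for (int i = 1; i <= n; i++) var += (i - mean) * (i - mean);
  var /= n;
  System.out.println(mean + " vs " + (n + 1) / 2.0);
  System.out.println(Math.sqrt(var) + " vs " + Math.sqrt(((double) n * n - 1) / 12.0));
}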
/**
 * Words nearest the given vector representation
 *
 * @param words the vector to compare against
 * @param top the top n words
 * @return the words nearest the mean of the words
 */
@Override
public Collection<String> wordsNearest(INDArray words, int top) {
  if (lookupTable instanceof InMemoryLookupTable) {
    InMemoryLookupTable l = (InMemoryLookupTable) lookupTable;
    INDArray syn0 = l.getSyn0();
    if (!normalized) {
      synchronized (this) {
        if (!normalized) {
          syn0.diviColumnVector(syn0.norm1(1));
          normalized = true;
        }
      }
    }

    INDArray similarity = Transforms.unitVec(words).mmul(syn0.transpose());
    List<Double> highToLowSimList = getTopN(similarity, top + 20);
    List<WordSimilarity> result = new ArrayList<>();

    for (int i = 0; i < highToLowSimList.size(); i++) {
      String word = vocabCache.wordAtIndex(highToLowSimList.get(i).intValue());
      if (word != null && !word.equals("UNK") && !word.equals("STOP")) {
        INDArray otherVec = lookupTable.vector(word);
        double sim = Transforms.cosineSim(words, otherVec);
        result.add(new WordSimilarity(word, sim));
      }
    }

    Collections.sort(result, new SimilarityComparator());
    return getLabels(result, top);
  }

  Counter<String> distances = new Counter<>();
  for (String s : vocabCache.words()) {
    INDArray otherVec = lookupTable.vector(s);
    double sim = Transforms.cosineSim(words, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(top);
  return distances.keySet();
}
@Test
public void testUnderOverflow() {
  // This dataset will be basically constant with a small std deviation,
  // and the constant is large. Checking if the algorithm can handle it.
  double tolerancePerc = 1; // Within 1 %
  double toleranceAbs = 0.0005;
  int nSamples = 1000;
  int bSize = 10;
  int x = -1000000, y = 1000000;
  double z = 1000000;

  INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
  INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
  INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
  INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
  INDArray labelSet = Nd4j.zeros(nSamples, 1);
  DataSet sampleDataSet = new DataSet(featureSet, labelSet);
  DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

  INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

  NormalizerStandardize myNormalizer = new NormalizerStandardize();
  myNormalizer.fit(sampleIter);

  INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
  INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
  assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

  // this just has to not barf
  // myNormalizer.transform(sampleIter);
  myNormalizer.transform(sampleDataSet);
}
@Test
public void testAdamUpdater() {
  INDArray m, v;
  double lr = 0.01;
  int iteration = 0;
  double beta1 = 0.8;
  double beta2 = 0.888;

  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .learningRate(lr)
          .iterations(iteration)
          .adamMeanDecay(beta1)
          .adamVarDecay(beta2)
          .layer(
              new DenseLayer.Builder()
                  .nIn(nIn)
                  .nOut(nOut)
                  .updater(org.deeplearning4j.nn.conf.Updater.ADAM)
                  .build())
          .build();

  int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
  INDArray params = Nd4j.create(1, numParams);
  Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
  Updater updater = UpdaterCreator.getUpdater(layer);
  int updaterStateSize = updater.stateSizeForLayer(layer);
  INDArray updaterState = Nd4j.create(1, updaterStateSize);
  updater.setStateViewArray(layer, updaterState, true);

  updater.update(layer, gradient, iteration, 1);

  double beta1t = FastMath.pow(beta1, iteration);
  double beta2t = FastMath.pow(beta2, iteration);
  double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
  if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon;

  Gradient gradientDup = new DefaultGradient();
  gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
  gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

  for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
    val = entry.getValue();
    m = Nd4j.zeros(val.shape());
    v = Nd4j.zeros(val.shape());

    m.muli(beta1).addi(val.mul(1.0 - beta1));
    v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
    gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
    if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
      System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
      System.out.println(
          Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
    }
    assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
  }

  assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
  assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
}
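// Illustrative sketch of the Adam update the test above reconstructs, for a single scalar
// parameter with plain doubles. Names are hypothetical; with iteration 0 the bias-correction
// term 1 - beta1^t is 0, which is why the test falls back to epsilon for alphat.
static double adamStep(double grad, double[] m, double[] v,
    double lr, double beta1, double beta2, int t, double eps) {
  m[0] = beta1 * m[0] + (1 - beta1) * grad;        // first-moment estimate
  v[0] = beta2 * v[0] + (1 - beta2) * grad * grad; // second-moment estimate
  double alphat = lr * Math.sqrt(1 - Math.pow(beta2, t)) / (1 - Math.pow(beta1, t));
  if (Double.isNaN(alphat) || alphat == 0.0) alphat = eps;
  return alphat * m[0] / (Math.sqrt(v[0]) + eps);  // update applied to the parameter
}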
@Test
public void testWordsNearestBasic1() throws Exception {
  // WordVectors vec = WordVectorSerializer.loadTxtVectors(new
  // File("/ext/Temp/Models/model.dat_trans"));
  vec.setModelUtils(new BasicModelUtils<VocabWord>());

  String target = "energy";

  INDArray arr1 = vec.getWordVectorMatrix(target).dup();

  System.out.println("[-]: " + arr1);
  System.out.println("[+]: " + Transforms.unitVec(arr1));

  Collection<String> list = vec.wordsNearest(target, 10);
  log.info("Transpose model results:");
  printWords(target, list, vec);

  list = vec.wordsNearest(target, 10);
  log.info("Transpose model results 2:");
  printWords(target, list, vec);

  list = vec.wordsNearest(target, 10);
  log.info("Transpose model results 3:");
  printWords(target, list, vec);

  INDArray arr2 = vec.getWordVectorMatrix(target).dup();

  assertEquals(arr1, arr2);
}
/**
 * Returns the similarity of 2 words. The result will be in the range [-1,1], where -1.0 means
 * exactly opposite vectors (i.e. no similarity) and 1.0 means the two word vectors match exactly.
 * In practice you will mostly see values in the range [0,1], but that depends on the training
 * corpus.
 *
 * <p>Returns NaN if either label does not exist in the vocab, or if either label is null
 *
 * @param label1 the first word
 * @param label2 the second word
 * @return a normalized similarity (cosine similarity)
 */
@Override
public double similarity(String label1, String label2) {
  if (label1 == null || label2 == null) {
    log.debug(
        "LABELS: "
            + label1
            + ": "
            + (label1 == null ? "null" : EXISTS)
            + ";"
            + label2
            + " vec2:"
            + (label2 == null ? "null" : EXISTS));
    return Double.NaN;
  }

  // Look the vectors up before calling dup(), so a missing label is reported as NaN
  // instead of throwing a NullPointerException
  INDArray vec1 = lookupTable.vector(label1);
  INDArray vec2 = lookupTable.vector(label2);

  if (vec1 == null || vec2 == null) {
    log.debug(
        label1
            + ": "
            + (vec1 == null ? "null" : EXISTS)
            + ";"
            + label2
            + " vec2:"
            + (vec2 == null ? "null" : EXISTS));
    return Double.NaN;
  }

  if (label1.equals(label2)) return 1.0;

  return Transforms.cosineSim(vec1.dup(), vec2.dup());
}
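// Illustrative sketch of the cosine similarity that Transforms.cosineSim computes over the two
// word vectors above: dot(a, b) / (||a|| * ||b||). Plain-double version with a hypothetical name.
static double cosineSim(double[] a, double[] b) {
  double dot = 0, normA = 0, normB = 0;
  for (int i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}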
/**
 * Get the top n words most similar to the given word
 *
 * @param word the word to compare
 * @param n the n to get
 * @return the top n words
 */
public Collection<String> wordsNearestSum(String word, int n) {
  // Check for a missing word vector up front; unitVec on a null vector would throw
  INDArray wordVector = this.getWordVectorMatrix(word);
  if (wordVector == null) return new ArrayList<>();
  INDArray vec = Transforms.unitVec(wordVector);

  if (lookupTable() instanceof InMemoryLookupTable) {
    InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
    INDArray syn0 = l.getSyn0();
    INDArray weights = syn0.norm2(0).rdivi(1).muli(vec);
    INDArray distances = syn0.mulRowVector(weights).sum(1);
    INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
    INDArray sort = sorted[0];
    List<String> ret = new ArrayList<>();
    SequenceElement word2 = vocab().wordFor(word);
    if (n > sort.length()) n = sort.length();
    // there will be a redundant word
    for (int i = 0; i < n + 1; i++) {
      if (sort.getInt(i) == word2.getIndex()) continue;
      String add = vocab().wordAtIndex(sort.getInt(i));
      if (add == null || add.equals("UNK") || add.equals("STOP")) {
        continue;
      }

      ret.add(vocab().wordAtIndex(sort.getInt(i)));
    }

    return ret;
  }

  Counter<String> distances = new Counter<>();
  for (String s : vocab().words()) {
    if (s.equals(word)) continue;
    INDArray otherVec = getWordVectorMatrix(s);
    double sim = Transforms.cosineSim(vec, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(n);
  return distances.keySet();
}
/**
 * Words nearest based on positive and negative words
 *
 * @param positive the positive words
 * @param negative the negative words
 * @param top the top n words
 * @return the words nearest the mean of the words
 */
public Collection<String> wordsNearestSum(
    Collection<String> positive, Collection<String> negative, int top) {
  INDArray words = Nd4j.create(lookupTable().layerSize());
  Set<String> union = SetUtils.union(new HashSet<>(positive), new HashSet<>(negative));
  for (String s : positive) words.addi(lookupTable().vector(s));

  for (String s : negative) words.addi(lookupTable.vector(s).mul(-1));

  if (lookupTable() instanceof InMemoryLookupTable) {
    InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
    INDArray syn0 = l.getSyn0();
    INDArray weights = syn0.norm2(0).rdivi(1).muli(words);
    INDArray distances = syn0.mulRowVector(weights).sum(1);
    INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
    INDArray sort = sorted[0];
    List<String> ret = new ArrayList<>();
    if (top > sort.length()) top = sort.length();
    // there will be a redundant word
    int end = top;
    for (int i = 0; i < end; i++) {
      String word = vocab.wordAtIndex(sort.getInt(i));
      if (union.contains(word)) {
        end++;
        if (end >= sort.length()) break;
        continue;
      }

      String add = vocab().wordAtIndex(sort.getInt(i));
      if (add == null || add.equals("UNK") || add.equals("STOP")) {
        end++;
        if (end >= sort.length()) break;
        continue;
      }

      ret.add(vocab().wordAtIndex(sort.getInt(i)));
    }

    return ret;
  }

  Counter<String> distances = new Counter<>();
  for (String s : vocab().words()) {
    INDArray otherVec = getWordVectorMatrix(s);
    double sim = Transforms.cosineSim(words, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(top);
  return distances.keySet();
}
/**
 * @param modelFile the binary model file to load
 * @return the loaded {@link Word2Vec} model
 * @throws NumberFormatException
 * @throws IOException
 * @throws FileNotFoundException
 */
private static Word2Vec readBinaryModel(File modelFile)
    throws NumberFormatException, IOException {
  InMemoryLookupTable lookupTable;
  VocabCache cache;
  INDArray syn0;
  int words, size;
  try (BufferedInputStream bis =
          new BufferedInputStream(
              GzipUtils.isCompressedFilename(modelFile.getName())
                  ? new GZIPInputStream(new FileInputStream(modelFile))
                  : new FileInputStream(modelFile));
      DataInputStream dis = new DataInputStream(bis)) {
    words = Integer.parseInt(readString(dis));
    size = Integer.parseInt(readString(dis));
    syn0 = Nd4j.create(words, size);
    cache = new InMemoryLookupCache(false);
    lookupTable =
        (InMemoryLookupTable)
            new InMemoryLookupTable.Builder().cache(cache).vectorLength(size).build();

    String word;
    for (int i = 0; i < words; i++) {
      word = readString(dis);
      log.trace("Loading " + word + " with word " + i);
      if (word.isEmpty()) {
        continue;
      }

      float[] vector = new float[size];
      for (int j = 0; j < size; j++) {
        vector[j] = readFloat(dis);
      }

      syn0.putRow(i, Transforms.unitVec(Nd4j.create(vector)));

      cache.addWordToIndex(cache.numWords(), word);
      cache.addToken(new VocabWord(1, word));
      cache.putVocabWord(word);
    }
  }

  Word2Vec ret = new Word2Vec();

  lookupTable.setSyn0(syn0);
  ret.setVocab(cache);
  ret.setLookupTable(lookupTable);
  return ret;
}
@Test
public void testRMSPropUpdater() {
  double lr = 0.01;
  double rmsDecay = 0.25;
  Map<String, INDArray> lastG = new HashMap<>();

  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .learningRate(lr)
          .rmsDecay(rmsDecay)
          .layer(
              new DenseLayer.Builder()
                  .nIn(nIn)
                  .nOut(nOut)
                  .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                  .build())
          .build();

  int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
  INDArray params = Nd4j.create(1, numParams);
  Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
  Updater updater = UpdaterCreator.getUpdater(layer);
  int updaterStateSize = updater.stateSizeForLayer(layer);
  INDArray updaterState = Nd4j.create(1, updaterStateSize);
  updater.setStateViewArray(layer, updaterState, true);

  updater.update(layer, gradient, -1, 1);

  Gradient gradientDup = new DefaultGradient();
  gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
  gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

  for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
    key = entry.getKey();
    val = entry.getValue();
    INDArray lastGTmp = lastG.get(key);

    if (lastGTmp == null) lastGTmp = Nd4j.zeros(val.shape());

    lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
    gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(Nd4j.EPS_THRESHOLD)));

    assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    lastG.put(key, lastGTmp);
  }
  assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
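// Illustrative sketch of the RMSProp rule the test above reconstructs, for one scalar parameter.
// Names are hypothetical; lastG corresponds to the running average kept in the lastG map.
static double rmspropStep(double grad, double[] lastG, double lr, double rmsDecay, double eps) {
  // E[g^2] = decay * E[g^2] + (1 - decay) * g^2
  lastG[0] = rmsDecay * lastG[0] + (1 - rmsDecay) * grad * grad;
  // update = lr * g / sqrt(E[g^2] + eps)
  return lr * grad / Math.sqrt(lastG[0] + eps);
}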
/**
 * Apply the regularization
 *
 * @param layer the layer whose parameters are being updated
 * @param gradient the gradient for the given parameter
 * @param param the parameter key
 */
public void postApply(Layer layer, INDArray gradient, String param) {
  NeuralNetConfiguration conf = layer.conf();
  INDArray params = layer.getParam(param);
  if (conf.isUseRegularization()
      && conf.getLayer().getL2() > 0
      && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
    // dC/dw = dC0/dw + lambda/n * w where C0 is the pre-L2 cost function
    gradient.addi(params.mul(conf.getLayer().getL2()));
  if (conf.isUseRegularization()
      && conf.getLayer().getL1() > 0
      && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
    gradient.addi(Transforms.sign(params).muli(conf.getLayer().getL1()));
  if (conf.isMiniBatch()) gradient.divi(layer.getInputMiniBatchSize());

  if (conf.isConstrainGradientToUnitNorm()) gradient.divi(gradient.norm2(Integer.MAX_VALUE));
}
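// Illustrative sketch (hypothetical helper) of the adjustment applied above for a single weight:
// the L2 term adds lambda2 * w and the L1 term adds lambda1 * sign(w) to the gradient, and
// mini-batch gradients are averaged by the batch size.
static double postApplyScalar(double grad, double w, double lambda1, double lambda2, int batchSize) {
  grad += lambda2 * w;              // L2: dC/dw = dC0/dw + lambda2 * w
  grad += lambda1 * Math.signum(w); // L1: dC/dw = dC0/dw + lambda1 * sign(w)
  return grad / batchSize;          // average over the mini batch
}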
/**
 * @param modelFile the text model file to load
 * @return the loaded {@link Word2Vec} model
 * @throws FileNotFoundException
 * @throws IOException
 * @throws NumberFormatException
 */
private static Word2Vec readTextModel(File modelFile) throws IOException, NumberFormatException {
  InMemoryLookupTable lookupTable;
  VocabCache cache;
  INDArray syn0;
  BufferedReader reader = new BufferedReader(new FileReader(modelFile));
  String line = reader.readLine();
  String[] initial = line.split(" ");
  int words = Integer.parseInt(initial[0]);
  int layerSize = Integer.parseInt(initial[1]);
  syn0 = Nd4j.create(words, layerSize);
  cache = new InMemoryLookupCache();
  int currLine = 0;
  while ((line = reader.readLine()) != null) {
    String[] split = line.split(" ");
    String word = split[0];

    if (word.isEmpty()) {
      continue;
    }

    float[] vector = new float[split.length - 1];
    for (int i = 1; i < split.length; i++) {
      vector[i - 1] = Float.parseFloat(split[i]);
    }

    syn0.putRow(currLine, Transforms.unitVec(Nd4j.create(vector)));

    cache.addWordToIndex(cache.numWords(), word);
    cache.addToken(new VocabWord(1, word));
    cache.putVocabWord(word);

    // Advance the row index, otherwise every word overwrites row 0
    currLine++;
  }

  lookupTable =
      (InMemoryLookupTable)
          new InMemoryLookupTable.Builder().cache(cache).vectorLength(layerSize).build();
  lookupTable.setSyn0(syn0);

  Word2Vec ret = new Word2Vec();
  ret.setVocab(cache);
  ret.setLookupTable(lookupTable);
  reader.close();
  return ret;
}
/**
 * Words nearest the given vector representation
 *
 * @param words the vector to compare against
 * @param top the top n words
 * @return the words nearest the mean of the words
 */
@Override
public Collection<String> wordsNearest(INDArray words, int top) {
  if (lookupTable() instanceof InMemoryLookupTable) {
    InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
    INDArray syn0 = l.getSyn0();
    INDArray weights = syn0.norm2(0).rdivi(1).muli(words);
    INDArray distances = syn0.mulRowVector(weights).mean(1);
    INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
    INDArray sort = sorted[0];
    List<String> ret = new ArrayList<>();
    if (top > sort.length()) top = sort.length();
    // there will be a redundant word
    int end = top;
    for (int i = 0; i < end; i++) {
      VocabCache vocabCache = vocab();
      int s = sort.getInt(0, i);
      String add = vocabCache.wordAtIndex(s);
      if (add == null || add.equals("UNK") || add.equals("STOP")) {
        end++;
        if (end >= sort.length()) break;
        continue;
      }

      ret.add(vocabCache.wordAtIndex(s));
    }

    return ret;
  }

  Counter<String> distances = new Counter<>();
  for (String s : vocab().words()) {
    INDArray otherVec = getWordVectorMatrix(s);
    double sim = Transforms.cosineSim(words, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(top);
  return distances.keySet();
}
/**
 * Get the top n words most similar to the given word
 *
 * @param word the word to compare
 * @param n the n to get
 * @return the top n words
 */
public Collection<String> wordsNearest(String word, int n) {
  /* TODO: This is a temporary solution; we should get rid of the flat array scan,
     probably once the VPTree implementation gets fixed
  */
  if (!vocab.hasToken(word)) return new ArrayList<>();

  INDArray mean = getWordVectorMatrix(word);
  Counter<String> distances = new Counter<>();
  for (String s : vocab().words()) {
    if (s.equals(word)) continue;
    INDArray otherVec = getWordVectorMatrix(s);
    double sim = Transforms.cosineSim(mean, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(n - 1);
  return distances.keySet();
  // return wordsNearest(Arrays.asList(word), new ArrayList<String>(), n);
}
public Collection<String> wordsNearest(String word, int k, int n) {
  INDArray vector = Transforms.unitVec(getWordVectorMatrix(word, k));
  INDArray similarity = vector.mmul(syn0.transpose());
  List<Double> highToLowSimList = getTopN(similarity, n);
  List<String> ret = new ArrayList<>();

  for (int i = 1; i < highToLowSimList.size(); i++) {
    word =
        vocab.wordAtIndex(highToLowSimList.get(i).intValue() % vocab.numWords())
            + "("
            + highToLowSimList.get(i).intValue() / vocab.numWords()
            + ")";
    if (word != null && !word.equals("UNK") && !word.equals("STOP")) {
      ret.add(word);
      if (ret.size() >= n) {
        break;
      }
    }
  }

  return ret;
}
@Test
public void testRevert() {
  double tolerancePerc = 0.01; // 0.01% of correct value
  int nSamples = 500;
  int nFeatures = 3;

  INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
  INDArray labelSet = Nd4j.zeros(nSamples, 1);
  DataSet sampleDataSet = new DataSet(featureSet, labelSet);

  NormalizerStandardize myNormalizer = new NormalizerStandardize();
  myNormalizer.fit(sampleDataSet);
  DataSet transformed = sampleDataSet.copy();
  myNormalizer.transform(transformed);
  // System.out.println(transformed.getFeatures());
  myNormalizer.revert(transformed);
  // System.out.println(transformed.getFeatures());
  INDArray delta =
      Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
          .div(sampleDataSet.getFeatures());
  double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
  assertTrue(maxdeltaPerc < tolerancePerc);
}
@Test
public void testAdaGradUpdater() {
  double lr = 1e-2;

  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .learningRate(lr)
          .layer(
              new DenseLayer.Builder()
                  .nIn(nIn)
                  .nOut(nOut)
                  .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD)
                  .build())
          .build();

  int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
  INDArray params = Nd4j.create(1, numParams);
  Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
  Updater updater = UpdaterCreator.getUpdater(layer);
  int updaterStateSize = updater.stateSizeForLayer(layer);
  INDArray updaterState = Nd4j.create(1, updaterStateSize);
  updater.setStateViewArray(layer, updaterState, true);

  updater.update(layer, gradient, -1, 1);

  Gradient gradientDup = new DefaultGradient();
  gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
  gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

  for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
    val = entry.getValue();
    gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
    assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
  }
  assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
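// Illustrative sketch of the AdaGrad rule checked above, for one scalar parameter on its first
// update: the historical squared-gradient accumulator starts at zero, so after one step it is
// just g^2, and the update is lr / sqrt(history + eps) * g. Helper name is hypothetical.
static double adagradFirstStep(double grad, double lr, double eps) {
  double history = grad * grad;                // accumulated squared gradient after one step
  return lr / Math.sqrt(history + eps) * grad; // scaled update
}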
/**
 * Returns the similarity of 2 words. The result will be in the range [-1,1], where -1.0 means
 * exactly opposite vectors (i.e. no similarity) and 1.0 means the two word vectors match exactly.
 * In practice you will mostly see values in the range [0,1], but that depends on the training
 * corpus.
 *
 * @param word the first word
 * @param word2 the second word
 * @return a normalized similarity (cosine similarity)
 */
public double similarity(String word, String word2) {
  if (word.equals(word2)) return 1.0;

  if (getWordVectorMatrix(word) == null || getWordVectorMatrix(word2) == null) return -1;
  return Transforms.cosineSim(getWordVectorMatrix(word), getWordVectorMatrix(word2));
}
/**
 * Words nearest based on positive and negative words
 *
 * @param positive the positive words
 * @param negative the negative words
 * @param top the top n words
 * @return the words nearest the mean of the words
 */
@Override
public Collection<String> wordsNearest(
    Collection<String> positive, Collection<String> negative, int top) {
  // Check every word is in the model
  for (String p : SetUtils.union(new HashSet<>(positive), new HashSet<>(negative))) {
    if (!vocab().containsWord(p)) {
      return new ArrayList<>();
    }
  }

  WeightLookupTable weightLookupTable = lookupTable();
  INDArray words = Nd4j.create(positive.size() + negative.size(), weightLookupTable.layerSize());
  int row = 0;
  Set<String> union = SetUtils.union(new HashSet<>(positive), new HashSet<>(negative));
  for (String s : positive) {
    words.putRow(row++, weightLookupTable.vector(s));
  }

  for (String s : negative) {
    words.putRow(row++, weightLookupTable.vector(s).mul(-1));
  }

  INDArray mean = words.isMatrix() ? words.mean(0) : words;

  // TODO this should probably be replaced with wordsNearest(mean, top)
  if (weightLookupTable instanceof InMemoryLookupTable) {
    InMemoryLookupTable l = (InMemoryLookupTable) weightLookupTable;

    INDArray syn0 = l.getSyn0();
    syn0.diviRowVector(syn0.norm2(0));

    INDArray similarity = Transforms.unitVec(mean).mmul(syn0.transpose());
    // We assume that syn0 is normalized.
    // Hence, the following division is not needed anymore.
    // distances.diviRowVector(distances.norm2(1));
    // INDArray[] sorted = Nd4j.sortWithIndices(distances,0,false);
    List<Double> highToLowSimList = getTopN(similarity, top + union.size());
    List<String> ret = new ArrayList<>();

    for (int i = 0; i < highToLowSimList.size(); i++) {
      String word = vocab().wordAtIndex(highToLowSimList.get(i).intValue());
      if (word != null && !word.equals("UNK") && !word.equals("STOP") && !union.contains(word)) {
        ret.add(word);
        if (ret.size() >= top) {
          break;
        }
      }
    }
    return ret;
  }

  Counter<String> distances = new Counter<>();
  for (String s : vocab().words()) {
    INDArray otherVec = getWordVectorMatrix(s);
    double sim = Transforms.cosineSim(mean, otherVec);
    distances.incrementCount(s, sim);
  }
  distances.keepTopNKeys(top);
  return distances.keySet();
}
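// Illustrative sketch of the query vector built above: positive vectors are added, negative
// vectors are subtracted, the rows are averaged, and candidates are then ranked by cosine
// similarity against that mean. Plain-double version with hypothetical names.
static double[] meanQueryVector(double[][] positive, double[][] negative, int layerSize) {
  double[] mean = new double[layerSize];
  int rows = positive.length + negative.length;
  for (double[] v : positive)
    for (int i = 0; i < layerSize; i++) mean[i] += v[i];
  for (double[] v : negative)
    for (int i = 0; i < layerSize; i++) mean[i] -= v[i];
  for (int i = 0; i < layerSize; i++) mean[i] /= rows;
  return mean;
}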
private void backpropDerivativesAndError(
    Tree tree,
    MultiDimensionalMap<String, String, INDArray> binaryTD,
    MultiDimensionalMap<String, String, INDArray> binaryCD,
    MultiDimensionalMap<String, String, INDArray> binaryINDArrayTD,
    Map<String, INDArray> unaryCD,
    Map<String, INDArray> wordVectorD,
    INDArray deltaUp) {
  if (tree.isLeaf()) {
    return;
  }

  INDArray currentVector = tree.vector();
  String category = tree.label();
  category = basicCategory(category);

  // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
  INDArray goldLabel = Nd4j.create(numOuts, 1);
  int goldClass = tree.goldLabel();
  if (goldClass >= 0) {
    assert goldClass <= numOuts
        : "Tried adding a label that was >= to the number of configured outputs "
            + numOuts
            + " with label "
            + goldClass;
    goldLabel.putScalar(goldClass, 1.0f);
  }

  Double nodeWeight = classWeights.get(goldClass);
  if (nodeWeight == null) nodeWeight = 1.0;

  INDArray predictions = tree.prediction();

  // If this is an unlabeled class, transform deltaClass to 0. We could
  // make this more efficient by eliminating various of the below
  // calculations, but this would be the easiest way to handle the
  // unlabeled class
  INDArray deltaClass = null;
  if (predictions.data().dataType() == DataBuffer.Type.DOUBLE) {
    deltaClass =
        goldClass >= 0
            ? Nd4j.getBlasWrapper().scal(nodeWeight, predictions.sub(goldLabel))
            : Nd4j.create(predictions.rows(), predictions.columns());
  } else {
    deltaClass =
        goldClass >= 0
            ? Nd4j.getBlasWrapper()
                .scal((float) nodeWeight.doubleValue(), predictions.sub(goldLabel))
            : Nd4j.create(predictions.rows(), predictions.columns());
  }

  INDArray localCD = deltaClass.mmul(Nd4j.appendBias(currentVector).transpose());

  double error =
      -(Transforms.log(predictions).muli(goldLabel).sum(Integer.MAX_VALUE).getDouble(0));
  error = error * nodeWeight;
  tree.setError(error);

  if (tree.isPreTerminal()) { // below us is a word vector
    unaryCD.put(category, unaryCD.get(category).add(localCD));

    String word = tree.children().get(0).label();
    word = getVocabWord(word);

    INDArray currentVectorDerivative =
        Nd4j.getExecutioner()
            .execAndReturn(
                Nd4j.getOpFactory().createTransform(activationFunction, currentVector));
    INDArray deltaFromClass = getUnaryClassification(category).transpose().mmul(deltaClass);
    deltaFromClass =
        deltaFromClass.get(interval(0, numHidden), interval(0, 1)).mul(currentVectorDerivative);
    INDArray deltaFull = deltaFromClass.add(deltaUp);
    INDArray wordVector = wordVectorD.get(word);
    wordVectorD.put(word, wordVector.add(deltaFull));
  } else {
    // Otherwise, this must be a binary node
    String leftCategory = basicCategory(tree.children().get(0).label());
    String rightCategory = basicCategory(tree.children().get(1).label());
    if (combineClassification) {
      unaryCD.put("", unaryCD.get("").add(localCD));
    } else {
      binaryCD.put(
          leftCategory, rightCategory, binaryCD.get(leftCategory, rightCategory).add(localCD));
    }

    INDArray currentVectorDerivative =
        Nd4j.getExecutioner()
            .execAndReturn(
                Nd4j.getOpFactory().createTransform(activationFunction, currentVector));
    INDArray deltaFromClass =
        getBinaryClassification(leftCategory, rightCategory).transpose().mmul(deltaClass);

    INDArray mult = deltaFromClass.get(interval(0, numHidden), interval(0, 1));
    deltaFromClass = mult.muli(currentVectorDerivative);
    INDArray deltaFull = deltaFromClass.add(deltaUp);

    INDArray leftVector = tree.children().get(0).vector();
    INDArray rightVector = tree.children().get(1).vector();

    INDArray childrenVector = Nd4j.appendBias(leftVector, rightVector);

    // deltaFull 50 x 1, childrenVector: 50 x 2
    INDArray add = binaryTD.get(leftCategory, rightCategory);

    INDArray W_df = deltaFromClass.mmul(childrenVector.transpose());
    binaryTD.put(leftCategory, rightCategory, add.add(W_df));

    INDArray deltaDown;
    if (useDoubleTensors) {
      INDArray Wt_df = getINDArrayGradient(deltaFull, leftVector, rightVector);
      binaryINDArrayTD.put(
          leftCategory,
          rightCategory,
          binaryINDArrayTD.get(leftCategory, rightCategory).add(Wt_df));
      deltaDown =
          computeINDArrayDeltaDown(
              deltaFull,
              leftVector,
              rightVector,
              getBinaryTransform(leftCategory, rightCategory),
              getBinaryINDArray(leftCategory, rightCategory));
    } else {
      deltaDown = getBinaryTransform(leftCategory, rightCategory).transpose().mmul(deltaFull);
    }

    INDArray leftDerivative =
        Nd4j.getExecutioner()
            .execAndReturn(Nd4j.getOpFactory().createTransform(activationFunction, leftVector));
    INDArray rightDerivative =
        Nd4j.getExecutioner()
            .execAndReturn(Nd4j.getOpFactory().createTransform(activationFunction, rightVector));
    INDArray leftDeltaDown = deltaDown.get(interval(0, deltaFull.rows()), interval(0, 1));
    INDArray rightDeltaDown =
        deltaDown.get(interval(deltaFull.rows(), deltaFull.rows() * 2), interval(0, 1));
    backpropDerivativesAndError(
        tree.children().get(0),
        binaryTD,
        binaryCD,
        binaryINDArrayTD,
        unaryCD,
        wordVectorD,
        leftDerivative.mul(leftDeltaDown));
    backpropDerivativesAndError(
        tree.children().get(1),
        binaryTD,
        binaryCD,
        binaryINDArrayTD,
        unaryCD,
        wordVectorD,
        rightDerivative.mul(rightDeltaDown));
  }
}
@Test
public void testAdaDeltaUpdate() {
  INDArray dxSquared;
  Map<String, INDArray> msg = new HashMap<>();
  Map<String, INDArray> msdx = new HashMap<>();

  double rho = 0.85;

  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .rho(rho)
          .layer(
              new DenseLayer.Builder()
                  .nIn(nIn)
                  .nOut(nOut)
                  .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                  .build())
          .build();

  int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
  INDArray params = Nd4j.create(1, numParams);
  Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
  Updater updater = UpdaterCreator.getUpdater(layer);
  int updaterStateSize = updater.stateSizeForLayer(layer);
  INDArray updaterState = Nd4j.create(1, updaterStateSize);
  updater.setStateViewArray(layer, updaterState, true);

  Gradient gradientDup = new DefaultGradient();
  gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
  gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

  for (int i = 0; i < 2; i++) {
    updater.update(layer, gradient, i, 1);

    // calculations for one iteration / update

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      key = entry.getKey();
      val = entry.getValue();
      INDArray msgTmp = msg.get(key);
      INDArray msdxTmp = msdx.get(key);

      if (msgTmp == null) {
        msgTmp = Nd4j.zeros(val.shape());
        msdxTmp = Nd4j.zeros(val.shape());
      }

      // E[g^2] = rho * E[g^2] + (1 - rho) * g^2
      msgTmp.muli(rho);
      msgTmp.addi(val.mul(val).muli(1 - rho));

      gradExpected =
          Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
              .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD)))
              .muli(val);
      gradientDup.setGradientFor(key, gradExpected);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

      msdxTmp.muli(rho);
      dxSquared = gradExpected.mul(gradExpected);
      msdxTmp.addi(dxSquared.muli(1 - rho));

      msg.put(key, msgTmp);
      msdx.put(key, msdxTmp);
    }
    assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
  }
}
@Test
public void testTransform() {
  /* Random dataset is generated such that
     AX + B where X is from a normal distribution with mean 0 and std 1
     The mean of the above will be B and the std A
     Obtained mean and std dev are compared to theoretical
     Transformed values should be the same as X with the same seed.
  */
  long randSeed = 7139183;

  int nFeatures = 2;
  int nSamples = 6400;
  int bsize = 8;
  int a = 2;
  int b = 10;
  INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc;
  double maxDeltaPerc, sampleMeanSEM;

  genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
  genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
  genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

  NormalizerStandardize myNormalizer = new NormalizerStandardize();
  DataSetIterator normIterator = normData.getIter(bsize);
  DataSetIterator expectedIterator = expectedData.getIter(bsize);
  DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

  myNormalizer.fit(normIterator);

  double tolerancePerc = 5.0; // within 5%
  sampleMean = myNormalizer.getMean();
  sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
  assertTrue(
      sampleMeanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0)
          < tolerancePerc);
  // sanity check to see if it's within the theoretical standard error of mean
  sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0);
  assertTrue(sampleMeanSEM < 2.6); // 99% of the time it should be within this many SEMs

  tolerancePerc = 10.0; // within 10%
  sampleStd = myNormalizer.getStd();
  sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));
  assertTrue(
      sampleStdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0)
          < tolerancePerc);

  normIterator.setPreProcessor(myNormalizer);
  while (normIterator.hasNext()) {
    INDArray before = beforeTransformIterator.next().getFeatures();
    INDArray after = normIterator.next().getFeatures();
    INDArray expected = expectedIterator.next().getFeatures();
    delta = Transforms.abs(after.sub(expected));
    deltaPerc = delta.div(before.sub(expected));
    deltaPerc.muli(100);
    maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0, 0);
    // System.out.println("=== BEFORE ===");
    // System.out.println(before);
    // System.out.println("=== AFTER ===");
    // System.out.println(after);
    // System.out.println("=== SHOULD BE ===");
    // System.out.println(expected);
    assertTrue(maxDeltaPerc < tolerancePerc);
  }
}